2019-01-26 23:22:56 +01:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2019-01-26 23:22:56 +01:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2019-01-26 23:22:56 +01:00
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2024-07-16 14:51:57 +02:00
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2019-01-26 23:22:56 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2019-01-26 23:22:56 +01:00
# ==================================================================== #
2021-01-18 16:57:56 +01:00
#' Get ATC Properties from WHOCC Website
2019-01-26 23:22:56 +01:00
#'
2021-08-19 23:43:02 +02:00
#' Gets data from the WHOCC website to determine properties of an Anatomical Therapeutic Chemical (ATC) code (e.g. of an antibiotic), such as the name, defined daily dose (DDD) or standard unit.
#' @param atc_code a [character] (vector) with ATC code(s) of antibiotics, will be coerced with [as.ab()] and [ab_atc()] internally if not a valid ATC code
2021-01-18 16:57:56 +01:00
#' @param property property of an ATC code. Valid values (case-insensitive) are `"ATC"`, `"Name"`, `"DDD"`, `"U"` (`"unit"`), `"Adm.R"`, `"Note"` and `"groups"`. For this last option, all hierarchical groups of an ATC code will be returned, see *Examples*.
#' @param administration type of administration, used to select the matching row when `property` is `"DDD"`, `"U"` (`"unit"`), `"Adm.R"` or `"Note"`, see *Details*
2020-09-18 16:05:53 +02:00
#' @param url url of website of the WHOCC. The sign `%s` can be used as a placeholder for ATC codes.
#' @param url_vet url of website of the WHOCC for veterinary medicine. The sign `%s` can be used as a placeholder for ATC_vet codes (that all start with "Q").
2020-12-22 00:51:17 +01:00
#' @param ... arguments to pass on to [atc_online_property()]
2019-01-26 23:22:56 +01:00
#' @details
2020-12-22 00:51:17 +01:00
#' Options for argument `administration`:
2022-08-28 10:31:50 +02:00
#'
2019-11-28 22:32:17 +01:00
#' - `"Implant"` = Implant
#' - `"Inhal"` = Inhalation
#' - `"Instill"` = Instillation
#' - `"N"` = nasal
#' - `"O"` = oral
#' - `"P"` = parenteral
#' - `"R"` = rectal
#' - `"SL"` = sublingual/buccal
#' - `"TD"` = transdermal
#' - `"V"` = vaginal
2019-01-26 23:22:56 +01:00
#'
2019-11-28 22:32:17 +01:00
#' Abbreviations of return values when using `property = "U"` (unit):
2022-08-28 10:31:50 +02:00
#'
2019-11-28 22:32:17 +01:00
#' - `"g"` = gram
#' - `"mg"` = milligram
2022-06-03 13:28:55 +02:00
#' - `"mcg"` = microgram
2019-11-28 22:32:17 +01:00
#' - `"U"` = unit
#' - `"TU"` = thousand units
#' - `"MU"` = million units
#' - `"mmol"` = millimole
2021-05-20 10:10:40 +02:00
#' - `"ml"` = millilitre (e.g. eyedrops)
2022-08-28 10:31:50 +02:00
#'
2020-09-03 12:31:48 +02:00
#' **N.B. This function requires an internet connection and only works if the following packages are installed: `curl`, `rvest`, `xml2`.**
2019-01-26 23:22:56 +01:00
#' @export
#' @rdname atc_online
2024-03-03 23:24:57 +01:00
#' @source <https://atcddd.fhi.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/>
2019-01-26 23:22:56 +01:00
#' @examples
2021-05-24 09:34:08 +02:00
#' \donttest{
2022-08-28 10:31:50 +02:00
#' if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) {
2021-05-24 09:34:08 +02:00
#' # oral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "O")
2021-08-19 23:43:02 +02:00
#' atc_online_ddd(ab_atc("amox"))
2022-08-28 10:31:50 +02:00
#'
2021-05-24 09:34:08 +02:00
#' # parenteral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "P")
2019-01-26 23:22:56 +01:00
#'
2021-05-24 09:34:08 +02:00
#' atc_online_property("J01CA04", property = "groups") # search hierarchical groups of amoxicillin
#' }
2019-01-26 23:22:56 +01:00
#' }
atc_online_property <- function(atc_code,
                                property,
                                administration = "O",
                                url = "https://atcddd.fhi.no/atc_ddd_index/?code=%s&showdescription=no",
                                url_vet = "https://atcddd.fhi.no/atcvet/atcvet_index/?code=%s&showdescription=no") {
  # Scrapes one WHOCC page per ATC code and extracts the requested property.
  # Returns a vector with one element per element of `atc_code` (numeric for
  # "ddd", character otherwise). For "groups" a list with one character vector
  # per input code is returned, or just that character vector when a single
  # code was given. Codes that are NA, or for which nothing was found, yield NA.
  meet_criteria(atc_code, allow_class = "character", allow_NA = TRUE)
  meet_criteria(
    property,
    allow_class = "character",
    has_length = 1,
    is_in = c("ATC", "Name", "DDD", "U", "unit", "Adm.R", "Note", "groups"),
    ignore.case = TRUE
  )
  meet_criteria(administration, allow_class = "character", has_length = 1)
  meet_criteria(url, allow_class = "character", has_length = 1, looks_like = "https?://")
  meet_criteria(url_vet, allow_class = "character", has_length = 1, looks_like = "https?://")

  # the scraping packages are optional dependencies, so resolve them at run time
  has_internet <- import_fn("has_internet", "curl")
  html_attr <- import_fn("html_attr", "rvest")
  html_children <- import_fn("html_children", "rvest")
  html_node <- import_fn("html_node", "rvest")
  html_nodes <- import_fn("html_nodes", "rvest")
  html_table <- import_fn("html_table", "rvest")
  html_text <- import_fn("html_text", "rvest")
  read_html <- import_fn("read_html", "xml2")

  # anything that is not a known ATC code is coerced to one
  if (!all(atc_code %in% unlist(AMR::antibiotics$atc))) {
    atc_code <- as.character(ab_atc(atc_code, only_first = TRUE))
  }

  if (!has_internet()) {
    message_("There appears to be no internet connection, returning NA.",
      add_fn = font_red,
      as_note = FALSE
    )
    return(rep(NA, length(atc_code)))
  }

  property <- tolower(property)
  # also allow unit as property
  if (property == "unit") {
    property <- "u"
  }

  # pre-fill the result with NA so that every input code keeps its own slot,
  # also when it is skipped further down
  if (property == "ddd") {
    returnvalue <- rep(NA_real_, length(atc_code))
  } else if (property == "groups") {
    returnvalue <- rep(list(NA_character_), length(atc_code))
  } else {
    returnvalue <- rep(NA_character_, length(atc_code))
  }

  progress <- progress_ticker(n = length(atc_code), 3)
  on.exit(close(progress), add = TRUE)

  for (i in seq_along(atc_code)) {
    progress$tick()
    if (is.na(atc_code[i])) {
      next
    }
    if (atc_code[i] %like% "^Q") {
      # veterinary drugs, ATC_vet codes start with a "Q"
      atc_url <- url_vet
    } else {
      atc_url <- url
    }
    atc_url <- sub("%s", atc_code[i], atc_url, fixed = TRUE)

    if (property == "groups") {
      out <- tryCatch(
        read_html(atc_url) %pm>%
          html_node("#content") %pm>%
          html_children() %pm>%
          html_node("a"),
        error = function(e) NULL
      )
      if (is.null(out)) {
        # a failed request aborts the whole call, not just this code
        message_("Connection to ", atc_url, " failed.")
        return(rep(NA, length(atc_code)))
      }

      # get URLS of items
      hrefs <- out %pm>% html_attr("href")
      # get text of items
      texts <- out %pm>% html_text()
      # select only text items where URL like "code="
      texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)]
      # last one is antibiotics, skip it (safe for an empty vector too)
      texts <- texts[seq_len(max(length(texts) - 1L, 0L))]
      # store at position i, so the output order follows the input order
      returnvalue[i] <- list(texts)
    } else {
      out <- tryCatch(
        read_html(atc_url) %pm>%
          html_nodes("table") %pm>%
          html_table(header = TRUE) %pm>%
          as.data.frame(stringsAsFactors = FALSE),
        error = function(e) NULL
      )
      if (is.null(out)) {
        # a failed request aborts the whole call, not just this code
        message_("Connection to ", atc_url, " failed.")
        return(rep(NA, length(atc_code)))
      }

      # case insensitive column names
      colnames(out) <- gsub("^atc.*", "atc", tolower(colnames(out)))

      if (length(out) == 0) {
        message_("in `atc_online_property()`: no properties found for ATC ", atc_code[i], ". Please check ", font_url(atc_url, "this WHOCC webpage"), ".")
        next
      }

      if (!property %in% colnames(out)) {
        # the page has a table, but not the requested column: keep NA
        next
      }

      if (property %in% c("atc", "name")) {
        # ATC and name are only in first row
        returnvalue[i] <- out[1, property, drop = TRUE]
      } else {
        if (!"adm.r" %in% colnames(out) || is.na(out[1, "adm.r", drop = TRUE])) {
          next
        }
        # which() drops NA comparisons, so rows without an administration
        # route cannot break the lookup; the last matching row wins
        hits <- which(out[["adm.r"]] == administration)
        if (length(hits) > 0) {
          returnvalue[i] <- out[hits[length(hits)], property, drop = TRUE]
        }
      }
    }
  }

  # a single code returns its groups directly instead of a list of length 1
  if (property == "groups" && length(returnvalue) == 1) {
    returnvalue <- returnvalue[[1]]
  }

  returnvalue
}
#' @rdname atc_online
#' @export
atc_online_groups <- function(atc_code, ...) {
  # validate the input before handing it over to the generic scraper
  meet_criteria(atc_code, allow_class = "character", allow_NA = TRUE)
  # "groups" requests the hierarchical ATC groups of each code
  atc_online_property(
    atc_code = atc_code,
    property = "groups",
    ...
  )
}
#' @rdname atc_online
#' @export
atc_online_ddd <- function(atc_code, ...) {
  # validate the input before handing it over to the generic scraper
  meet_criteria(atc_code, allow_class = "character", allow_NA = TRUE)
  # "ddd" requests the defined daily dose of each code
  atc_online_property(
    atc_code = atc_code,
    property = "ddd",
    ...
  )
}
2021-08-19 23:43:02 +02:00
#' @rdname atc_online
#' @export
atc_online_ddd_units <- function(atc_code, ...) {
  # validate the input before handing it over to the generic scraper
  meet_criteria(atc_code, allow_class = "character", allow_NA = TRUE)
  # "unit" is accepted by atc_online_property() as an alias of "U"
  atc_online_property(
    atc_code = atc_code,
    property = "unit",
    ...
  )
}