2019-01-26 23:22:56 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2019-01-26 23:22:56 +01:00
# #
# LICENCE #
2020-01-05 17:22:09 +01:00
# (c) 2018-2020 Berends MS, Luz CF et al. #
2019-01-26 23:22:56 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-07-08 14:48:06 +02:00
# Visit our website for more info: https://msberends.github.io/AMR. #
2019-01-26 23:22:56 +01:00
# ==================================================================== #
2019-01-27 19:30:40 +01:00
#' Get ATC properties from WHOCC website
2019-01-26 23:22:56 +01:00
#'
2020-01-05 17:22:09 +01:00
#' @inheritSection lifecycle Questioning lifecycle
2019-11-28 22:32:17 +01:00
#' @description Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit.
#'
#' **This function requires an internet connection.**
2019-01-26 23:22:56 +01:00
#' @param atc_code a character or character vector with ATC code(s) of antibiotic(s)
2019-11-28 22:32:17 +01:00
#' @param property property of an ATC code. Valid values are `"ATC"`, `"Name"`, `"DDD"`, `"U"` (`"unit"`), `"Adm.R"`, `"Note"` and `"groups"`. For this last option, all hierarchical groups of an ATC code will be returned, see *Examples*.
#' @param administration type of administration when using `property = "Adm.R"`, see Details
#' @param url url of website of the WHO. The sign `%s` can be used as a placeholder for ATC codes.
#' @param ... parameters to pass on to `atc_online_property()`
2019-01-26 23:22:56 +01:00
#' @details
2019-11-28 22:32:17 +01:00
#' Options for parameter `administration`:
#'
#' - `"Implant"` = Implant
#' - `"Inhal"` = Inhalation
#' - `"Instill"` = Instillation
#' - `"N"` = nasal
#' - `"O"` = oral
#' - `"P"` = parenteral
#' - `"R"` = rectal
#' - `"SL"` = sublingual/buccal
#' - `"TD"` = transdermal
#' - `"V"` = vaginal
2019-01-26 23:22:56 +01:00
#'
2019-11-28 22:32:17 +01:00
#' Abbreviations of return values when using `property = "U"` (unit):
#'
#' - `"g"` = gram
#' - `"mg"` = milligram
#' - `"mcg"` = microgram
#' - `"U"` = unit
#' - `"TU"` = thousand units
#' - `"MU"` = million units
#' - `"mmol"` = millimole
#' - `"ml"` = milliliter (e.g. eyedrops)
2019-01-26 23:22:56 +01:00
#' @export
#' @rdname atc_online
2019-02-21 18:55:52 +01:00
#' @inheritSection AMR Read more on our website!
2019-11-28 22:32:17 +01:00
#' @source <https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/>
2019-01-26 23:22:56 +01:00
#' @examples
2020-05-16 20:42:45 +02:00
#' \dontrun{
2019-01-26 23:22:56 +01:00
#' # oral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "O")
#' # parenteral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "P")
#'
#' atc_online_property("J01CA04", property = "groups") # search hierarchical groups of amoxicillin
#' # [1] "ANTIINFECTIVES FOR SYSTEMIC USE"
#' # [2] "ANTIBACTERIALS FOR SYSTEMIC USE"
#' # [3] "BETA-LACTAM ANTIBACTERIALS, PENICILLINS"
#' # [4] "Penicillins with extended spectrum"
#' }
atc_online_property <- function(atc_code,
                                property,
                                administration = "O",
                                url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no") {
  # Look up one property of each ATC code by scraping the page at `url`.
  #
  # Arguments:
  #   atc_code        vector of ATC codes; one page is requested per element.
  #   property        single string, matched case-insensitively against
  #                   "ATC", "Name", "DDD", "U", "Adm.R", "Note", "groups";
  #                   anything matching "unit" is treated as "U".
  #   administration  single string compared with the "adm.r" column of the
  #                   scraped table (only used for the row-specific properties).
  #   url             page URL; the first "%s" is replaced by the ATC code.
  #
  # Returns:
  #   - "ddd": a numeric vector, one element per ATC code (NA when not found);
  #   - "groups": a list with one character vector per ATC code, in input
  #     order, or that character vector itself when there is only one code;
  #   - otherwise: a character vector, one element per ATC code.
  #   Without an internet connection a message is shown and a vector of NA
  #   with the length of `atc_code` is returned.

  # Scraping helpers are resolved at run time via import_fn()
  # (presumably so curl/rvest/xml2 stay optional dependencies -- confirm).
  has_internet <- import_fn("has_internet", "curl")
  html_attr <- import_fn("html_attr", "rvest")
  html_children <- import_fn("html_children", "rvest")
  html_node <- import_fn("html_node", "rvest")
  html_nodes <- import_fn("html_nodes", "rvest")
  html_table <- import_fn("html_table", "rvest")
  html_text <- import_fn("html_text", "rvest")
  read_html <- import_fn("read_html", "xml2")

  check_dataset_integrity()

  # NOTE(review): this tests membership against the whole `antibiotics` object,
  # not a specific column, so it probably never matches and ab_atc() is
  # effectively always applied -- confirm which column was intended.
  if (!all(atc_code %in% antibiotics)) {
    atc_code <- as.character(ab_atc(atc_code))
  }

  if (!has_internet()) {
    message("There appears to be no internet connection.")
    return(rep(NA, length(atc_code)))
  }

  stop_if(length(property) != 1L, "`property` must be of length 1")
  stop_if(length(administration) != 1L, "`administration` must be of length 1")

  # tolerate surrounding whitespace in the two selector arguments
  property <- trimws(property)
  administration <- trimws(administration)

  # also allow unit as property
  if (property %like% "unit") {
    property <- "U"
  }

  # validation of properties (case-insensitive; the error lists the
  # original spelling)
  valid_properties <- c("ATC", "Name", "DDD", "U", "Adm.R", "Note", "groups")
  property <- tolower(property)
  stop_ifnot(property %in% tolower(valid_properties),
             "Invalid `property`, use one of ", paste(valid_properties, collapse = ", "))

  # preallocate the result so element i always belongs to atc_code[i]
  n_codes <- length(atc_code)
  if (property == "ddd") {
    returnvalue <- rep(NA_real_, n_codes)
  } else if (property == "groups") {
    returnvalue <- vector("list", n_codes)
  } else {
    returnvalue <- rep(NA_character_, n_codes)
  }

  progress <- progress_estimated(n = n_codes, 3)
  on.exit(close(progress), add = TRUE)

  for (i in seq_len(n_codes)) {

    progress$tick()

    # an unknown code cannot be looked up; keep NA instead of requesting
    # an invalid URL
    if (is.na(atc_code[i])) {
      if (property == "groups") {
        returnvalue[[i]] <- NA_character_
      }
      next
    }

    atc_url <- sub("%s", atc_code[i], url, fixed = TRUE)

    if (property == "groups") {
      # first link of every child of the "#content" node
      content_node <- html_node(read_html(atc_url), "#content")
      links <- html_node(html_children(content_node), "a")

      # get URLs and text of items
      hrefs <- html_attr(links, "href")
      texts <- html_text(links)
      # select only text items where URL like "code="
      texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)]
      # last one is antibiotics, skip it (index 0 is ignored by `[`)
      texts <- texts[seq_len(length(texts)) - 1]

      # store by position: prepending to the list reversed the output order
      # for more than one ATC code
      returnvalue[[i]] <- texts

    } else {
      tables <- html_table(html_nodes(read_html(atc_url), "table"), header = TRUE)
      tbl <- as.data.frame(tables, stringsAsFactors = FALSE)

      if (length(tbl) == 0) {
        warning("ATC not found: ", atc_code[i], ". Please check ", atc_url, ".", call. = FALSE)
        returnvalue[i] <- NA
        next
      }

      # case insensitive column names; any column starting with "atc"
      # becomes "atc"
      colnames(tbl) <- gsub("^atc.*", "atc", tolower(colnames(tbl)))

      if (property %in% c("atc", "name")) {
        # ATC and name are only in first row
        returnvalue[i] <- tbl[1, property]
      } else if (!"adm.r" %in% colnames(tbl) || is.na(tbl[1, "adm.r"])) {
        # `||` short-circuits, so a table without an "adm.r" column yields NA
        # instead of an "undefined columns selected" error
        returnvalue[i] <- NA
      } else {
        # NA-safe match on administration route; when several rows match,
        # the last one wins (as the row-by-row loop did before)
        hits <- which(tbl[["adm.r"]] %in% administration)
        if (length(hits) > 0) {
          returnvalue[i] <- tbl[hits[length(hits)], property]
        }
      }
    }
  }

  # a single ATC code returns its groups as a plain character vector
  if (property == "groups" && length(returnvalue) == 1) {
    returnvalue <- returnvalue[[1]]
  }

  returnvalue
}
#' @rdname atc_online
#' @export
atc_online_groups <- function(atc_code, ...) {
  # Shorthand for atc_online_property() with the property fixed to "groups";
  # `atc_code` and everything in `...` are forwarded unchanged.
  atc_online_property(
    atc_code = atc_code,
    property = " groups",
    ...
  )
}
#' @rdname atc_online
#' @export
atc_online_ddd <- function(atc_code, ...) {
  # Shorthand for atc_online_property() with the property fixed to "ddd";
  # `atc_code` and everything in `...` are forwarded unchanged.
  atc_online_property(
    atc_code = atc_code,
    property = " ddd",
    ...
  )
}