mirror of https://github.com/msberends/AMR.git
370 lines
13 KiB
R
370 lines
13 KiB
R
# ==================================================================== #
|
|
# TITLE #
|
|
# Antimicrobial Resistance (AMR) Analysis #
|
|
# #
|
|
# AUTHORS #
|
|
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
|
|
# #
|
|
# LICENCE #
|
|
# This program is free software; you can redistribute it and/or modify #
|
|
# it under the terms of the GNU General Public License version 2.0, #
|
|
# as published by the Free Software Foundation. #
|
|
# #
|
|
# This program is distributed in the hope that it will be useful, #
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
# GNU General Public License for more details. #
|
|
# ==================================================================== #
|
|
|
|
#' Properties of an ATC code
|
|
#'
|
|
#' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. \strong{This function requires an internet connection.}
|
|
#' @param atc_code a character or character vector with ATC code(s) of antibiotic(s)
|
|
#' @param property property of an ATC code. Valid values are \code{"ATC code"}, \code{"Name"}, \code{"DDD"}, \code{"U"} (\code{"unit"}), \code{"Adm.R"} en \code{"Note"}.
|
|
#' @param administration type of administration, see \emph{Details}
|
|
#' @param url url of website of the WHO. The sign \code{\%s} can be used as a placeholder for ATC codes.
|
|
#' @details
|
|
#' Abbreviations for the property \code{"Adm.R"} (parameter \code{administration}):
|
|
#' \itemize{
|
|
#' \item{\code{"Implant"}}{ = Implant}
|
|
#' \item{\code{"Inhal"}}{ = Inhalation}
|
|
#' \item{\code{"Instill"}}{ = Instillation}
|
|
#' \item{\code{"N"}}{ = nasal}
|
|
#' \item{\code{"O"}}{ = oral}
|
|
#' \item{\code{"P"}}{ = parenteral}
|
|
#' \item{\code{"R"}}{ = rectal}
|
|
#' \item{\code{"SL"}}{ = sublingual/buccal}
|
|
#' \item{\code{"TD"}}{ = transdermal}
|
|
#' \item{\code{"V"}}{ = vaginal}
|
|
#' }
|
|
#'
|
|
#' Abbreviations for the property \code{"U"} (unit):
|
|
#' \itemize{
|
|
#' \item{\code{"g"}}{ = gram}
|
|
#' \item{\code{"mg"}}{ = milligram}
|
|
#' \item{\code{"mcg"}}{ = microgram}
|
|
#' \item{\code{"U"}}{ = unit}
|
|
#' \item{\code{"TU"}}{ = thousand units}
|
|
#' \item{\code{"MU"}}{ = million units}
|
|
#' \item{\code{"mmol"}}{ = millimole}
|
|
#' \item{\code{"ml"}}{ = milliliter (e.g. eyedrops)}
|
|
#' }
|
|
#' @export
|
|
#' @importFrom dplyr %>% progress_estimated
|
|
#' @importFrom xml2 read_html
|
|
#' @importFrom rvest html_nodes html_table
|
|
#' @source \url{https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/}
|
|
#' @examples
|
|
#' \donttest{
|
|
#' atc_property("J01CA04", "DDD", "O") # oral DDD (Defined Daily Dose) of amoxicillin
|
|
#' atc_property("J01CA04", "DDD", "P") # parenteral DDD (Defined Daily Dose) of amoxicillin
|
|
#' }
|
|
atc_property <- function(atc_code,
|
|
property,
|
|
administration = 'O',
|
|
url = 'https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no') {
|
|
|
|
# property <- property %>% tolower()
|
|
#
|
|
if (property %like% 'unit') {
|
|
property <- 'U'
|
|
}
|
|
|
|
# validation of properties
|
|
valid_properties.bak <- c("ATC code", "Name", "DDD", "U", "Adm.R", "Note")
|
|
valid_properties <- valid_properties.bak #%>% tolower()
|
|
if (!property %in% valid_properties) {
|
|
stop('Invalid `property`, use one of ', paste(valid_properties, collapse = ", "), '.')
|
|
}
|
|
|
|
returnvalue <- rep(NA_character_, length(atc_code))
|
|
if (property == 'DDD') {
|
|
returnvalue <- rep(NA_real_, length(atc_code))
|
|
}
|
|
|
|
progress <- progress_estimated(n = length(atc_code))
|
|
|
|
for (i in 1:length(atc_code)) {
|
|
|
|
progress$tick()$print()
|
|
|
|
atc_url <- sub('%s', atc_code[i], url, fixed = TRUE)
|
|
tbl <- xml2::read_html(atc_url) %>%
|
|
rvest::html_nodes('table') %>%
|
|
rvest::html_table(header = TRUE)
|
|
|
|
if (length(tbl) == 0) {
|
|
warning('ATC not found: ', atc_code[i], '. Please check ', atc_url, '.', call. = FALSE)
|
|
returnvalue[i] <- NA
|
|
next
|
|
}
|
|
|
|
tbl <- tbl[[1]]
|
|
|
|
if (property == 'Name') {
|
|
returnvalue[i] <- tbl[1, 2]
|
|
} else {
|
|
|
|
names(returnvalue)[i] <- tbl[1, 2] %>% as.character()
|
|
|
|
if (!'Adm.R' %in% colnames(tbl) | is.na(tbl[1, 'Adm.R'])) {
|
|
returnvalue[i] <- NA
|
|
next
|
|
} else {
|
|
for (j in 1:nrow(tbl)) {
|
|
if (tbl[j, 'Adm.R'] == administration) {
|
|
returnvalue[i] <- tbl[j, property]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
cat('\n')
|
|
returnvalue
|
|
|
|
}
|
|
|
|
#' Name of an antibiotic
|
|
#'
|
|
#' Convert antibiotic codes (from a laboratory information system like MOLIS or GLIMS) to a (trivial) antibiotic name or ATC code, or vice versa. This uses the data from \code{\link{antibiotics}}.
|
|
#' @param abcode a code or name, like \code{"AMOX"}, \code{"AMCL"} or \code{"J01CA04"}
|
|
#' @param from,to type to transform from and to. See \code{\link{antibiotics}} for its column names. WIth \code{from = "guess"} the from will be guessed from \code{"atc"}, \code{"molis"} and \code{"umcg"}.
|
|
#' @param textbetween text to put between multiple returned texts
|
|
#' @param tolower return output as lower case with function \code{\link{tolower}}.
|
|
#' @keywords ab antibiotics
|
|
#' @source \code{\link{antibiotics}}
|
|
#' @export
|
|
#' @importFrom dplyr %>% filter select slice
|
|
#' @examples
|
|
#' abname("AMCL")
|
|
#' # "amoxicillin and enzyme inhibitor"
|
|
#'
|
|
#' abname("AMCL+GENT")
|
|
#' # "amoxicillin and enzyme inhibitor + gentamicin"
|
|
#'
|
|
#' abname(c("AMCL", "GENT"))
|
|
#' # "amoxicillin and enzyme inhibitor" "gentamicin"
|
|
#'
|
|
#' abname("AMCL", to = "trivial_nl")
|
|
#' # "Amoxicilline/clavulaanzuur"
|
|
#'
|
|
#' abname("AMCL", to = "atc")
|
|
#' # "J01CR02"
|
|
#'
|
|
#' abname("J01CR02", from = "atc", to = "umcg")
|
|
#' # "AMCL"
|
|
abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'official', textbetween = ' + ', tolower = FALSE) {
|
|
|
|
antibiotics <- AMR::antibiotics
|
|
|
|
from <- from[1]
|
|
if (from == "guess") {
|
|
for (i in 1:3) {
|
|
if (abcode[1] %in% (antibiotics %>% pull(i))) {
|
|
from <- colnames(antibiotics)[i]
|
|
}
|
|
}
|
|
if (from == "guess") {
|
|
from <- "umcg"
|
|
}
|
|
}
|
|
|
|
colnames(antibiotics) <- colnames(antibiotics) %>% tolower()
|
|
from <- from %>% tolower()
|
|
to <- to %>% tolower()
|
|
|
|
if (!from %in% colnames(antibiotics) |
|
|
!to %in% colnames(antibiotics)) {
|
|
stop(paste0('Invalid `from` or `to`. Choose one of ',
|
|
colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)
|
|
}
|
|
|
|
abcode <- as.character(abcode)
|
|
|
|
for (i in 1:length(abcode)) {
|
|
drug <- abcode[i]
|
|
if (!grepl('+', drug, fixed = TRUE) & !grepl(' en ', drug, fixed = TRUE)) {
|
|
# only 1 drug
|
|
if (drug %in% (antibiotics %>% pull(from))) {
|
|
abcode[i] <-
|
|
antibiotics %>%
|
|
filter(.[, from] == drug) %>%
|
|
select(to) %>%
|
|
slice(1) %>%
|
|
as.character()
|
|
} else {
|
|
# not found
|
|
warning('Code "', drug, '" not found in antibiotics list.', call. = FALSE)
|
|
abcode[i] <- NA
|
|
}
|
|
} else {
|
|
# more than 1 drug
|
|
if (grepl('+', drug, fixed = TRUE)) {
|
|
drug.group <-
|
|
strsplit(drug, '+', fixed = TRUE) %>%
|
|
unlist() %>%
|
|
trimws('both')
|
|
} else if (grepl(' en ', drug, fixed = TRUE)) {
|
|
drug.group <-
|
|
strsplit(drug, ' en ', fixed = TRUE) %>%
|
|
unlist() %>%
|
|
trimws('both')
|
|
} else {
|
|
warning('Invalid concat.')
|
|
abcode[i] <- NA
|
|
next
|
|
}
|
|
|
|
for (j in 1:length(drug.group)) {
|
|
drug.group[j] <-
|
|
antibiotics %>%
|
|
filter(.[, from] == drug.group[j]) %>%
|
|
select(to) %>%
|
|
slice(1) %>%
|
|
as.character()
|
|
if (j > 1 & to %in% c('official', 'trivial_nl')) {
|
|
drug.group[j] <- drug.group[j] %>% tolower()
|
|
}
|
|
}
|
|
abcode[i] <- paste(drug.group, collapse = textbetween)
|
|
}
|
|
}
|
|
|
|
if (tolower == TRUE) {
|
|
abcode <- abcode %>% tolower()
|
|
}
|
|
|
|
abcode
|
|
}
|
|
|
|
#' Find bacteria ID based on genus/species
|
|
#'
|
|
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
|
#' @param x character vector to determine \code{bactid}
|
|
#' @export
|
|
#' @importFrom dplyr %>% filter slice pull
|
|
#' @return Character (vector).
|
|
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
|
#' @examples
|
|
#' # These examples all return "STAAUR", the ID of S. aureus:
|
|
#' guess_bactid("stau")
|
|
#' guess_bactid("STAU")
|
|
#' guess_bactid("staaur")
|
|
#' guess_bactid("S. aureus")
|
|
#' guess_bactid("S aureus")
|
|
#' guess_bactid("Staphylococcus aureus")
|
|
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
|
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
|
guess_bactid <- function(x) {
|
|
# remove dots and other non-text in case of "E. coli" except spaces
|
|
x <- gsub("[^a-zA-Z ]+", "", x)
|
|
# but spaces before and after should be omitted
|
|
x <- trimws(x, which = "both")
|
|
x.bak <- x
|
|
# replace space by regex sign
|
|
x <- gsub(" ", ".*", x, fixed = TRUE)
|
|
# add start and stop
|
|
x_species <- paste(x, 'species')
|
|
x <- paste0('^', x, '$')
|
|
|
|
for (i in 1:length(x)) {
|
|
if (tolower(x[i]) == '^e.*coli$') {
|
|
# avoid detection of Entamoeba coli in case of E. coli
|
|
x[i] <- 'Escherichia coli'
|
|
}
|
|
if (tolower(x[i]) == '^h.*influenzae$') {
|
|
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
|
x[i] <- 'Haemophilus influenzae'
|
|
}
|
|
if (tolower(x[i]) == '^st.*au$'
|
|
| tolower(x[i]) == '^stau$'
|
|
| tolower(x[i]) == '^staaur$') {
|
|
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
|
x[i] <- 'Staphylococcus aureus'
|
|
}
|
|
if (tolower(x[i]) == '^p.*aer$') {
|
|
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
|
x[i] <- 'Pseudomonas aeruginosa'
|
|
}
|
|
|
|
# translate known trivial names to genus+species
|
|
if (toupper(x.bak[i]) == 'MRSA'
|
|
| toupper(x.bak[i]) == 'VISA'
|
|
| toupper(x.bak[i]) == 'VRSA') {
|
|
x[i] <- 'Staphylococcus aureus'
|
|
}
|
|
if (toupper(x.bak[i]) == 'MRSE') {
|
|
x[i] <- 'Staphylococcus epidermidis'
|
|
}
|
|
if (toupper(x.bak[i]) == 'VRE') {
|
|
x[i] <- 'Enterococcus'
|
|
}
|
|
if (toupper(x.bak[i]) == 'MRPA') {
|
|
# multi resistant P. aeruginosa
|
|
x[i] <- 'Pseudomonas aeruginosa'
|
|
}
|
|
if (toupper(x.bak[i]) == 'PISP'
|
|
| toupper(x.bak[i]) == 'PRSP') {
|
|
# peni resistant S. pneumoniae
|
|
x[i] <- 'Streptococcus pneumoniae'
|
|
}
|
|
if (toupper(x.bak[i]) == 'VISP'
|
|
| toupper(x.bak[i]) == 'VRSP') {
|
|
# vanco resistant S. pneumoniae
|
|
x[i] <- 'Streptococcus pneumoniae'
|
|
}
|
|
|
|
# let's try the ID's first
|
|
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
|
|
|
if (nrow(found) == 0) {
|
|
# now try exact match
|
|
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
|
}
|
|
if (nrow(found) == 0) {
|
|
# try any match
|
|
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
|
}
|
|
if (nrow(found) == 0) {
|
|
# try only genus, with 'species' attached
|
|
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
|
}
|
|
if (nrow(found) == 0) {
|
|
# search for GLIMS code
|
|
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
|
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
|
}
|
|
}
|
|
if (nrow(found) == 0) {
|
|
# try splitting of characters and then find ID
|
|
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
|
x_split <- x
|
|
x_length <- nchar(x.bak[i])
|
|
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
|
'.* ',
|
|
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
|
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
|
|
}
|
|
if (nrow(found) == 0) {
|
|
# try any match with text before and after original search string
|
|
# so "negative rods" will be "GNR"
|
|
if (x.bak[i] %like% "^Gram") {
|
|
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
|
|
# remove leading and trailing spaces again
|
|
x.bak[i] <- trimws(x.bak[i], which = "both")
|
|
}
|
|
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
|
|
}
|
|
|
|
if (nrow(found) != 0) {
|
|
x[i] <- found %>%
|
|
slice(1) %>%
|
|
pull(bactid)
|
|
} else {
|
|
x[i] <- ""
|
|
}
|
|
}
|
|
x
|
|
}
|