mirror of
https://github.com/msberends/AMR.git
synced 2024-12-27 12:06:14 +01:00
179 lines
6.7 KiB
R
179 lines
6.7 KiB
R
# ==================================================================== #
|
|
# TITLE: #
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
|
# #
|
|
# SOURCE CODE: #
|
|
# https://github.com/msberends/AMR #
|
|
# #
|
|
# PLEASE CITE THIS SOFTWARE AS: #
|
|
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
|
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
|
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
|
# https://doi.org/10.18637/jss.v104.i03 #
|
|
# #
|
|
# Developed at the University of Groningen and the University Medical #
|
|
# Center Groningen in The Netherlands, in collaboration with many #
|
|
# colleagues from around the world, see our website. #
|
|
# #
|
|
# This R package is free software; you can freely use and distribute #
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
# the Free Software Foundation. #
|
|
# We created this package for both routine data analysis and academic #
|
|
# research and it was publicly released in the hope that it will be #
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
|
# #
|
|
# Visit our website for the full manual and a complete tutorial about #
|
|
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
|
# ==================================================================== #
|
|
|
|
library(dplyr)
|
|
library(readxl)
|
|
library(cleaner)
|
|
|
|
# URL:
|
|
# https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/Dosages_v_13.0_Breakpoint_Tables.pdf
|
|
# download the PDF file, open in Adobe Acrobat and export as Excel workbook
|
|
# EUCAST dosage table version to import. This one value drives both the
# workbook that is read and the `eucast_version` label written to the data
# set, so the two cannot drift apart (the path used to be hard-coded to the
# v12.0 workbook while the rows were labelled as version 13).
breakpoints_version <- 13

# Excel export of the EUCAST dosage PDF for this version, e.g.
# "data-raw/Dosages_v_13.0_Breakpoint_Tables.xlsx"
dosage_file <- sprintf(
  "data-raw/Dosages_v_%.1f_Breakpoint_Tables.xlsx",
  breakpoints_version
)

# Read the workbook (skipping the first 4 rows and treating "None" as missing)
# and reduce it to one cleaned row per drug entry.
dosage_source <- read_excel(dosage_file, skip = 4, na = "None") %>%
  # snake_case column names; presumably this also renames the `penicillins`
  # column to `drug` -- confirm against cleaner::format_names()
  format_names(snake_case = TRUE, penicillins = "drug") %>%
  # drop rows whose standard dosage is a header-like label or "under review"
  filter(!tolower(standard_dosage) %in% c("standard dosage", "standard dosage_source", "under review")) %>%
  filter(!is.na(standard_dosage)) %>%
  # keep only one drug in the table
  arrange(desc(drug)) %>%
  # strip the qualifier that follows the drug name: everything from the last
  # " (", " iv" or " oral" onwards (the leading `.*` is greedy)
  mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
  # distinct(drug, .keep_all = TRUE) %>%
  arrange(drug) %>%
  # look up the antimicrobial code and name for each drug text
  mutate(
    ab = as.ab(drug),
    ab_name = ab_name(ab, language = NULL)
  )
|
|
|
|
# Rebuild the table with one set of rows per administration route found in
# the standard dosage text. The three route filters are independent, so a row
# whose text matches several routes is kept once for each of them.
dosage_source <- local({
  # oral: cut both dosage texts off right after "oral"; a high dosage that
  # does not mention "oral" becomes NA
  oral_rows <- mutate(
    filter(dosage_source, standard_dosage %like% " oral"),
    standard_dosage = gsub("oral.*", "oral", standard_dosage),
    high_dosage = if_else(
      high_dosage %like% "oral",
      gsub("oral.*", "oral", high_dosage),
      NA_character_
    )
  )

  # iv: remove everything up to and including the last " or "; a high dosage
  # that contains neither " or " nor " iv" becomes NA
  iv_rows <- mutate(
    filter(dosage_source, standard_dosage %like% " iv"),
    standard_dosage = gsub(".* or ", "", standard_dosage),
    high_dosage = if_else(
      high_dosage %like% "( or | iv)",
      gsub(".* or ", "", high_dosage),
      NA_character_
    )
  )

  # im: rows are taken over unchanged
  im_rows <- filter(dosage_source, standard_dosage %like% " im")

  # stack in the order oral, iv, im and sort by drug
  arrange(bind_rows(oral_rows, iv_rows, im_rows), drug)
})
|
|
|
|
|
|
# Parse a vector of free-text dosage descriptions.
#
# `col_data`: character vector of dosage texts.
#
# Returns a list with one element per input text; each element is a list with
#   dose            first piece of the cleaned text when split on " x "
#                   (trimmed); NA when that piece matches "under"
#   dose_times      second piece with everything after a leading number removed
#   administration  second piece passed through clean_character()
#   notes           "" by default, otherwise the trailing part of the raw text
#                   starting at an or/with/loading/depending/over keyword
#   original_txt    raw text with newlines replaced by spaces
#
# NOTE(review): the truncation step also cuts at "by", but the notes step does
# not look for "by" -- confirm whether that difference is intended.
get_dosage_lst <- function(col_data) {
  # remove new lines (and tabs), together with the spaces around them
  cleaned <- gsub(" ?(\n|\t)+ ?", " ", col_data)
  # keep only the first suggestion: cut at 'or' and other informative texts
  cleaned <- gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", cleaned)
  # remove bracketed unit conversions such as " (1 MU)"
  cleaned <- gsub(" [(][0-9] [A-Z]+[)]", "", cleaned)
  # remove any remaining parentheses
  cleaned <- gsub("[)(]", "", cleaned)
  # remove drug names (lowercase words of 5+ letters); applied twice because a
  # match consumes the space that the next word would need in front of it
  cleaned <- gsub(" [a-z]{5,99}( |$)", " ", cleaned)
  cleaned <- gsub(" [a-z]{5,99}( |$)", " ", cleaned)
  cleaned <- gsub(" (acid|dose)", "", cleaned)
  # disabled step: keep lowest value only (25-30 mg -> 25 mg)
  # cleaned <- gsub("[-].*? ", " ", cleaned)

  parsed <- lapply(strsplit(cleaned, " x "), function(parts) {
    amount <- parts[1]
    if (amount %like% "under") {
      amount <- NA_character_
    }
    schedule <- parts[2]

    list(
      dose = trimws(amount),
      dose_times = gsub("^([0-9.]+).*", "\\1", schedule),
      administration = clean_character(schedule),
      notes = "",
      original_txt = ""
    )
  })

  # second pass over the raw (uncleaned) texts: store the original text and
  # any trailing remark
  for (idx in seq_along(col_data)) {
    raw_txt <- col_data[idx]
    parsed[[idx]]$original_txt <- gsub("\n", " ", raw_txt)
    if (raw_txt %like% " (or|with|loading|depending|over) ") {
      remark <- gsub(".* ((or|with|loading|depending|over) .*)", "\\1", raw_txt)
      parsed[[idx]]$notes <- gsub("\n", " ", remark)
    }
  }

  parsed
}
|
|
|
|
# Parse the three dosage columns into lists of per-row fields
standard <- get_dosage_lst(dosage_source$standard_dosage)
high <- get_dosage_lst(dosage_source$high_dosage)
uti <- get_dosage_lst(dosage_source$uncomplicated_uti)

# Build the rows for one dosage type.
#
# `dosage_lst`: result of get_dosage_lst() for one column of `dosage_source`
# `type`:       label stored in the `type` column
#
# Returns a data.frame with one row per row of `dosage_source`.
dosage_rows <- function(dosage_lst, type) {
  # vapply() instead of sapply(): the result is always a character vector
  # (also for empty input), and a malformed entry fails loudly instead of
  # silently turning the column into a list
  field <- function(field_name) {
    vapply(dosage_lst, function(x) x[[field_name]], FUN.VALUE = character(1))
  }
  data.frame(
    ab = dosage_source$ab,
    name = dosage_source$ab_name,
    type = type,
    dose = field("dose"),
    dose_times = field("dose_times"),
    administration = field("administration"),
    notes = field("notes"),
    original_txt = field("original_txt"),
    stringsAsFactors = FALSE
  )
}

dosage_new <- bind_rows(
  dosage_rows(standard, "standard_dosage"),
  dosage_rows(high, "high_dosage"),
  dosage_rows(uti, "uncomplicated_uti")
) %>%
  mutate(
    eucast_version = breakpoints_version,
    dose_times = as.integer(dose_times),
    # cut the administration text off after its first lowercase word
    administration = gsub("([a-z]+) .*", "\\1", administration)
  ) %>%
  arrange(name, administration, type) %>%
  # drop entries for which no dose could be parsed
  filter(!is.na(dose), dose != ".") %>%
  # this makes it a tibble as well (per the original author's note):
  dataset_UTF8_to_ASCII()
|
|
|
|
# Put the newly parsed rows in front of the data set that ships with the
# package. Rows of the same EUCAST version that are already in AMR::dosage are
# dropped first, so re-running this script replaces that version instead of
# appending a duplicate copy of it.
dosage <- bind_rows(
  dosage_new,
  AMR::dosage %>%
    filter(!eucast_version %in% dosage_new$eucast_version)
)

# Save as the exported `dosage` data set of the package
usethis::use_data(dosage, internal = FALSE, overwrite = TRUE, version = 2, compress = "xz")
|