1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-11 13:01:55 +02:00

styled, unit test fix

This commit is contained in:
2022-08-28 10:31:50 +02:00
parent 4cb1db4554
commit 4d050aef7c
147 changed files with 10897 additions and 8169 deletions

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -33,34 +33,43 @@ library(dplyr)
library(readxl)
DRGLST <- read_excel("DRGLST.xlsx")
abx <- DRGLST %>%
select(ab = WHON5_CODE,
name = ANTIBIOTIC) %>%
select(
ab = WHON5_CODE,
name = ANTIBIOTIC
) %>%
# remove the ones without WHONET code
filter(!is.na(ab)) %>%
distinct(name, .keep_all = TRUE) %>%
# add the ones without WHONET code
bind_rows(
DRGLST %>%
select(ab = WHON5_CODE,
name = ANTIBIOTIC) %>%
select(
ab = WHON5_CODE,
name = ANTIBIOTIC
) %>%
filter(is.na(ab)) %>%
distinct(name, .keep_all = TRUE)
# add new ab code later
# add new ab code later
) %>%
arrange(name)
# add old ATC codes
ab_old <- AMR::antibiotics %>%
mutate(official = gsub("( and |, )", "/", official),
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
ifelse(is.na(certe), "", certe),
ifelse(is.na(umcg), "", umcg),
sep = "|")))
mutate(
official = gsub("( and |, )", "/", official),
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
ifelse(is.na(certe), "", certe),
ifelse(is.na(umcg), "", umcg),
sep = "|"
))
)
for (i in 1:nrow(ab_old)) {
abbr <- ab_old[i, "abbr"]
abbr <- strsplit(abbr, "|", fixed = TRUE) %>% unlist() %>% unique()
abbr <- strsplit(abbr, "|", fixed = TRUE) %>%
unlist() %>%
unique()
abbr <- abbr[abbr != ""]
#print(abbr)
# print(abbr)
if (length(abbr) == 0) {
ab_old[i, "abbr"] <- NA_character_
} else {
@ -72,50 +81,54 @@ for (i in 1:nrow(ab_old)) {
abx_atc1 <- abx %>%
mutate(name_lower = tolower(name)) %>%
left_join(ab_old %>%
select(ears_net, atc), by = c(ab = "ears_net")) %>%
select(ears_net, atc), by = c(ab = "ears_net")) %>%
rename(atc1 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc2 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc3 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc4 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc5 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
mutate(official = gsub("ine$", "in", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
mutate(official = gsub("ine$", "in", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc6 = atc) %>%
mutate(atc = case_when(!is.na(atc1) ~ atc1,
!is.na(atc2) ~ atc2,
!is.na(atc3) ~ atc3,
!is.na(atc4) ~ atc4,
!is.na(atc4) ~ atc5,
TRUE ~ atc6)) %>%
# Pick the first non-missing ATC code among the six join attempts above:
# atc1 comes from the join on the EARS-Net code, atc2..atc6 from joins on
# increasingly rewritten versions of the lower-cased official name.
mutate(atc = case_when(
  !is.na(atc1) ~ atc1,
  !is.na(atc2) ~ atc2,
  !is.na(atc3) ~ atc3,
  !is.na(atc4) ~ atc4,
  # test atc5 itself; re-testing atc4 here made this branch unreachable, so
  # rows matched only through atc5 silently fell through to atc6
  !is.na(atc5) ~ atc5,
  TRUE ~ atc6
)) %>%
distinct(ab, name, .keep_all = TRUE) %>%
select(ab, atc, name)
abx_atc2 <- ab_old %>%
filter(!atc %in% abx_atc1$atc,
is.na(ears_net),
!is.na(atc_group1),
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
official %unlike% "(combinations| with )") %>%
filter(
!atc %in% abx_atc1$atc,
is.na(ears_net),
!is.na(atc_group1),
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
official %unlike% "(combinations| with )"
) %>%
mutate(ab = NA_character_) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
select(ab, atc, name = official)
@ -125,12 +138,15 @@ abx2 <- bind_rows(abx_atc1, abx_atc2)
rm(abx_atc1)
rm(abx_atc2)
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub("[/0-9-]",
" ",
abx2$name[is.na(abx2$ab)]),
minlength = 3,
method = "left.kept",
strict = TRUE))
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub(
"[/0-9-]",
" ",
abx2$name[is.na(abx2$ab)]
),
minlength = 3,
method = "left.kept",
strict = TRUE
))
n_distinct(abx2$ab)
@ -150,7 +166,9 @@ for (i in 2:nrow(abx2)) {
abx2[i, "ab"] <- paste0(abx2[i, "ab", drop = TRUE], abx2[i, "seqnr", drop = TRUE])
}
}
abx2 <- abx2 %>% select(-seqnr) %>% arrange(name)
abx2 <- abx2 %>%
select(-seqnr) %>%
arrange(name)
# sanity check: every row must have a unique `ab` code (should print TRUE)
nrow(abx2) == n_distinct(abx2$ab)
@ -158,8 +176,10 @@ nrow(abx2) == n_distinct(abx2$ab)
# get ATC properties
abx2 <- abx2 %>%
left_join(ab_old %>%
select(atc, abbr, atc_group1, atc_group2,
oral_ddd, oral_units, iv_ddd, iv_units))
select(
atc, abbr, atc_group1, atc_group2,
oral_ddd, oral_units, iv_ddd, iv_units
))
abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixed = TRUE)))
@ -171,29 +191,41 @@ get_CID <- function(ab) {
p$tick()$print()
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=complete"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=complete"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
if (is.na(CID[i])) {
# try with removing the text in brackets
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
"/cids/TXT?name_type=complete"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
"/cids/TXT?name_type=complete"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
}
if (is.na(CID[i])) {
# try match on word and take the lowest CID value (sorted)
ab[i] <- gsub("[^a-z0-9]+", " ", ab[i], ignore.case = TRUE)
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=word"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=word"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
}
Sys.sleep(0.1)
}
@ -203,15 +235,15 @@ get_CID <- function(ab) {
# get CIDs (2-3 min)
CIDs <- get_CID(abx2$name)
# These could not be found:
abx2[is.na(CIDs),] %>% View()
abx2[is.na(CIDs), ] %>% View()
# returns list with synonyms (brand names), with CIDs as names
get_synonyms <- function(CID, clean = TRUE) {
synonyms <- rep(NA_character_, length(CID))
#p <- progress_ticker(n = length(CID), min_time = 0)
# p <- progress_ticker(n = length(CID), min_time = 0)
for (i in 1:length(CID)) {
#p$tick()$print()
# p$tick()$print()
synonyms_txt <- ""
@ -220,27 +252,37 @@ get_synonyms <- function(CID, clean = TRUE) {
}
synonyms_txt <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
CID[i],
"/synonyms/TXT"),
sep = "\n",
showProgress = FALSE)[[1]],
error = function(e) NA_character_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
CID[i],
"/synonyms/TXT"
),
sep = "\n",
showProgress = FALSE
)[[1]],
error = function(e) NA_character_
)
Sys.sleep(0.1)
if (clean == TRUE) {
# remove text between brackets
synonyms_txt <- trimws(gsub("[(].*[)]", "",
gsub("[[].*[]]", "",
gsub("[(].*[]]", "",
gsub("[[].*[)]", "", synonyms_txt)))))
synonyms_txt <- trimws(gsub(
"[(].*[)]", "",
gsub(
"[[].*[]]", "",
gsub(
"[(].*[]]", "",
gsub("[[].*[)]", "", synonyms_txt)
)
)
))
synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
# keep only names of 6 to 20 characters that contain no punctuation marks or digits and that start with a capital letter (= brand name)
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)
& !grepl("[-&{},_0-9/]", synonyms_txt)
& grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) &
!grepl("[-&{},_0-9/]", synonyms_txt) &
grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
}
synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]))
synonyms[i] <- list(sort(synonyms_txt))
@ -251,52 +293,56 @@ get_synonyms <- function(CID, clean = TRUE) {
# get brand names from PubChem (2-3 min)
synonyms <- get_synonyms(CIDs)
synonyms <- lapply(synonyms,
function(x) {
if (length(x) == 0 | all(is.na(x))) {
""
} else {
x
}})
# Replace empty or all-NA synonym vectors with a single empty string, so that
# every element of the list holds at least one value.
synonyms <- lapply(
  synonyms,
  function(x) {
    # scalar condition: use short-circuiting `||` instead of elementwise `|`
    if (length(x) == 0 || all(is.na(x))) {
      ""
    } else {
      x
    }
  }
)
# add them to data set
antibiotics <- abx2 %>%
left_join(DRGLST %>%
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
distinct(ab, .keep_all = TRUE), by = "ab") %>%
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
distinct(ab, .keep_all = TRUE), by = "ab") %>%
transmute(ab,
atc,
cid = CIDs,
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
name = name %>%
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
gsub("edta", "EDTA", ., ignore.case = TRUE),
group = case_when(
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
TRUE ~ "Other antibacterials"
),
atc_group1, atc_group2,
abbreviations = unname(abbr),
synonyms = unname(synonyms),
oral_ddd, oral_units,
iv_ddd, iv_units) %>%
atc,
cid = CIDs,
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
name = name %>%
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
gsub("edta", "EDTA", ., ignore.case = TRUE),
group = case_when(
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
TRUE ~ "Other antibacterials"
),
atc_group1, atc_group2,
abbreviations = unname(abbr),
synonyms = unname(synonyms),
oral_ddd, oral_units,
iv_ddd, iv_units
) %>%
as.data.frame(stringsAsFactors = FALSE)
# some exceptions
@ -329,13 +375,15 @@ antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <-
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
# Add cefoxitin screening
class(antibiotics$ab) <- "character"
antibiotics <- rbind(antibiotics,data.frame(ab = "FOX1", atc = NA, cid = NA,
name = "Cefoxitin screening",
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
abbreviations = "cfsc", synonyms = NA,
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
loinc = NA,
stringsAsFactors = FALSE))
antibiotics <- rbind(antibiotics, data.frame(
ab = "FOX1", atc = NA, cid = NA,
name = "Cefoxitin screening",
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
abbreviations = "cfsc", synonyms = NA,
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
loinc = NA,
stringsAsFactors = FALSE
))
# More GLIMS codes
antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]], "amf"))
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
@ -520,27 +568,33 @@ antibiotics[which(antibiotics$ab == "RFP"), "abbreviations"][[1]] <- list(sort(c
antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]], "RET")))
antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]], "TVN")))
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(ab = "AMH",
name = "Amphotericin B-high",
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")))) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(ab = "TOH",
name = "Tobramycin-high",
abbreviations = list(c("tohl", "tobra high", "tobramycin high")))) %>%
rbind(antibiotics %>%
filter(ab == "BUT") %>%
mutate(ab = "CIX",
atc = "D01AE14",
name = "Ciclopirox",
group = "Antifungals/antimycotics",
atc_group1 = "Antifungals for topical use",
atc_group2 = "Other antifungals for topical use",
abbreviations = list(c("cipx"))))
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(
ab = "AMH",
name = "Amphotericin B-high",
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high"))
)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(
ab = "TOH",
name = "Tobramycin-high",
abbreviations = list(c("tohl", "tobra high", "tobramycin high"))
)) %>%
rbind(antibiotics %>%
filter(ab == "BUT") %>%
mutate(
ab = "CIX",
atc = "D01AE14",
name = "Ciclopirox",
group = "Antifungals/antimycotics",
atc_group1 = "Antifungals for topical use",
atc_group2 = "Other antifungals for topical use",
abbreviations = list(c("cipx"))
))
antibiotics[which(antibiotics$ab == "SSS"), "name"] <- "Sulfonamide"
# ESBL E-test codes:
antibiotics[which(antibiotics$ab == "CCV"), "abbreviations"][[1]] <- list(c("xtzl"))
@ -600,13 +654,13 @@ antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin"
antibiotics[which(antibiotics$ab == "PEN"), "atc"] <- "J01CE01"
# WHONET cleanup
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]], "Bicozamycin"))))
antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]], "Cefcatacol"))))
antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]], "Tuberactinomycin"))))
@ -617,7 +671,7 @@ antibiotics[which(antibiotics$ab == "PMR"), "abbreviations"][[1]] <- list(sort(u
# set cephalosporins groups for the ones that could not be determined automatically:
antibiotics <- antibiotics %>%
antibiotics <- antibiotics %>%
mutate(group = case_when(
name == "Cefcapene" ~ "Cephalosporins (3rd gen.)",
name == "Cefcapene pivoxil" ~ "Cephalosporins (3rd gen.)",
@ -650,21 +704,24 @@ antibiotics <- antibiotics %>%
name == "Ceftolozane/enzyme inhibitor" ~ "Cephalosporins (5th gen.)",
name == "Ceftolozane/tazobactam" ~ "Cephalosporins (5th gen.)",
name == "Cefuroxime axetil" ~ "Cephalosporins (2nd gen.)",
TRUE ~ group))
TRUE ~ group
))
antibiotics[which(antibiotics$ab %in% c("CYC", "LNZ", "THA", "TZD")), "group"] <- "Oxazolidinones"
# add pretomanid
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
mutate(ab = as.character(ab)) %>%
bind_rows(antibiotics %>%
mutate(ab = as.character(ab)) %>%
filter(ab == "SMF") %>%
mutate(ab = "PMD",
atc = "J04AK08",
cid = 456199,
name = "Pretomanid",
abbreviations = list(""),
oral_ddd = NA_real_))
mutate(ab = as.character(ab)) %>%
filter(ab == "SMF") %>%
mutate(
ab = "PMD",
atc = "J04AK08",
cid = 456199,
name = "Pretomanid",
abbreviations = list(""),
oral_ddd = NA_real_
))
@ -675,25 +732,24 @@ antibiotics <- antibiotics %>%
updated_atc <- as.list(antibiotics$atc)
get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
ab_name <- gsub("/", " and ", tolower(ab_name), fixed = TRUE)
# we will do a search on their website, which means:
# go to the url
atc_tbl <- read_html(url) %>%
atc_tbl <- read_html(url) %>%
# get all forms
html_form() %>%
# get the second form (the first form is a global website form)
.[[2]] %>%
.[[2]] %>%
# set the name input box to our search parameter
html_form_set(name = ab_name) %>%
html_form_set(name = ab_name) %>%
# hit Submit
html_form_submit() %>%
html_form_submit() %>%
# read the resulting page
read_html() %>%
read_html() %>%
# retrieve the table on it
html_node("table") %>%
html_node("table") %>%
# transform it to an R data set
html_table(header = FALSE)
# and get the ATCs (first column) of only exact hits
@ -702,9 +758,10 @@ get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
# this takes around 4 minutes (some are skipped and go faster)
for (i in seq_len(nrow(antibiotics))) {
message(percentage(i / nrow(antibiotics), digits = 1),
" - Downloading ", antibiotics$name[i],
appendLF = FALSE)
message(percentage(i / nrow(antibiotics), digits = 1),
" - Downloading ", antibiotics$name[i],
appendLF = FALSE
)
atcs <- get_atcs(antibiotics$name[i])
if (length(atcs) > 0) {
updated_atc[[i]] <- atcs