1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 19:01:51 +02:00

styled, unit test fix

This commit is contained in:
2022-08-28 10:31:50 +02:00
parent 4cb1db4554
commit 4d050aef7c
147 changed files with 10897 additions and 8169 deletions

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -28,10 +28,14 @@ install.packages("data-raw/tinytest_1.3.1.tar.gz", dependencies = c("Depends", "
install.packages(getwd(), repos = NULL, type = "source")
# install.packages("data-raw/AMR_latest.tar.gz", dependencies = FALSE)
pkg_suggests <- gsub("[^a-zA-Z0-9]+", "",
unlist(strsplit(unlist(packageDescription("AMR",
fields = c("Suggests", "Enhances", "LinkingTo"))),
split = ", ?")))
pkg_suggests <- gsub(
"[^a-zA-Z0-9]+", "",
unlist(strsplit(unlist(packageDescription("AMR",
fields = c("Suggests", "Enhances", "LinkingTo")
)),
split = ", ?"
))
)
pkg_suggests <- unname(pkg_suggests[!is.na(pkg_suggests)])
cat("################################################\n")
cat("Packages listed in Suggests/Enhances:", paste(pkg_suggests, collapse = ", "), "\n")
@ -49,22 +53,26 @@ if (length(to_install) == 0) {
for (i in seq_len(length(to_install))) {
cat("Installing package", to_install[i], "\n")
tryCatch(install.packages(to_install[i],
type = "source",
repos = "https://cran.rstudio.com/",
dependencies = c("Depends", "Imports", "LinkingTo"),
quiet = FALSE),
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message))
type = "source",
repos = "https://cran.rstudio.com/",
dependencies = c("Depends", "Imports", "LinkingTo"),
quiet = FALSE
),
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message)
)
if (.Platform$OS.type != "unix" && !to_install[i] %in% rownames(utils::installed.packages())) {
tryCatch(install.packages(to_install[i],
type = "binary",
repos = "https://cran.rstudio.com/",
dependencies = c("Depends", "Imports", "LinkingTo"),
quiet = FALSE),
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message))
type = "binary",
repos = "https://cran.rstudio.com/",
dependencies = c("Depends", "Imports", "LinkingTo"),
quiet = FALSE
),
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message)
)
}
}
@ -76,8 +84,9 @@ if (length(to_update) == 0) {
for (i in seq_len(length(to_update))) {
cat("Updating package '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
tryCatch(update.packages(to_update[i], repos = "https://cran.rstudio.com/", ask = FALSE),
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message))
# message = function(m) invisible(),
warning = function(w) message(w$message),
error = function(e) message(e$message)
)
cat("Updated to '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
}

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -28,9 +28,10 @@
if (!file.exists("DESCRIPTION") || !"Package: AMR" %in% readLines("DESCRIPTION")) {
stop("Be sure to run this script in the root location of the AMR package folder.\n",
"Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
"Current working directory: ", getwd(),
call. = FALSE)
"Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
"Current working directory: ", getwd(),
call. = FALSE
)
}
# save old global env to restore later
@ -41,34 +42,42 @@ load("R/sysdata.rda", envir = lang_env)
# replace language objects with updates
message("Reading translation file...")
lang_env$TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = "")
lang_env$TRANSLATIONS <- utils::read.delim(
file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = ""
)
lang_env$LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
lapply(lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
function(x) list(exonym = x[1], endonym = x[2])))
lang_env$LANGUAGES_SUPPORTED_NAMES <- c(
list(en = list(exonym = "English", endonym = "English")),
lapply(
lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
function(x) list(exonym = x[1], endonym = x[2])
)
)
lang_env$LANGUAGES_SUPPORTED <- names(lang_env$LANGUAGES_SUPPORTED_NAMES)
# save env to internal package data
# usethis::use_data() does not allow to save a list :(
message("Saving to internal data...")
save(list = names(lang_env),
file = "R/sysdata.rda",
ascii = FALSE,
version = 2,
compress = "xz",
envir = lang_env)
save(
list = names(lang_env),
file = "R/sysdata.rda",
ascii = FALSE,
version = 2,
compress = "xz",
envir = lang_env
)
rm(lang_env)

View File

@ -24,7 +24,7 @@
# ==================================================================== #
# Run this file to update the package using:
# source("data-raw/pre-commit-hook.R")
# source("data-raw/_pre_commit_hook.R")
library(dplyr, warn.conflicts = FALSE)
devtools::load_all(quiet = TRUE)
@ -36,41 +36,54 @@ old_globalenv <- ls(envir = globalenv())
# Save internal data to R/sysdata.rda -------------------------------------
# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
skip = 10,
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
strip.white = TRUE,
na = c(NA, "", NULL)) %>%
EUCAST_RULES_DF <- utils::read.delim(
file = "data-raw/eucast_rules.tsv",
skip = 10,
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
strip.white = TRUE,
na = c(NA, "", NULL)
) %>%
# take the order of the reference.rule_group column in the original data file
mutate(reference.rule_group = factor(reference.rule_group,
levels = unique(reference.rule_group),
ordered = TRUE),
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
arrange(reference.rule_group,
reference.version,
sorting_rule,
reference.rule) %>%
mutate(
reference.rule_group = factor(reference.rule_group,
levels = unique(reference.rule_group),
ordered = TRUE
),
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)
) %>%
arrange(
reference.rule_group,
reference.version,
sorting_rule,
reference.rule
) %>%
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
select(-sorting_rule)
TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = "")
TRANSLATIONS <- utils::read.delim(
file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = ""
)
LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
function(x) list(exonym = x[1], endonym = x[2])))
LANGUAGES_SUPPORTED_NAMES <- c(
list(en = list(exonym = "English", endonym = "English")),
lapply(
TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
function(x) list(exonym = x[1], endonym = x[2])
)
)
LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)
@ -84,43 +97,53 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
MO_staph <- AMR::microorganisms
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
if (type == "CoNS") {
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
"croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis",
"equorum", "felis", "fleurettii", "gallinarum",
"haemolyticus", "hominis", "jettensis", "kloosii",
"lentus", "lugdunensis", "massiliensis", "microti",
"muscae", "nepalensis", "pasteuri", "petrasii",
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
"sciuri", "simulans", "stepanovicii", "succinus",
"ureilyticus",
"vitulinus", "vitulus", "warneri", "xylosus",
"caledonicus", "canis",
"durrellii", "lloydii")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
"mo", drop = TRUE]
MO_staph[which(MO_staph$species %in% c(
"coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
"croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis",
"equorum", "felis", "fleurettii", "gallinarum",
"haemolyticus", "hominis", "jettensis", "kloosii",
"lentus", "lugdunensis", "massiliensis", "microti",
"muscae", "nepalensis", "pasteuri", "petrasii",
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
"sciuri", "simulans", "stepanovicii", "succinus",
"ureilyticus",
"vitulinus", "vitulus", "warneri", "xylosus",
"caledonicus", "canis",
"durrellii", "lloydii"
) |
(MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
"mo",
drop = TRUE
]
} else if (type == "CoPS") {
MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
"agnetis", "argenteus",
"cornubiensis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schweitzeri", "simiae",
"roterodami")
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
"mo", drop = TRUE]
MO_staph[which(MO_staph$species %in% c(
"coagulase-positive", "coagulans",
"agnetis", "argenteus",
"cornubiensis",
"delphini", "lutrae",
"hyicus", "intermedius",
"pseudintermedius", "pseudointermedius",
"schweitzeri", "simiae",
"roterodami"
) |
(MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
"mo",
drop = TRUE
]
}
}
create_MO_fullname_lower <- function() {
MO_lookup <- AMR::microorganisms
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$species,
MO_lookup$subspecies)))
MO_lookup$fullname_lower <- tolower(trimws(paste(
MO_lookup$genus,
MO_lookup$species,
MO_lookup$subspecies
)))
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname", drop = TRUE])
MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
@ -130,59 +153,103 @@ MO_CONS <- create_species_cons_cops("CoNS")
MO_COPS <- create_species_cons_cops("CoPS")
MO_STREP_ABCG <- as.mo(MO_lookup[which(MO_lookup$genus == "Streptococcus"), "mo", drop = TRUE], Lancefield = TRUE) %in% c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_GRPC", "B_STRPT_GRPG")
MO_FULLNAME_LOWER <- create_MO_fullname_lower()
MO_PREVALENT_GENERA <- c("Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
"Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
"Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
"Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
"Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
"Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
"Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
"Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
"Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
"Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
"Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
"Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
"Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
"Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
"Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
"Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
"Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
"Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
"Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
"Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
"Weeksella", "Wuchereria")
MO_PREVALENT_GENERA <- c(
"Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
"Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
"Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
"Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
"Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
"Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
"Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
"Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
"Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
"Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
"Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
"Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
"Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
"Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
"Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
"Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
"Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
"Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
"Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
"Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
"Weeksella", "Wuchereria"
)
# antibiotic groups
# (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv)
globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab")
AB_AMINOGLYCOSIDES <- antibiotics %>% filter(group %like% "aminoglycoside") %>% pull(ab)
AB_AMINOGLYCOSIDES <- antibiotics %>%
filter(group %like% "aminoglycoside") %>%
pull(ab)
AB_AMINOPENICILLINS <- as.ab(c("AMP", "AMX"))
AB_ANTIFUNGALS <- AB_lookup %>% filter(group %like% "antifungal") %>% pull(ab)
AB_ANTIMYCOBACTERIALS <- AB_lookup %>% filter(group %like% "antimycobacterial") %>% pull(ab)
AB_CARBAPENEMS <- antibiotics %>% filter(group %like% "carbapenem") %>% pull(ab)
AB_CEPHALOSPORINS <- antibiotics %>% filter(group %like% "cephalosporin") %>% pull(ab)
AB_CEPHALOSPORINS_1ST <- antibiotics %>% filter(group %like% "cephalosporin.*1") %>% pull(ab)
AB_CEPHALOSPORINS_2ND <- antibiotics %>% filter(group %like% "cephalosporin.*2") %>% pull(ab)
AB_CEPHALOSPORINS_3RD <- antibiotics %>% filter(group %like% "cephalosporin.*3") %>% pull(ab)
AB_CEPHALOSPORINS_4TH <- antibiotics %>% filter(group %like% "cephalosporin.*4") %>% pull(ab)
AB_CEPHALOSPORINS_5TH <- antibiotics %>% filter(group %like% "cephalosporin.*5") %>% pull(ab)
AB_ANTIFUNGALS <- AB_lookup %>%
filter(group %like% "antifungal") %>%
pull(ab)
AB_ANTIMYCOBACTERIALS <- AB_lookup %>%
filter(group %like% "antimycobacterial") %>%
pull(ab)
AB_CARBAPENEMS <- antibiotics %>%
filter(group %like% "carbapenem") %>%
pull(ab)
AB_CEPHALOSPORINS <- antibiotics %>%
filter(group %like% "cephalosporin") %>%
pull(ab)
AB_CEPHALOSPORINS_1ST <- antibiotics %>%
filter(group %like% "cephalosporin.*1") %>%
pull(ab)
AB_CEPHALOSPORINS_2ND <- antibiotics %>%
filter(group %like% "cephalosporin.*2") %>%
pull(ab)
AB_CEPHALOSPORINS_3RD <- antibiotics %>%
filter(group %like% "cephalosporin.*3") %>%
pull(ab)
AB_CEPHALOSPORINS_4TH <- antibiotics %>%
filter(group %like% "cephalosporin.*4") %>%
pull(ab)
AB_CEPHALOSPORINS_5TH <- antibiotics %>%
filter(group %like% "cephalosporin.*5") %>%
pull(ab)
AB_CEPHALOSPORINS_EXCEPT_CAZ <- AB_CEPHALOSPORINS[AB_CEPHALOSPORINS != "CAZ"]
AB_FLUOROQUINOLONES <- antibiotics %>% filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>% pull(ab)
AB_GLYCOPEPTIDES <- antibiotics %>% filter(group %like% "glycopeptide") %>% pull(ab)
AB_FLUOROQUINOLONES <- antibiotics %>%
filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>%
pull(ab)
AB_GLYCOPEPTIDES <- antibiotics %>%
filter(group %like% "glycopeptide") %>%
pull(ab)
AB_LIPOGLYCOPEPTIDES <- as.ab(c("DAL", "ORI", "TLV")) # dalba/orita/tela
AB_GLYCOPEPTIDES_EXCEPT_LIPO <- AB_GLYCOPEPTIDES[!AB_GLYCOPEPTIDES %in% AB_LIPOGLYCOPEPTIDES]
AB_LINCOSAMIDES <- antibiotics %>% filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>% pull(ab)
AB_MACROLIDES <- antibiotics %>% filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>% pull(ab)
AB_OXAZOLIDINONES <- antibiotics %>% filter(group %like% "oxazolidinone") %>% pull(ab)
AB_PENICILLINS <- antibiotics %>% filter(group %like% "penicillin") %>% pull(ab)
AB_POLYMYXINS <- antibiotics %>% filter(group %like% "polymyxin") %>% pull(ab)
AB_QUINOLONES <- antibiotics %>% filter(group %like% "quinolone") %>% pull(ab)
AB_STREPTOGRAMINS <- antibiotics %>% filter(atc_group2 %like% "streptogramin") %>% pull(ab)
AB_TETRACYCLINES <- antibiotics %>% filter(group %like% "tetracycline") %>% pull(ab)
AB_LINCOSAMIDES <- antibiotics %>%
filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>%
pull(ab)
AB_MACROLIDES <- antibiotics %>%
filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>%
pull(ab)
AB_OXAZOLIDINONES <- antibiotics %>%
filter(group %like% "oxazolidinone") %>%
pull(ab)
AB_PENICILLINS <- antibiotics %>%
filter(group %like% "penicillin") %>%
pull(ab)
AB_POLYMYXINS <- antibiotics %>%
filter(group %like% "polymyxin") %>%
pull(ab)
AB_QUINOLONES <- antibiotics %>%
filter(group %like% "quinolone") %>%
pull(ab)
AB_STREPTOGRAMINS <- antibiotics %>%
filter(atc_group2 %like% "streptogramin") %>%
pull(ab)
AB_TETRACYCLINES <- antibiotics %>%
filter(group %like% "tetracycline") %>%
pull(ab)
AB_TETRACYCLINES_EXCEPT_TGC <- AB_TETRACYCLINES[AB_TETRACYCLINES != "TGC"]
AB_TRIMETHOPRIMS <- antibiotics %>% filter(group %like% "trimethoprim") %>% pull(ab)
AB_TRIMETHOPRIMS <- antibiotics %>%
filter(group %like% "trimethoprim") %>%
pull(ab)
AB_UREIDOPENICILLINS <- as.ab(c("PIP", "TZP", "AZL", "MEZ"))
AB_BETALACTAMS <- c(AB_PENICILLINS, AB_CEPHALOSPORINS, AB_CARBAPENEMS)
# this will be used for documentation:
@ -194,15 +261,21 @@ create_AB_lookup <- function() {
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
c("ab", "atc", "cid", "name",
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
drop = FALSE]),
stringsAsFactors = FALSE)),
function(x) {
x <- generalise_antibiotic_name(unname(unlist(x)))
x[x != ""]
}))
AB_lookup$generalised_all <- unname(lapply(
as.list(as.data.frame(t(AB_lookup[,
c(
"ab", "atc", "cid", "name",
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]
),
drop = FALSE
]),
stringsAsFactors = FALSE
)),
function(x) {
x <- generalise_antibiotic_name(unname(unlist(x)))
x[x != ""]
}
))
AB_lookup[, colnames(AB_lookup)[colnames(AB_lookup) %like% "^generalised"]]
}
AB_LOOKUP <- create_AB_lookup()
@ -210,48 +283,49 @@ AB_LOOKUP <- create_AB_lookup()
# Export to package as internal data ----
usethis::ui_info(paste0("Saving {usethis::ui_value('sysdata.rda')} to {usethis::ui_value('R/')}"))
suppressMessages(usethis::use_data(EUCAST_RULES_DF,
TRANSLATIONS,
LANGUAGES_SUPPORTED_NAMES,
LANGUAGES_SUPPORTED,
MO_CONS,
MO_COPS,
MO_STREP_ABCG,
MO_FULLNAME_LOWER,
MO_PREVALENT_GENERA,
AB_LOOKUP,
AB_AMINOGLYCOSIDES,
AB_AMINOPENICILLINS,
AB_ANTIFUNGALS,
AB_ANTIMYCOBACTERIALS,
AB_CARBAPENEMS,
AB_CEPHALOSPORINS,
AB_CEPHALOSPORINS_1ST,
AB_CEPHALOSPORINS_2ND,
AB_CEPHALOSPORINS_3RD,
AB_CEPHALOSPORINS_4TH,
AB_CEPHALOSPORINS_5TH,
AB_CEPHALOSPORINS_EXCEPT_CAZ,
AB_FLUOROQUINOLONES,
AB_LIPOGLYCOPEPTIDES,
AB_GLYCOPEPTIDES,
AB_GLYCOPEPTIDES_EXCEPT_LIPO,
AB_LINCOSAMIDES,
AB_MACROLIDES,
AB_OXAZOLIDINONES,
AB_PENICILLINS,
AB_POLYMYXINS,
AB_QUINOLONES,
AB_STREPTOGRAMINS,
AB_TETRACYCLINES,
AB_TETRACYCLINES_EXCEPT_TGC,
AB_TRIMETHOPRIMS,
AB_UREIDOPENICILLINS,
AB_BETALACTAMS,
DEFINED_AB_GROUPS,
internal = TRUE,
overwrite = TRUE,
version = 2,
compress = "xz"))
TRANSLATIONS,
LANGUAGES_SUPPORTED_NAMES,
LANGUAGES_SUPPORTED,
MO_CONS,
MO_COPS,
MO_STREP_ABCG,
MO_FULLNAME_LOWER,
MO_PREVALENT_GENERA,
AB_LOOKUP,
AB_AMINOGLYCOSIDES,
AB_AMINOPENICILLINS,
AB_ANTIFUNGALS,
AB_ANTIMYCOBACTERIALS,
AB_CARBAPENEMS,
AB_CEPHALOSPORINS,
AB_CEPHALOSPORINS_1ST,
AB_CEPHALOSPORINS_2ND,
AB_CEPHALOSPORINS_3RD,
AB_CEPHALOSPORINS_4TH,
AB_CEPHALOSPORINS_5TH,
AB_CEPHALOSPORINS_EXCEPT_CAZ,
AB_FLUOROQUINOLONES,
AB_LIPOGLYCOPEPTIDES,
AB_GLYCOPEPTIDES,
AB_GLYCOPEPTIDES_EXCEPT_LIPO,
AB_LINCOSAMIDES,
AB_MACROLIDES,
AB_OXAZOLIDINONES,
AB_PENICILLINS,
AB_POLYMYXINS,
AB_QUINOLONES,
AB_STREPTOGRAMINS,
AB_TETRACYCLINES,
AB_TETRACYCLINES_EXCEPT_TGC,
AB_TRIMETHOPRIMS,
AB_UREIDOPENICILLINS,
AB_BETALACTAMS,
DEFINED_AB_GROUPS,
internal = TRUE,
overwrite = TRUE,
version = 2,
compress = "xz"
))
# Export data sets to the repository in different formats -----------------
@ -273,12 +347,15 @@ write_md5 <- function(object) {
close(conn)
}
changed_md5 <- function(object) {
tryCatch({
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
compared <- md5(object) != readLines(con = conn)
close(conn)
compared
}, error = function(e) TRUE)
tryCatch(
{
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
compared <- md5(object) != readLines(con = conn)
close(conn)
compared
},
error = function(e) TRUE
)
}
# give official names to ABs and MOs
@ -306,7 +383,7 @@ if (changed_md5(microorganisms)) {
max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
mo <- microorganisms
mo$snomed <- max_50_snomed
mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
mo <- dplyr::mutate_if(mo, ~ !is.numeric(.), as.character)
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
@ -328,7 +405,7 @@ if (changed_md5(microorganisms.old)) {
try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
}
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
ab <- dplyr::mutate_if(antibiotics, ~ !is.numeric(.), as.character)
if (changed_md5(ab)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('data-raw/')}"))
write_md5(ab)
@ -342,7 +419,7 @@ if (changed_md5(ab)) {
try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
}
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
av <- dplyr::mutate_if(antivirals, ~ !is.numeric(.), as.character)
if (changed_md5(av)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('data-raw/')}"))
write_md5(av)
@ -357,9 +434,11 @@ if (changed_md5(av)) {
}
# give official names to ABs and MOs
intrinsicR <- data.frame(microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
stringsAsFactors = FALSE)
intrinsicR <- data.frame(
microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
stringsAsFactors = FALSE
)
if (changed_md5(intrinsicR)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('intrinsic_resistant')} to {usethis::ui_value('data-raw/')}"))
write_md5(intrinsicR)
@ -394,4 +473,25 @@ rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
rm(current_globalenv)
devtools::load_all(quiet = TRUE)
devtools::document()
# Document pkg ------------------------------------------------------------
usethis::ui_info("Documenting package")
suppressMessages(devtools::document(quiet = TRUE))
# Style pkg ---------------------------------------------------------------
usethis::ui_info("Styling package")
invisible(capture.output(styler::style_pkg(
style = styler::tidyverse_style,
filetype = c("R", "Rmd")
)))
invisible(capture.output(styler::style_dir(
path = "inst", # unit tests
style = styler::tidyverse_style,
filetype = c("R", "Rmd")
)))
# Finished ----------------------------------------------------------------
usethis::ui_info("All done")

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -30,8 +30,9 @@
# 2. Download the CSV from https://loinc.org/download/loinc-table-file-csv/ (Loinc_2.67_Text_2.67.zip)
# 3. Read Loinc.csv that's in this zip file
loinc_df <- read.csv("data-raw/Loinc.csv",
row.names = NULL,
stringsAsFactors = FALSE)
row.names = NULL,
stringsAsFactors = FALSE
)
# 4. Clean and add
library(dplyr)
@ -39,7 +40,10 @@ library(cleaner)
library(AMR)
loinc_df %>% freq(CLASS) # to find the drugs
loinc_df <- loinc_df %>% filter(CLASS == "DRUG/TOX")
ab_names <- antibiotics %>% pull(name) %>% paste0(collapse = "|") %>% paste0("(", ., ")")
ab_names <- antibiotics %>%
pull(name) %>%
paste0(collapse = "|") %>%
paste0("(", ., ")")
antibiotics$loinc <- as.list(rep(NA_character_, nrow(antibiotics)))
for (i in seq_len(nrow(antibiotics))) {

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -34,10 +34,10 @@
#
# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes.
#
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
# copy of the software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
# is furnished to do so', given that a copyright notice is given in the software.
#
# Copyright notice on {date}, the day this code was downloaded, as found on

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -31,11 +31,10 @@ library(AMR)
# USE THIS FUNCTION TO READ THE EUCAST EXCEL FILE THAT CONTAINS THE BREAKPOINT TABLES
read_EUCAST <- function(sheet, file, guideline_name) {
read_EUCAST <- function(sheet, file, guideline_name) {
message("\nGetting sheet: ", sheet)
sheet.bak <- sheet
uncertainties <- NULL
add_uncertainties <- function(old, new) {
if (is.null(old)) {
@ -44,55 +43,64 @@ read_EUCAST <- function(sheet, file, guideline_name) {
bind_rows(old, new)
}
}
raw_data <- read.xlsx(xlsxFile = file,
sheet = sheet,
colNames = FALSE,
skipEmptyRows = FALSE,
skipEmptyCols = FALSE,
fillMergedCells = TRUE,
na.strings = c("", "-", "NA", "IE", "IP"))
probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~sum(!is.na(.))) %>% unlist() %>% max())
raw_data <- read.xlsx(
xlsxFile = file,
sheet = sheet,
colNames = FALSE,
skipEmptyRows = FALSE,
skipEmptyCols = FALSE,
fillMergedCells = TRUE,
na.strings = c("", "-", "NA", "IE", "IP")
)
probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~ sum(!is.na(.))) %>% unlist() %>% max())
if (probable_rows == 0) {
message("NO ROWS FOUND")
message("------------------------")
return(NULL)
}
# in the info header in the Excel file, EUCAST mentions which genera are targeted
if (sheet %like% "anaerob.*Gram.*posi") {
sheet <- paste0(c("Actinomyces", "Bifidobacterium", "Clostridioides",
"Clostridium", "Cutibacterium", "Eggerthella",
"Eubacterium", "Lactobacillus", "Propionibacterium",
"Staphylococcus saccharolyticus"),
collapse = "_")
sheet <- paste0(c(
"Actinomyces", "Bifidobacterium", "Clostridioides",
"Clostridium", "Cutibacterium", "Eggerthella",
"Eubacterium", "Lactobacillus", "Propionibacterium",
"Staphylococcus saccharolyticus"
),
collapse = "_"
)
} else if (sheet %like% "anaerob.*Gram.*nega") {
sheet <- paste0(c("Bacteroides",
"Bilophila",
"Fusobacterium",
"Mobiluncus",
"Parabacteroides",
"Porphyromonas",
"Prevotella"),
collapse = "_")
sheet <- paste0(c(
"Bacteroides",
"Bilophila",
"Fusobacterium",
"Mobiluncus",
"Parabacteroides",
"Porphyromonas",
"Prevotella"
),
collapse = "_"
)
} else if (sheet == "Streptococcus A,B,C,G") {
sheet <- paste0(microorganisms %>%
filter(genus == "Streptococcus") %>%
mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
filter(lancefield %like% "^Streptococcus group") %>%
pull(fullname),
collapse = "_")
filter(genus == "Streptococcus") %>%
mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
filter(lancefield %like% "^Streptococcus group") %>%
pull(fullname),
collapse = "_"
)
} else if (sheet %like% "PK.*PD") {
sheet <- "UNKNOWN"
}
mo_sheet <- paste0(suppressMessages(as.mo(unlist(strsplit(sheet, "_")))), collapse = "|")
if (!is.null(mo_uncertainties())) uncertainties <- add_uncertainties(uncertainties, mo_uncertainties())
set_columns_names <- function(x, cols) {
colnames(x) <- cols[1:length(colnames(x))]
x
}
get_mo <- function(x) {
for (i in seq_len(length(x))) {
y <- trimws(unlist(strsplit(x[i], "(,|and)")))
@ -104,76 +112,91 @@ read_EUCAST <- function(sheet, file, guideline_name) {
}
x
}
MICs_with_trailing_superscript <- c(seq(from = 0.0011, to = 0.0019, by = 0.0001),
seq(from = 0.031, to = 0.039, by = 0.001),
seq(from = 0.061, to = 0.069, by = 0.001),
seq(from = 0.1251, to = 0.1259, by = 0.0001),
seq(from = 0.251, to = 0.259, by = 0.001),
seq(from = 0.51, to = 0.59, by = 0.01),
seq(from = 11, to = 19, by = 1),
seq(from = 161, to = 169, by = 01),
seq(from = 21, to = 29, by = 1),
seq(from = 321, to = 329, by = 1),
seq(from = 41, to = 49, by = 1),
seq(from = 81, to = 89, by = 1))
MICs_with_trailing_superscript <- c(
seq(from = 0.0011, to = 0.0019, by = 0.0001),
seq(from = 0.031, to = 0.039, by = 0.001),
seq(from = 0.061, to = 0.069, by = 0.001),
seq(from = 0.1251, to = 0.1259, by = 0.0001),
seq(from = 0.251, to = 0.259, by = 0.001),
seq(from = 0.51, to = 0.59, by = 0.01),
seq(from = 11, to = 19, by = 1),
seq(from = 161, to = 169, by = 01),
seq(from = 21, to = 29, by = 1),
seq(from = 321, to = 329, by = 1),
seq(from = 41, to = 49, by = 1),
seq(from = 81, to = 89, by = 1)
)
has_superscript <- function(x) {
# because due to floating point error, 0.1252 is not in:
# because due to floating point error, 0.1252 is not in:
# seq(from = 0.1251, to = 0.1259, by = 0.0001)
sapply(x, function(x) any(near(x, MICs_with_trailing_superscript)))
}
has_zone_diameters <- rep(any(unlist(raw_data) %like% "zone diameter"), nrow(raw_data))
cleaned <- raw_data %>%
as_tibble() %>%
set_columns_names(LETTERS) %>%
transmute(drug = A,
MIC_S = B,
MIC_R = C,
disk_dose = ifelse(has_zone_diameters, E, NA_character_),
disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
disk_R = ifelse(has_zone_diameters, G, NA_character_)) %>%
filter(!is.na(drug),
!(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
MIC_S %unlike% "(MIC|S ≤|note)",
MIC_S %unlike% "^[-]",
drug != MIC_S,) %>%
mutate(administration = case_when(drug %like% "[( ]oral" ~ "oral",
drug %like% "[( ]iv" ~ "iv",
TRUE ~ NA_character_),
uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)) %>%
cleaned <- raw_data %>%
as_tibble() %>%
set_columns_names(LETTERS) %>%
transmute(
drug = A,
MIC_S = B,
MIC_R = C,
disk_dose = ifelse(has_zone_diameters, E, NA_character_),
disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
disk_R = ifelse(has_zone_diameters, G, NA_character_)
) %>%
filter(
!is.na(drug),
!(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
MIC_S %unlike% "(MIC|S ≤|note)",
MIC_S %unlike% "^[-]",
drug != MIC_S,
) %>%
mutate(
administration = case_when(
drug %like% "[( ]oral" ~ "oral",
drug %like% "[( ]iv" ~ "iv",
TRUE ~ NA_character_
),
uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)
) %>%
# clean disk doses
mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>%
mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>%
# clean MIC and disk values
mutate(MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
MIC_R = gsub(".,.", "", MIC_R),
disk_S = gsub(".,.", "", disk_S),
disk_R = gsub(".,.", "", disk_R),
MIC_S = clean_double(MIC_S), # make them valid numeric values
MIC_R = clean_double(MIC_R),
disk_S = clean_integer(disk_S),
disk_R = clean_integer(disk_R),
# invalid MIC values have a superscript text, delete those
MIC_S = ifelse(has_superscript(MIC_S),
substr(MIC_S, 1, nchar(MIC_S) - 1),
MIC_S),
MIC_R = ifelse(has_superscript(MIC_R),
substr(MIC_R, 1, nchar(MIC_R) - 1),
MIC_R),
# and some are just awful
MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
) %>%
mutate(
MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
MIC_R = gsub(".,.", "", MIC_R),
disk_S = gsub(".,.", "", disk_S),
disk_R = gsub(".,.", "", disk_R),
MIC_S = clean_double(MIC_S), # make them valid numeric values
MIC_R = clean_double(MIC_R),
disk_S = clean_integer(disk_S),
disk_R = clean_integer(disk_R),
# invalid MIC values have a superscript text, delete those
MIC_S = ifelse(has_superscript(MIC_S),
substr(MIC_S, 1, nchar(MIC_S) - 1),
MIC_S
),
MIC_R = ifelse(has_superscript(MIC_R),
substr(MIC_R, 1, nchar(MIC_R) - 1),
MIC_R
),
# and some are just awful
MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
) %>%
# clean drug names
mutate(drug = gsub(" ?[(, ].*$", "", drug),
drug = gsub("[1-9]+$", "", drug),
ab = as.ab(drug)) %>%
select(ab, mo, everything(), -drug) %>%
mutate(
drug = gsub(" ?[(, ].*$", "", drug),
drug = gsub("[1-9]+$", "", drug),
ab = as.ab(drug)
) %>%
select(ab, mo, everything(), -drug) %>%
as.data.frame(stringsAsFactors = FALSE)
# new row for every different MO mentioned
for (i in 1:nrow(cleaned)) {
mo <- cleaned[i, "mo", drop = TRUE]
@ -181,37 +204,44 @@ read_EUCAST <- function(sheet, file, guideline_name) {
mo_vect <- unlist(strsplit(mo, "|", fixed = TRUE))
cleaned[i, "mo"] <- mo_vect[1]
for (j in seq_len(length(mo_vect))) {
cleaned <- bind_rows(cleaned, cleaned[i , , drop = FALSE])
cleaned <- bind_rows(cleaned, cleaned[i, , drop = FALSE])
cleaned[nrow(cleaned), "mo"] <- mo_vect[j]
}
}
}
cleaned <- cleaned %>%
distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>%
arrange(ab, mo) %>%
cleaned <- cleaned %>%
distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>%
arrange(ab, mo) %>%
mutate_at(c("MIC_S", "MIC_R", "disk_S", "disk_R"), as.double) %>%
pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>%
mutate(method = ifelse(type %like% "MIC", "MIC", "DISK"),
type = gsub("^.*_", "breakpoint_", type)) %>%
pivot_wider(names_from = type, values_from = value) %>%
mutate(guideline = guideline_name,
disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
mo = ifelse(mo == "", mo_sheet, mo)) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>%
pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>%
mutate(
method = ifelse(type %like% "MIC", "MIC", "DISK"),
type = gsub("^.*_", "breakpoint_", type)
) %>%
pivot_wider(names_from = type, values_from = value) %>%
mutate(
guideline = guideline_name,
disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
mo = ifelse(mo == "", mo_sheet, mo)
) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>%
# comply with rsi_translation for now
transmute(guideline,
method,
site = case_when(uti ~ "UTI",
systemic ~ "Systemic",
TRUE ~ administration),
mo, ab,
ref_tbl = sheet.bak,
disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
breakpoint_S,
breakpoint_R) %>%
method,
site = case_when(
uti ~ "UTI",
systemic ~ "Systemic",
TRUE ~ administration
),
mo, ab,
ref_tbl = sheet.bak,
disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
breakpoint_S,
breakpoint_R
) %>%
as.data.frame(stringsAsFactors = FALSE)
if (!is.null(uncertainties)) {
print(uncertainties %>% distinct(input, mo, .keep_all = TRUE))
}
@ -231,24 +261,33 @@ guideline_name <- "EUCAST 2021"
sheets_to_analyse <- sheets[!sheets %in% c("Content", "Changes", "Notes", "Guidance", "Dosages", "Technical uncertainty", "Topical agents")]
# takes the longest time:
new_EUCAST <- read_EUCAST(sheet = sheets_to_analyse[1],
file = file,
guideline_name = guideline_name)
new_EUCAST <- read_EUCAST(
sheet = sheets_to_analyse[1],
file = file,
guideline_name = guideline_name
)
for (i in 2:length(sheets_to_analyse)) {
tryCatch(
new_EUCAST <<- bind_rows(new_EUCAST,
read_EUCAST(sheet = sheets_to_analyse[i],
file = file,
guideline_name = guideline_name))
, error = function(e) message(e$message))
new_EUCAST <<- bind_rows(
new_EUCAST,
read_EUCAST(
sheet = sheets_to_analyse[i],
file = file,
guideline_name = guideline_name
)
),
error = function(e) message(e$message)
)
}
# 2021-07-12 fix for Morganellaceae (check other lines too next time)
morg <- rsi_translation %>%
as_tibble() %>%
filter(ab == "IPM",
guideline == "EUCAST 2021",
mo == as.mo("Enterobacterales")) %>%
filter(
ab == "IPM",
guideline == "EUCAST 2021",
mo == as.mo("Enterobacterales")
) %>%
mutate(mo = as.mo("Morganellaceae"))
morg[which(morg$method == "MIC"), "breakpoint_S"] <- 0.001
morg[which(morg$method == "MIC"), "breakpoint_R"] <- 4
@ -258,5 +297,5 @@ morg[which(morg$method == "DISK"), "breakpoint_R"] <- 19
rsi_translation <- rsi_translation %>%
bind_rows(morg) %>%
bind_rows(morg %>%
mutate(guideline = "EUCAST 2020")) %>%
mutate(guideline = "EUCAST 2020")) %>%
arrange(desc(guideline), ab, mo, method)

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -33,34 +33,43 @@ library(dplyr)
library(readxl)
DRGLST <- read_excel("DRGLST.xlsx")
abx <- DRGLST %>%
select(ab = WHON5_CODE,
name = ANTIBIOTIC) %>%
select(
ab = WHON5_CODE,
name = ANTIBIOTIC
) %>%
# remove the ones without WHONET code
filter(!is.na(ab)) %>%
distinct(name, .keep_all = TRUE) %>%
# add the ones without WHONET code
bind_rows(
DRGLST %>%
select(ab = WHON5_CODE,
name = ANTIBIOTIC) %>%
select(
ab = WHON5_CODE,
name = ANTIBIOTIC
) %>%
filter(is.na(ab)) %>%
distinct(name, .keep_all = TRUE)
# add new ab code later
# add new ab code later
) %>%
arrange(name)
# add old ATC codes
ab_old <- AMR::antibiotics %>%
mutate(official = gsub("( and |, )", "/", official),
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
ifelse(is.na(certe), "", certe),
ifelse(is.na(umcg), "", umcg),
sep = "|")))
mutate(
official = gsub("( and |, )", "/", official),
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
ifelse(is.na(certe), "", certe),
ifelse(is.na(umcg), "", umcg),
sep = "|"
))
)
for (i in 1:nrow(ab_old)) {
abbr <- ab_old[i, "abbr"]
abbr <- strsplit(abbr, "|", fixed = TRUE) %>% unlist() %>% unique()
abbr <- strsplit(abbr, "|", fixed = TRUE) %>%
unlist() %>%
unique()
abbr <- abbr[abbr != ""]
#print(abbr)
# print(abbr)
if (length(abbr) == 0) {
ab_old[i, "abbr"] <- NA_character_
} else {
@ -72,50 +81,54 @@ for (i in 1:nrow(ab_old)) {
abx_atc1 <- abx %>%
mutate(name_lower = tolower(name)) %>%
left_join(ab_old %>%
select(ears_net, atc), by = c(ab = "ears_net")) %>%
select(ears_net, atc), by = c(ab = "ears_net")) %>%
rename(atc1 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc2 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc3 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc4 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc5 = atc) %>%
left_join(ab_old %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
mutate(official = gsub("ine$", "in", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
mutate(official = gsub("f", "ph", official)) %>%
mutate(official = gsub("t", "th", official)) %>%
mutate(official = gsub("ine$", "in", official)) %>%
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
rename(atc6 = atc) %>%
mutate(atc = case_when(!is.na(atc1) ~ atc1,
!is.na(atc2) ~ atc2,
!is.na(atc3) ~ atc3,
!is.na(atc4) ~ atc4,
!is.na(atc4) ~ atc5,
TRUE ~ atc6)) %>%
mutate(atc = case_when(
!is.na(atc1) ~ atc1,
!is.na(atc2) ~ atc2,
!is.na(atc3) ~ atc3,
!is.na(atc4) ~ atc4,
!is.na(atc4) ~ atc5,
TRUE ~ atc6
)) %>%
distinct(ab, name, .keep_all = TRUE) %>%
select(ab, atc, name)
abx_atc2 <- ab_old %>%
filter(!atc %in% abx_atc1$atc,
is.na(ears_net),
!is.na(atc_group1),
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
official %unlike% "(combinations| with )") %>%
filter(
!atc %in% abx_atc1$atc,
is.na(ears_net),
!is.na(atc_group1),
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
official %unlike% "(combinations| with )"
) %>%
mutate(ab = NA_character_) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
select(ab, atc, name = official)
@ -125,12 +138,15 @@ abx2 <- bind_rows(abx_atc1, abx_atc2)
rm(abx_atc1)
rm(abx_atc2)
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub("[/0-9-]",
" ",
abx2$name[is.na(abx2$ab)]),
minlength = 3,
method = "left.kept",
strict = TRUE))
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub(
"[/0-9-]",
" ",
abx2$name[is.na(abx2$ab)]
),
minlength = 3,
method = "left.kept",
strict = TRUE
))
n_distinct(abx2$ab)
@ -150,7 +166,9 @@ for (i in 2:nrow(abx2)) {
abx2[i, "ab"] <- paste0(abx2[i, "ab", drop = TRUE], abx2[i, "seqnr", drop = TRUE])
}
}
abx2 <- abx2 %>% select(-seqnr) %>% arrange(name)
abx2 <- abx2 %>%
select(-seqnr) %>%
arrange(name)
# everything unique??
nrow(abx2) == n_distinct(abx2$ab)
@ -158,8 +176,10 @@ nrow(abx2) == n_distinct(abx2$ab)
# get ATC properties
abx2 <- abx2 %>%
left_join(ab_old %>%
select(atc, abbr, atc_group1, atc_group2,
oral_ddd, oral_units, iv_ddd, iv_units))
select(
atc, abbr, atc_group1, atc_group2,
oral_ddd, oral_units, iv_ddd, iv_units
))
abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixed = TRUE)))
@ -171,29 +191,41 @@ get_CID <- function(ab) {
p$tick()$print()
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=complete"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=complete"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
if (is.na(CID[i])) {
# try with removing the text in brackets
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
"/cids/TXT?name_type=complete"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
"/cids/TXT?name_type=complete"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
}
if (is.na(CID[i])) {
# try match on word and take the lowest CID value (sorted)
ab[i] <- gsub("[^a-z0-9]+", " ", ab[i], ignore.case = TRUE)
CID[i] <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=word"),
showProgress = FALSE)[[1]][1],
error = function(e) NA_integer_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
URLencode(ab[i], reserved = TRUE),
"/cids/TXT?name_type=word"
),
showProgress = FALSE
)[[1]][1],
error = function(e) NA_integer_
)
}
Sys.sleep(0.1)
}
@ -203,15 +235,15 @@ get_CID <- function(ab) {
# get CIDs (2-3 min)
CIDs <- get_CID(abx2$name)
# These could not be found:
abx2[is.na(CIDs),] %>% View()
abx2[is.na(CIDs), ] %>% View()
# returns list with synonyms (brand names), with CIDs as names
get_synonyms <- function(CID, clean = TRUE) {
synonyms <- rep(NA_character_, length(CID))
#p <- progress_ticker(n = length(CID), min_time = 0)
# p <- progress_ticker(n = length(CID), min_time = 0)
for (i in 1:length(CID)) {
#p$tick()$print()
# p$tick()$print()
synonyms_txt <- ""
@ -220,27 +252,37 @@ get_synonyms <- function(CID, clean = TRUE) {
}
synonyms_txt <- tryCatch(
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
CID[i],
"/synonyms/TXT"),
sep = "\n",
showProgress = FALSE)[[1]],
error = function(e) NA_character_)
data.table::fread(paste0(
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
CID[i],
"/synonyms/TXT"
),
sep = "\n",
showProgress = FALSE
)[[1]],
error = function(e) NA_character_
)
Sys.sleep(0.1)
if (clean == TRUE) {
# remove text between brackets
synonyms_txt <- trimws(gsub("[(].*[)]", "",
gsub("[[].*[]]", "",
gsub("[(].*[]]", "",
gsub("[[].*[)]", "", synonyms_txt)))))
synonyms_txt <- trimws(gsub(
"[(].*[)]", "",
gsub(
"[[].*[]]", "",
gsub(
"[(].*[]]", "",
gsub("[[].*[)]", "", synonyms_txt)
)
)
))
synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
# only length 6 to 20 and no txt with reading marks or numbers and must start with capital letter (= brand)
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)
& !grepl("[-&{},_0-9/]", synonyms_txt)
& grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) &
!grepl("[-&{},_0-9/]", synonyms_txt) &
grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
}
synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]))
synonyms[i] <- list(sort(synonyms_txt))
@ -251,52 +293,56 @@ get_synonyms <- function(CID, clean = TRUE) {
# get brand names from PubChem (2-3 min)
synonyms <- get_synonyms(CIDs)
synonyms <- lapply(synonyms,
function(x) {
if (length(x) == 0 | all(is.na(x))) {
""
} else {
x
}})
synonyms <- lapply(
synonyms,
function(x) {
if (length(x) == 0 | all(is.na(x))) {
""
} else {
x
}
}
)
# add them to data set
antibiotics <- abx2 %>%
left_join(DRGLST %>%
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
distinct(ab, .keep_all = TRUE), by = "ab") %>%
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
distinct(ab, .keep_all = TRUE), by = "ab") %>%
transmute(ab,
atc,
cid = CIDs,
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
name = name %>%
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
gsub("edta", "EDTA", ., ignore.case = TRUE),
group = case_when(
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
TRUE ~ "Other antibacterials"
),
atc_group1, atc_group2,
abbreviations = unname(abbr),
synonyms = unname(synonyms),
oral_ddd, oral_units,
iv_ddd, iv_units) %>%
atc,
cid = CIDs,
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
name = name %>%
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
gsub("edta", "EDTA", ., ignore.case = TRUE),
group = case_when(
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
TRUE ~ "Other antibacterials"
),
atc_group1, atc_group2,
abbreviations = unname(abbr),
synonyms = unname(synonyms),
oral_ddd, oral_units,
iv_ddd, iv_units
) %>%
as.data.frame(stringsAsFactors = FALSE)
# some exceptions
@ -329,13 +375,15 @@ antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <-
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
# Add cefoxitin screening
class(antibiotics$ab) <- "character"
antibiotics <- rbind(antibiotics,data.frame(ab = "FOX1", atc = NA, cid = NA,
name = "Cefoxitin screening",
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
abbreviations = "cfsc", synonyms = NA,
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
loinc = NA,
stringsAsFactors = FALSE))
antibiotics <- rbind(antibiotics, data.frame(
ab = "FOX1", atc = NA, cid = NA,
name = "Cefoxitin screening",
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
abbreviations = "cfsc", synonyms = NA,
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
loinc = NA,
stringsAsFactors = FALSE
))
# More GLIMS codes
antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]], "amf"))
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
@ -520,27 +568,33 @@ antibiotics[which(antibiotics$ab == "RFP"), "abbreviations"][[1]] <- list(sort(c
antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]], "RET")))
antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]], "TVN")))
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(ab = "AMH",
name = "Amphotericin B-high",
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")))) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(ab = "TOH",
name = "Tobramycin-high",
abbreviations = list(c("tohl", "tobra high", "tobramycin high")))) %>%
rbind(antibiotics %>%
filter(ab == "BUT") %>%
mutate(ab = "CIX",
atc = "D01AE14",
name = "Ciclopirox",
group = "Antifungals/antimycotics",
atc_group1 = "Antifungals for topical use",
atc_group2 = "Other antifungals for topical use",
abbreviations = list(c("cipx"))))
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(
ab = "AMH",
name = "Amphotericin B-high",
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high"))
)) %>%
rbind(antibiotics %>%
filter(ab == "GEH") %>%
mutate(
ab = "TOH",
name = "Tobramycin-high",
abbreviations = list(c("tohl", "tobra high", "tobramycin high"))
)) %>%
rbind(antibiotics %>%
filter(ab == "BUT") %>%
mutate(
ab = "CIX",
atc = "D01AE14",
name = "Ciclopirox",
group = "Antifungals/antimycotics",
atc_group1 = "Antifungals for topical use",
atc_group2 = "Other antifungals for topical use",
abbreviations = list(c("cipx"))
))
antibiotics[which(antibiotics$ab == "SSS"), "name"] <- "Sulfonamide"
# ESBL E-test codes:
antibiotics[which(antibiotics$ab == "CCV"), "abbreviations"][[1]] <- list(c("xtzl"))
@ -600,13 +654,13 @@ antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin"
antibiotics[which(antibiotics$ab == "PEN"), "atc"] <- "J01CE01"
# WHONET cleanup
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]], "Bicozamycin"))))
antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]], "Cefcatacol"))))
antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]], "Tuberactinomycin"))))
@ -617,7 +671,7 @@ antibiotics[which(antibiotics$ab == "PMR"), "abbreviations"][[1]] <- list(sort(u
# set cephalosporins groups for the ones that could not be determined automatically:
antibiotics <- antibiotics %>%
antibiotics <- antibiotics %>%
mutate(group = case_when(
name == "Cefcapene" ~ "Cephalosporins (3rd gen.)",
name == "Cefcapene pivoxil" ~ "Cephalosporins (3rd gen.)",
@ -650,21 +704,24 @@ antibiotics <- antibiotics %>%
name == "Ceftolozane/enzyme inhibitor" ~ "Cephalosporins (5th gen.)",
name == "Ceftolozane/tazobactam" ~ "Cephalosporins (5th gen.)",
name == "Cefuroxime axetil" ~ "Cephalosporins (2nd gen.)",
TRUE ~ group))
TRUE ~ group
))
antibiotics[which(antibiotics$ab %in% c("CYC", "LNZ", "THA", "TZD")), "group"] <- "Oxazolidinones"
# add pretomanid
antibiotics <- antibiotics %>%
mutate(ab = as.character(ab)) %>%
mutate(ab = as.character(ab)) %>%
bind_rows(antibiotics %>%
mutate(ab = as.character(ab)) %>%
filter(ab == "SMF") %>%
mutate(ab = "PMD",
atc = "J04AK08",
cid = 456199,
name = "Pretomanid",
abbreviations = list(""),
oral_ddd = NA_real_))
mutate(ab = as.character(ab)) %>%
filter(ab == "SMF") %>%
mutate(
ab = "PMD",
atc = "J04AK08",
cid = 456199,
name = "Pretomanid",
abbreviations = list(""),
oral_ddd = NA_real_
))
@ -675,25 +732,24 @@ antibiotics <- antibiotics %>%
updated_atc <- as.list(antibiotics$atc)
get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
ab_name <- gsub("/", " and ", tolower(ab_name), fixed = TRUE)
# we will do a search on their website, which means:
# go to the url
atc_tbl <- read_html(url) %>%
atc_tbl <- read_html(url) %>%
# get all forms
html_form() %>%
# get the second form (the first form is a global website form)
.[[2]] %>%
.[[2]] %>%
# set the name input box to our search parameter
html_form_set(name = ab_name) %>%
html_form_set(name = ab_name) %>%
# hit Submit
html_form_submit() %>%
html_form_submit() %>%
# read the resulting page
read_html() %>%
read_html() %>%
# retrieve the table on it
html_node("table") %>%
html_node("table") %>%
# transform it to an R data set
html_table(header = FALSE)
# and get the ATCs (first column) of only exact hits
@ -702,9 +758,10 @@ get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
# this takes around 4 minutes (some are skipped and go faster)
for (i in seq_len(nrow(antibiotics))) {
message(percentage(i / nrow(antibiotics), digits = 1),
" - Downloading ", antibiotics$name[i],
appendLF = FALSE)
message(percentage(i / nrow(antibiotics), digits = 1),
" - Downloading ", antibiotics$name[i],
appendLF = FALSE
)
atcs <- get_atcs(antibiotics$name[i])
if (length(atcs) > 0) {
updated_atc[[i]] <- atcs

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -28,8 +28,10 @@
get_atc_table <- function(atc_group) {
# give as input J0XXX, like atc_group = "J05AB"
downloaded <- read_html(paste0("https://www.whocc.no/atc_ddd_index/?code=", atc_group, "&showdescription=no"))
table_title <- downloaded %>% html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>% html_text()
table_content <- downloaded %>%
table_title <- downloaded %>%
html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>%
html_text()
table_content <- downloaded %>%
html_nodes("table") %>%
html_table(header = TRUE) %>%
# returns list, so make data.frame out of it
@ -37,8 +39,8 @@ get_atc_table <- function(atc_group) {
# select right columns
select(atc = ATC.code, name = Name, ddd = DDD, unit = U, ddd_type = Adm.R) %>%
# fill empty rows
mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>%
pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>%
mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>%
pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>%
mutate(atc_group = table_title)
if (!"ddd_O" %in% colnames(table_content)) {
table_content <- table_content %>% mutate(ddd_O = NA_real_, unit_O = NA_character_)
@ -46,9 +48,10 @@ get_atc_table <- function(atc_group) {
if (!"ddd_P" %in% colnames(table_content)) {
table_content <- table_content %>% mutate(ddd_P = NA_real_, unit_P = NA_character_)
}
table_content %>% select(atc, name, atc_group,
oral_ddd = ddd_O, oral_units = unit_O,
iv_ddd = ddd_P, iv_units = unit_P)
table_content %>% select(atc, name, atc_group,
oral_ddd = ddd_O, oral_units = unit_O,
iv_ddd = ddd_P, iv_units = unit_P
)
}
# these are the relevant groups for input: https://www.whocc.no/atc_ddd_index/?code=J05A (J05 only contains J05A)
@ -62,32 +65,38 @@ for (i in 2:length(atc_groups)) {
}
# arrange on name, untibble it
antivirals <- antivirals %>% arrange(name) %>% as.data.frame(stringsAsFactors = FALSE)
antivirals <- antivirals %>%
arrange(name) %>%
as.data.frame(stringsAsFactors = FALSE)
# add PubChem Compound ID (cid) and their trade names - functions are in file to create `antibiotics` data set
CIDs <- get_CID(antivirals$name)
# these could not be found:
antivirals[is.na(CIDs),] %>% View()
# get brand names from PubChem
antivirals[is.na(CIDs), ] %>% View()
# get brand names from PubChem
synonyms <- get_synonyms(CIDs)
synonyms <- lapply(synonyms,
function(x) {
if (length(x) == 0 | all(is.na(x))) {
""
} else {
x
}})
synonyms <- lapply(
synonyms,
function(x) {
if (length(x) == 0 | all(is.na(x))) {
""
} else {
x
}
}
)
antivirals <- antivirals %>%
transmute(atc,
cid = CIDs,
name,
atc_group,
synonyms = unname(synonyms),
oral_ddd,
oral_units,
iv_ddd,
iv_units)
cid = CIDs,
name,
atc_group,
synonyms = unname(synonyms),
oral_ddd,
oral_units,
iv_ddd,
iv_units
)
# save it
usethis::use_data(antivirals, overwrite = TRUE)

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -32,37 +32,45 @@ library(cleaner)
# download the PDF file, open in Acrobat Pro and export as Excel workbook
breakpoints_version <- 11
dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>%
format_names(snake_case = TRUE, penicillins = "drug") %>%
filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>%
filter(!is.na(standard_dosage)) %>%
dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>%
format_names(snake_case = TRUE, penicillins = "drug") %>%
filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>%
filter(!is.na(standard_dosage)) %>%
# keep only one drug in the table
arrange(desc(drug)) %>%
mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
#distinct(drug, .keep_all = TRUE) %>%
arrange(drug) %>%
mutate(ab = as.ab(drug),
ab_name = ab_name(ab, language = NULL))
arrange(desc(drug)) %>%
mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
# distinct(drug, .keep_all = TRUE) %>%
arrange(drug) %>%
mutate(
ab = as.ab(drug),
ab_name = ab_name(ab, language = NULL)
)
dosage_source <- bind_rows(
# oral
dosage_source %>%
dosage_source %>%
filter(standard_dosage %like% " oral") %>%
mutate(standard_dosage = gsub("oral.*", "oral", standard_dosage),
high_dosage = if_else(high_dosage %like% "oral",
gsub("oral.*", "oral", high_dosage),
NA_character_)),
mutate(
standard_dosage = gsub("oral.*", "oral", standard_dosage),
high_dosage = if_else(high_dosage %like% "oral",
gsub("oral.*", "oral", high_dosage),
NA_character_
)
),
# iv
dosage_source %>%
dosage_source %>%
filter(standard_dosage %like% " iv") %>%
mutate(standard_dosage = gsub(".* or ", "", standard_dosage),
high_dosage = if_else(high_dosage %like% "( or | iv)",
gsub(".* or ", "", high_dosage),
NA_character_)),
mutate(
standard_dosage = gsub(".* or ", "", standard_dosage),
high_dosage = if_else(high_dosage %like% "( or | iv)",
gsub(".* or ", "", high_dosage),
NA_character_
)
),
# im
dosage_source %>%
dosage_source %>%
filter(standard_dosage %like% " im")
) %>%
) %>%
arrange(drug)
@ -71,34 +79,36 @@ get_dosage_lst <- function(col_data) {
# remove new lines
gsub(" ?(\n|\t)+ ?", " ", .) %>%
# keep only the first suggestion, replace all after 'or' and more informative texts
gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>%
gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>%
# remove (1 MU)
gsub(" [(][0-9] [A-Z]+[)]", "", .) %>%
gsub(" [(][0-9] [A-Z]+[)]", "", .) %>%
# remove parentheses
gsub("[)(]", "", .) %>%
gsub("[)(]", "", .) %>%
# remove drug names
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
gsub(" (acid|dose)", "", .)# %>%
# keep lowest value only (25-30 mg -> 25 mg)
# gsub("[-].*? ", " ", .)
dosage_lst <- lapply(strsplit(standard, " x "),
function(x) {
dose <- x[1]
if (dose %like% "under") {
dose <- NA_character_
}
admin <- x[2]
list(
dose = trimws(dose),
dose_times = gsub("^([0-9.]+).*", "\\1", admin),
administration = clean_character(admin),
notes = "",
original_txt = ""
)
})
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
gsub(" (acid|dose)", "", .) # %>%
# keep lowest value only (25-30 mg -> 25 mg)
# gsub("[-].*? ", " ", .)
dosage_lst <- lapply(
strsplit(standard, " x "),
function(x) {
dose <- x[1]
if (dose %like% "under") {
dose <- NA_character_
}
admin <- x[2]
list(
dose = trimws(dose),
dose_times = gsub("^([0-9.]+).*", "\\1", admin),
administration = clean_character(admin),
notes = "",
original_txt = ""
)
}
)
for (i in seq_len(length(col_data))) {
dosage_lst[[i]]$original_txt <- gsub("\n", " ", col_data[i])
if (col_data[i] %like% " (or|with|loading|depending|over) ") {
@ -147,12 +157,15 @@ dosage <- bind_rows(
notes = sapply(uti, function(x) x$notes),
original_txt = sapply(uti, function(x) x$original_txt),
stringsAsFactors = FALSE
)) %>%
mutate(eucast_version = breakpoints_version,
dose_times = as.integer(dose_times),
administration = gsub("([a-z]+) .*", "\\1", administration)) %>%
arrange(name, administration, type) %>%
filter(!is.na(dose), dose != ".") %>%
)
) %>%
mutate(
eucast_version = breakpoints_version,
dose_times = as.integer(dose_times),
administration = gsub("([a-z]+) .*", "\\1", administration)
) %>%
arrange(name, administration, type) %>%
filter(!is.na(dose), dose != ".") %>%
as.data.frame(stringsAsFactors = FALSE)
rownames(dosage) <- NULL

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -25,74 +25,118 @@
patients <- unlist(lapply(LETTERS, paste0, 1:10))
patients_table <- data.frame(patient_id = patients,
gender = c(rep("M", 135),
rep("F", 125)))
patients_table <- data.frame(
patient_id = patients,
gender = c(
rep("M", 135),
rep("F", 125)
)
)
dates <- seq(as.Date("2011-01-01"), as.Date("2020-01-01"), by = "day")
bacteria_a <- c("E. coli", "S. aureus",
"S. pneumoniae", "K. pneumoniae")
bacteria_a <- c(
"E. coli", "S. aureus",
"S. pneumoniae", "K. pneumoniae"
)
bacteria_b <- c("esccol", "staaur", "strpne", "klepne")
bacteria_c <- c("Escherichia coli", "Staphylococcus aureus",
"Streptococcus pneumoniae", "Klebsiella pneumoniae")
bacteria_c <- c(
"Escherichia coli", "Staphylococcus aureus",
"Streptococcus pneumoniae", "Klebsiella pneumoniae"
)
ab_interpretations <- c("S", "I", "R")
ab_interpretations_messy = c("R", "< 0.5 S", "I")
ab_interpretations_messy <- c("R", "< 0.5 S", "I")
sample_size <- 1000
data_a <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
hospital = "A",
bacteria = sample(bacteria_a, size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)),
AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)),
AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)),
CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)),
GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)))
data_a <- data.frame(
date = sample(dates, size = sample_size, replace = TRUE),
hospital = "A",
bacteria = sample(bacteria_a,
size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)
),
AMX = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)
),
AMC = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)
),
CIP = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)
),
GEN = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)
)
)
data_b <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
hospital = "B",
bacteria = sample(bacteria_b, size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)),
AMX = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)),
AMC = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)),
CIP = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)),
GEN = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)))
data_b <- data.frame(
date = sample(dates, size = sample_size, replace = TRUE),
hospital = "B",
bacteria = sample(bacteria_b,
size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)
),
AMX = sample(ab_interpretations_messy,
size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)
),
AMC = sample(ab_interpretations_messy,
size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)
),
CIP = sample(ab_interpretations_messy,
size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)
),
GEN = sample(ab_interpretations_messy,
size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)
)
)
data_c <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
hospital = "C",
bacteria = sample(bacteria_c, size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)),
AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)),
AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)),
CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)),
GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)))
data_c <- data.frame(
date = sample(dates, size = sample_size, replace = TRUE),
hospital = "C",
bacteria = sample(bacteria_c,
size = sample_size, replace = TRUE,
prob = c(0.50, 0.25, 0.15, 0.10)
),
AMX = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.60, 0.05, 0.35)
),
AMC = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.75, 0.10, 0.15)
),
CIP = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.80, 0.00, 0.20)
),
GEN = sample(ab_interpretations,
size = sample_size, replace = TRUE,
prob = c(0.92, 0.00, 0.08)
)
)
example_isolates_unclean <- data_a %>%
example_isolates_unclean <- data_a %>%
bind_rows(data_b, data_c)
example_isolates_unclean$patient_id <- sample(patients, size = nrow(example_isolates_unclean), replace = TRUE)
example_isolates_unclean <- example_isolates_unclean %>%
select(patient_id, hospital, date, bacteria, everything()) %>%
example_isolates_unclean <- example_isolates_unclean %>%
select(patient_id, hospital, date, bacteria, everything()) %>%
dataset_UTF8_to_ASCII()
usethis::use_data(example_isolates_unclean, overwrite = TRUE, internal = FALSE, version = 2, compress = "xz")

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -32,19 +32,22 @@ for (i in seq_len(nrow(antibiotics))) {
}
int_resis <- eucast_rules(int_resis,
eucast_rules_df = subset(AMR:::EUCAST_RULES_DF,
is.na(have_these_values) & reference.version == 3.3),
info = FALSE)
eucast_rules_df = subset(
AMR:::EUCAST_RULES_DF,
is.na(have_these_values) & reference.version == 3.3
),
info = FALSE
)
int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>%
int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>%
tidyr::pivot_longer(-mo) %>%
filter(value == "R") %>%
filter(value == "R") %>%
select(mo, ab = name)
# remove lab drugs
untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE]
int_resis2 <- int_resis2 %>%
filter(!ab %in% untreatable) %>%
int_resis2 <- int_resis2 %>%
filter(!ab %in% untreatable) %>%
arrange(mo, ab)
intrinsic_resistant <- as.data.frame(int_resis2, stringsAsFactors = FALSE)

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -39,7 +39,7 @@ test_mo <- microorganisms$mo
get_author_year <- function(ref) {
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
authors2 <- gsub(" ?\\(Approved Lists [0-9]+\\) ?", " () ", authors2)
authors2 <- gsub(" [)(]+ $", "", authors2)
@ -47,14 +47,16 @@ get_author_year <- function(ref) {
authors2 <- trimws(gsub("^[(](.*)[)]$", "\\1", authors2))
# only take part after brackets if there's a name
authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
gsub(".*[)] (.*)", "\\1", authors2),
authors2)
gsub(".*[)] (.*)", "\\1", authors2),
authors2
)
# get year from last 4 digits
lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
# can never be later than now
lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
NA,
lastyear)
lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
NA,
lastyear
)
# get authors without last year
authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
# remove nonsense characters from names
@ -72,18 +74,19 @@ get_author_year <- function(ref) {
authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
# combine author and year if year is available
ref <- ifelse(!is.na(lastyear),
paste0(authors, ", ", lastyear),
authors)
paste0(authors, ", ", lastyear),
authors
)
# fix beginning and ending
ref <- gsub(", $", "", ref)
ref <- gsub("^, ", "", ref)
ref <- gsub("^(emend|et al.,?)", "", ref)
ref <- trimws(ref)
ref <- gsub("'", "", ref)
# a lot start with a lowercase character - fix that
ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
# specific one for the French that are named dOrbigny
# specific one for the French that are named dOrbigny
ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)])
ref <- gsub(" +", " ", ref)
ref
@ -92,21 +95,23 @@ get_author_year <- function(ref) {
df_remove_nonASCII <- function(df) {
# Remove non-ASCII characters (these are not allowed by CRAN)
df %>%
mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
# also remove invalid characters
mutate_if(is.character, ~gsub("[\"'`]+", "", .)) %>%
mutate_if(is.character, ~ gsub("[\"'`]+", "", .)) %>%
AMR:::dataset_UTF8_to_ASCII()
}
abbreviate_mo <- function(x, minlength = 5, prefix = "", ...) {
# keep a starting Latin ae
suppressWarnings(
gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
abbreviate(minlength = minlength,
use.classes = TRUE,
method = "both.sides", ...) %>%
paste0(prefix, .) %>%
toupper() %>%
gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
abbreviate(
minlength = minlength,
use.classes = TRUE,
method = "both.sides", ...
) %>%
paste0(prefix, .) %>%
toupper() %>%
gsub("(\u00C6|\u00E6)+", "AE", .)
)
}
@ -119,63 +124,74 @@ taxonomy <- read_csv(file_location)
new_synonyms <- taxonomy %>%
left_join(taxonomy,
by = c("record_lnk" = "record_no"),
suffix = c("", ".new")) %>%
filter(!is.na(record_lnk)) %>%
mutate_all(~ifelse(is.na(.), "", .)) %>%
transmute(fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
ref = get_author_year(authors),
prevalence = 0) %>%
distinct(fullname, .keep_all = TRUE) %>%
filter(fullname != fullname_new) %>%
# this part joins this table to itself to correct for entries that had >1 renames,
by = c("record_lnk" = "record_no"),
suffix = c("", ".new")
) %>%
filter(!is.na(record_lnk)) %>%
mutate_all(~ ifelse(is.na(.), "", .)) %>%
transmute(
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
ref = get_author_year(authors),
prevalence = 0
) %>%
distinct(fullname, .keep_all = TRUE) %>%
filter(fullname != fullname_new) %>%
# this part joins this table to itself to correct for entries that had >1 renames,
# such as:
# Bacteroides tectum -> Bacteroides tectus -> Bacteroides pyogenes
left_join(., .,
by = c("fullname_new" = "fullname"),
suffix = c("", ".2")) %>%
mutate(fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
ref = ifelse(!is.na(ref.2), ref.2, ref)) %>%
select(-ends_with(".2"))
left_join(., .,
by = c("fullname_new" = "fullname"),
suffix = c("", ".2")
) %>%
mutate(
fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
ref = ifelse(!is.na(ref.2), ref.2, ref)
) %>%
select(-ends_with(".2"))
mo_became_synonym <- microorganisms %>%
mo_became_synonym <- microorganisms %>%
filter(fullname %in% new_synonyms$fullname)
updated_microorganisms <- taxonomy %>%
filter(is.na(record_lnk)) %>%
mutate_all(~ifelse(is.na(.), "", .)) %>%
transmute(mo = "",
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
kingdom = "Bacteria",
phylum = "",
class = "",
order = "",
family = "",
genus = trimws(genus_name),
species = trimws(replace_na(sp_epithet, "")),
subspecies = trimws(replace_na(subsp_epithet, "")),
rank = case_when(subspecies == "" & species == "" ~ "genus",
subspecies == "" ~ "species",
TRUE ~ "subsp."),
ref = get_author_year(authors),
species_id = as.character(record_no),
source = "LPSN",
prevalence = 0,
snomed = NA)
filter(is.na(record_lnk)) %>%
mutate_all(~ ifelse(is.na(.), "", .)) %>%
transmute(
mo = "",
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
kingdom = "Bacteria",
phylum = "",
class = "",
order = "",
family = "",
genus = trimws(genus_name),
species = trimws(replace_na(sp_epithet, "")),
subspecies = trimws(replace_na(subsp_epithet, "")),
rank = case_when(
subspecies == "" & species == "" ~ "genus",
subspecies == "" ~ "species",
TRUE ~ "subsp."
),
ref = get_author_year(authors),
species_id = as.character(record_no),
source = "LPSN",
prevalence = 0,
snomed = NA
)
new_microorganisms <- updated_microorganisms %>%
new_microorganisms <- updated_microorganisms %>%
filter(!fullname %in% microorganisms$fullname)
genera_with_mo_code <- updated_microorganisms %>%
filter(genus %in% (microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% pull(genus))) %>%
distinct(genus) %>%
distinct(genus) %>%
left_join(microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% select(mo, genus),
by = "genus")
genera_without_mo_code <- updated_microorganisms %>%
filter(!genus %in% genera_with_mo_code$genus) %>%
pull(genus) %>%
by = "genus"
)
genera_without_mo_code <- updated_microorganisms %>%
filter(!genus %in% genera_with_mo_code$genus) %>%
pull(genus) %>%
unique()
genera_without_mo_code_abbr <- genera_without_mo_code %>% abbreviate_mo(5, prefix = "B_")
@ -184,65 +200,73 @@ genera_without_mo_code_abbr[genera_without_mo_code_abbr %in% microorganisms$mo]
# all unique??
sum(genera_without_mo_code_abbr %in% microorganisms$mo) == 0
genus_abb <- tibble(genus = genera_without_mo_code,
abbr = genera_without_mo_code_abbr) %>%
genus_abb <- tibble(
genus = genera_without_mo_code,
abbr = genera_without_mo_code_abbr
) %>%
bind_rows(microorganisms %>%
filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
transmute(genus, abbr = as.character(mo))) %>%
filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
transmute(genus, abbr = as.character(mo))) %>%
arrange(genus)
# Update taxonomy ---------------------------------------------------------
# fill in the taxonomy of new genera
updated_taxonomy <- tibble(phylum = character(0),
class = character(0),
order = character(0),
family = character(0),
genus = character(0))
updated_taxonomy <- tibble(
phylum = character(0),
class = character(0),
order = character(0),
family = character(0),
genus = character(0)
)
for (page in LETTERS) {
message("Downloading page ", page, "... ", appendLF = FALSE)
url <- paste0("https://lpsn.dsmz.de/genus?page=", page)
x <- xml2::read_html(url) %>%
rvest::html_node(".main-list") %>%
x <- xml2::read_html(url) %>%
rvest::html_node(".main-list") %>%
# evety list element with a set <id> attribute
rvest::html_nodes("li[id]")
for (i in seq_len(length(x))) {
txt <- x %>%
magrittr::extract2(i) %>%
txt <- x %>%
magrittr::extract2(i) %>%
rvest::html_text() %>%
gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>%
gsub("Candidatus ", "", ., fixed = TRUE) %>%
gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>%
gsub("Candidatus ", "", ., fixed = TRUE) %>%
gsub("[ \t\r\n\"]+", "|", .) %>%
gsub("\\|ShowHide.*", "", .) %>%
gsub("[\\[\\]]", "", ., fixed = TRUE) %>%
gsub("^\\|", "", .) %>%
strsplit("|", fixed = TRUE) %>%
gsub("\\|ShowHide.*", "", .) %>%
gsub("[\\[\\]]", "", ., fixed = TRUE) %>%
gsub("^\\|", "", .) %>%
strsplit("|", fixed = TRUE) %>%
unlist()
txt[txt == "NA"] <- ""
txt <- gsub("[^A-Za-z]+", "", txt)
updated_taxonomy <- updated_taxonomy %>%
bind_rows(tibble(phylum = txt[2],
class = txt[3],
order = txt[4],
family = txt[5],
genus = txt[6]))
updated_taxonomy <- updated_taxonomy %>%
bind_rows(tibble(
phylum = txt[2],
class = txt[3],
order = txt[4],
family = txt[5],
genus = txt[6]
))
}
message(length(x), " entries (total ", nrow(updated_taxonomy), ")")
}
# Create new microorganisms -----------------------------------------------
new_microorganisms <- new_microorganisms %>%
left_join(genus_abb, by = "genus") %>%
group_by(genus) %>%
mutate(species_abb = abbreviate_mo(species, 4)) %>%
group_by(genus, species) %>%
mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>%
ungroup() %>%
mutate(mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
mo = gsub("_+$", "", mo)) %>%
new_microorganisms <- new_microorganisms %>%
left_join(genus_abb, by = "genus") %>%
group_by(genus) %>%
mutate(species_abb = abbreviate_mo(species, 4)) %>%
group_by(genus, species) %>%
mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>%
ungroup() %>%
mutate(
mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
mo = gsub("_+$", "", mo)
) %>%
select(-matches("abb"))
# add taxonomy new microorganisms
@ -256,128 +280,150 @@ MOs$mo[which(duplicated(MOs$mo))] <- paste0(MOs$mo[which(duplicated(MOs$mo))], 1
# all unique?
!any(duplicated(MOs$mo))
MOs <- MOs %>%
MOs <- MOs %>%
# remove entries that are now a synonym
filter(!fullname %in% new_synonyms$fullname) %>%
filter(!fullname %in% new_synonyms$fullname) %>%
# update the taxonomy
left_join(updated_taxonomy, by = "genus", suffix = c("", ".new")) %>%
mutate(phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
class = ifelse(!is.na(class.new), class.new, class),
order = ifelse(!is.na(order.new), order.new, order),
family = ifelse(!is.na(family.new), family.new, family)) %>%
mutate(
phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
class = ifelse(!is.na(class.new), class.new, class),
order = ifelse(!is.na(order.new), order.new, order),
family = ifelse(!is.na(family.new), family.new, family)
) %>%
select(-ends_with(".new")) %>%
# update prevalence based on taxonomy (Berends et al., 2021)
mutate(prevalence = case_when(
class == "Gammaproteobacteria"
| genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
class == "Gammaproteobacteria" |
genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
~ 1,
kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi")
& (phylum %in% c("Proteobacteria",
"Firmicutes",
"Actinobacteria",
"Sarcomastigophora")
| genus %in% MO_PREVALENT_GENERA
| rank %in% c("kingdom", "phylum", "class", "order", "family"))
kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") &
(phylum %in% c(
"Proteobacteria",
"Firmicutes",
"Actinobacteria",
"Sarcomastigophora"
) |
genus %in% MO_PREVALENT_GENERA |
rank %in% c("kingdom", "phylum", "class", "order", "family"))
~ 2,
TRUE ~ 3
))
# add all mssing genera, families and orders
MOs <- MOs %>%
MOs <- MOs %>%
bind_rows(
MOs %>%
MOs %>%
arrange(genus, species) %>%
distinct(genus, .keep_all = TRUE) %>%
filter(rank == "species", source != "manually added") %>%
mutate(mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
fullname = genus,
species = "",
subspecies = "",
rank = "genus",
species_id = "",
snomed = NA,
ref = NA_character_),
MOs %>%
mutate(
mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
fullname = genus,
species = "",
subspecies = "",
rank = "genus",
species_id = "",
snomed = NA,
ref = NA_character_
),
MOs %>%
group_by(family) %>%
filter(!any(rank == "family") & n() > 1) %>%
ungroup() %>%
ungroup() %>%
arrange(family) %>%
distinct(family, .keep_all = TRUE) %>%
distinct(family, .keep_all = TRUE) %>%
filter(!family %in% c("", NA), source != "manually added") %>%
mutate(mo = paste0(substr(kingdom, 1, 1), "_[FAM]_",
abbreviate(family,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE)),
mo = toupper(mo),
fullname = family,
genus = "",
species = "",
subspecies = "",
rank = "family",
species_id = "",
snomed = NA,
ref = NA_character_),
MOs %>%
mutate(
mo = paste0(
substr(kingdom, 1, 1), "_[FAM]_",
abbreviate(family,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE
)
),
mo = toupper(mo),
fullname = family,
genus = "",
species = "",
subspecies = "",
rank = "family",
species_id = "",
snomed = NA,
ref = NA_character_
),
MOs %>%
group_by(order) %>%
filter(!any(rank == "order") & n() > 1) %>%
ungroup() %>%
ungroup() %>%
arrange(order) %>%
distinct(order, .keep_all = TRUE) %>%
distinct(order, .keep_all = TRUE) %>%
filter(!order %in% c("", NA), source != "manually added") %>%
mutate(mo = paste0(substr(kingdom, 1, 1), "_[ORD]_",
abbreviate(order,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE)),
mo = toupper(mo),
fullname = order,
family = "",
genus = "",
species = "",
subspecies = "",
rank = "order",
species_id = "",
snomed = NA,
ref = NA_character_)
mutate(
mo = paste0(
substr(kingdom, 1, 1), "_[ORD]_",
abbreviate(order,
minlength = 8,
use.classes = TRUE,
method = "both.sides",
strict = FALSE
)
),
mo = toupper(mo),
fullname = order,
family = "",
genus = "",
species = "",
subspecies = "",
rank = "order",
species_id = "",
snomed = NA,
ref = NA_character_
)
) %>%
arrange(fullname)
# clean up
MOs <- MOs %>%
MOs <- MOs %>%
df_remove_nonASCII()
# Add LPSN record IDs -----------------------------------------------------
records_ids <- taxonomy %>%
mutate(across(1:3, function(x) { x[is.na(x)] <- ""; x}),
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))) %>%
transmute(fullname, species_id = as.numeric(record_no)) %>%
arrange(fullname, species_id) %>%
mutate(across(1:3, function(x) {
x[is.na(x)] <- ""
x
}),
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))
) %>%
transmute(fullname, species_id = as.numeric(record_no)) %>%
arrange(fullname, species_id) %>%
distinct(fullname, .keep_all = TRUE)
message("Adding ", sum(records_ids$fullname %in% microorganisms$fullname), " LPSN record IDs")
MOs <- MOs %>%
select(-species_id) %>%
select(-species_id) %>%
left_join(records_ids, by = "fullname") %>%
relocate(species_id, .after = ref) %>%
mutate(source = case_when(!is.na(species_id) ~ "LPSN",
source %unlike% "manual" ~ "CoL",
TRUE ~ source))
relocate(species_id, .after = ref) %>%
mutate(source = case_when(
!is.na(species_id) ~ "LPSN",
source %unlike% "manual" ~ "CoL",
TRUE ~ source
))
# Merge synonyms ----------------------------------------------------------
# remove synonyms that are now valid names
MOs.old <- microorganisms.old %>%
MOs.old <- microorganisms.old %>%
# add new synonyms
bind_rows(new_synonyms) %>%
filter(!fullname %in% MOs$fullname) %>%
arrange(fullname) %>%
distinct(fullname, fullname_new, .keep_all = TRUE) %>%
bind_rows(new_synonyms) %>%
filter(!fullname %in% MOs$fullname) %>%
arrange(fullname) %>%
distinct(fullname, fullname_new, .keep_all = TRUE) %>%
# add prevalence to old taxonomic names
select(-prevalence) %>%
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>%
select(-prevalence) %>%
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>%
# clean up
df_remove_nonASCII()
@ -397,16 +443,18 @@ microorganisms.old <- MOs.old
# we keep them both
microorganisms <- microorganisms %>%
bind_rows(microorganisms %>%
filter(fullname == "Branhamella catarrhalis") %>%
mutate(mo = "B_MRXLL_CTRR",
fullname = "Moraxella catarrhalis",
genus = "Moraxella",
ref = "Henriksen et al., 1968",
species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
snomed = as.list(24226003))) %>%
arrange(fullname) %>%
filter(fullname == "Branhamella catarrhalis") %>%
mutate(
mo = "B_MRXLL_CTRR",
fullname = "Moraxella catarrhalis",
genus = "Moraxella",
ref = "Henriksen et al., 1968",
species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
snomed = as.list(24226003)
)) %>%
arrange(fullname) %>%
df_remove_nonASCII()
microorganisms.old <- microorganisms.old %>%
microorganisms.old <- microorganisms.old %>%
filter(fullname != "Moraxella catarrhalis")
# ---
@ -444,7 +492,7 @@ rm(intrinsic_resistant)
# load new data sets again
devtools::load_all(".")
source("data-raw/pre-commit-hook.R")
source("data-raw/_pre_commit_hook.R")
devtools::load_all(".")

View File

@ -25,26 +25,28 @@ sapply(files, function(file) {
contents <<- c(contents, readLines(file))
invisible()
})
contents <- c(intro,
copyright,
"",
contents)
contents <- c(
intro,
copyright,
"",
contents
)
# remove lines starting with "#'" and NULL and write to file
contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)]
# now make it independent on UseMethod, since we will not export these functions
contents <- gsub('UseMethod[(]"(.*?)"[)]',
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
paste(contents, collapse = "|||"),
perl = TRUE) %>%
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
paste(contents, collapse = "|||"),
perl = TRUE
) %>%
# add commit to intro part
gsub("{commit}", commit, ., fixed = TRUE) %>%
# add date to intro part
gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>%
strsplit(split = "|||", fixed = TRUE) %>%
unlist() %>%
unlist() %>%
# add "pm_" as prefix to all functions
gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .)
@ -56,7 +58,7 @@ for (i in seq_len(length(new_pm_names))) {
contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents)
}
# replace %>% with %pm>%
# replace %>% with %pm>%
contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE)
# fix for new lines, since n() also existed
contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE)
@ -70,6 +72,8 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE)
contents <- gsub("(pm_)+", "pm_", contents)
# special case for pm_distinct(), we need '.keep_all' to work
contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
# pm_pull does not correct for tibbles, misses the drop argument
contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]"
# who needs US spelling?
contents <- contents[!grepl("summarize", contents)]

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -40,88 +40,104 @@ ORGLIST <- read_tsv("data-raw/WHONET/Codes/ORGLIST.txt", na = c("", "NA", "-"),
# create data set for generic rules (i.e., AB-specific but not MO-specific)
rsi_generic <- DRGLST %>%
filter(CLSI == "X" | EUCST == "X") %>%
select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
mutate(ab = as.ab(ab),
across(matches("(CLSI|EUCST)"), as.double)) %>%
pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
separate(method, into = c("guideline", "method"), sep = "_") %>%
select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
mutate(
ab = as.ab(ab),
across(matches("(CLSI|EUCST)"), as.double)
) %>%
pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
separate(method, into = c("guideline", "method"), sep = "_") %>%
mutate(method = ifelse(method %like% "D",
gsub("D", "DISK_", method, fixed = TRUE),
gsub("M", "MIC_", method, fixed = TRUE))) %>%
separate(method, into = c("method", "rsi"), sep = "_") %>%
gsub("D", "DISK_", method, fixed = TRUE),
gsub("M", "MIC_", method, fixed = TRUE)
)) %>%
separate(method, into = c("method", "rsi"), sep = "_") %>%
# I is in the middle, so we only need R and S (saves data)
filter(rsi %in% c("R", "S")) %>%
filter(rsi %in% c("R", "S")) %>%
pivot_wider(names_from = rsi, values_from = value) %>%
transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
method,
site = NA_character_,
mo = as.mo("UNKNOWN"),
ab,
ref_tbl = "Generic rules",
disk_dose,
breakpoint_S = S,
breakpoint_R = R,
uti = FALSE) %>%
transmute(
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
method,
site = NA_character_,
mo = as.mo("UNKNOWN"),
ab,
ref_tbl = "Generic rules",
disk_dose,
breakpoint_S = S,
breakpoint_R = R,
uti = FALSE
) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
rsi_generic
# create data set for AB-specific and MO-specific rules
rsi_specific <- DRGLST1 %>%
rsi_specific <- DRGLST1 %>%
# only support guidelines for humans (for now)
filter(HOST == "Human" & SITE_INF %unlike% "canine|feline",
# only CLSI and EUCAST
GUIDELINES %like% "(CLSI|EUCST)") %>%
filter(
HOST == "Human" & SITE_INF %unlike% "canine|feline",
# only CLSI and EUCAST
GUIDELINES %like% "(CLSI|EUCST)"
) %>%
# get microorganism names from another WHONET table
mutate(ORG_CODE = tolower(ORG_CODE)) %>%
mutate(ORG_CODE = tolower(ORG_CODE)) %>%
left_join(ORGLIST %>%
transmute(ORG_CODE = tolower(ORG),
SCT_TEXT = case_when(is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
is.na(SCT_TEXT) ~ ORGANISM,
TRUE ~ SCT_TEXT)) %>%
# WHO for 'Generic'
bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
# WHO for 'Enterobacterales'
bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))
) %>%
transmute(
ORG_CODE = tolower(ORG),
SCT_TEXT = case_when(
is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
is.na(SCT_TEXT) ~ ORGANISM,
TRUE ~ SCT_TEXT
)
) %>%
# WHO for 'Generic'
bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
# WHO for 'Enterobacterales'
bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))) %>%
# still some manual cleaning required
filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
method = toupper(TESTMETHOD),
site = SITE_INF,
mo = as.mo(SCT_TEXT),
ab = as.ab(WHON5_CODE),
ref_tbl = REF_TABLE,
disk_dose = POTENCY,
breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
uti = site %like% "(UTI|urinary|urine)") %>%
filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
transmute(
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
method = toupper(TESTMETHOD),
site = SITE_INF,
mo = as.mo(SCT_TEXT),
ab = as.ab(WHON5_CODE),
ref_tbl = REF_TABLE,
disk_dose = POTENCY,
breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
uti = site %like% "(UTI|urinary|urine)"
) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
rsi_specific
rsi_translation <- rsi_generic %>%
bind_rows(rsi_specific) %>%
rsi_translation <- rsi_generic %>%
bind_rows(rsi_specific) %>%
# add the taxonomic rank index, used for sorting (so subspecies match first, order matches last)
mutate(rank_index = case_when(mo_rank(mo) %like% "(infra|sub)" ~ 1,
mo_rank(mo) == "species" ~ 2,
mo_rank(mo) == "genus" ~ 3,
mo_rank(mo) == "family" ~ 4,
mo_rank(mo) == "order" ~ 5,
TRUE ~ 6),
.after = mo) %>%
arrange(desc(guideline), ab, mo, method) %>%
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
mutate(
rank_index = case_when(
mo_rank(mo) %like% "(infra|sub)" ~ 1,
mo_rank(mo) == "species" ~ 2,
mo_rank(mo) == "genus" ~ 3,
mo_rank(mo) == "family" ~ 4,
mo_rank(mo) == "order" ~ 5,
TRUE ~ 6
),
.after = mo
) %>%
arrange(desc(guideline), ab, mo, method) %>%
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
as.data.frame(stringsAsFactors = FALSE)
# disks MUST be 6-50 mm, so correct where that is wrong:
rsi_translation[which(rsi_translation$method == "DISK" &
(is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
(is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
rsi_translation[which(rsi_translation$method == "DISK" &
(is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
(is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
m <- unique(as.double(as.mic(levels(as.mic(1)))))
rsi_translation[which(rsi_translation$method == "MIC" &
is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
rsi_translation[which(rsi_translation$method == "MIC" &
is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
# WHONET has no >1024 but instead uses 1025, 513, etc, so raise these one higher valid MIC factor level:
rsi_translation[which(rsi_translation$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
@ -134,18 +150,18 @@ rsi_translation[which(rsi_translation$breakpoint_R == 1025), "breakpoint_R"] <-
# WHONET file: S <= 8 and R >= 16
# this will make an MIC of 12 I, which should be R, so:
eucast_mics <- which(rsi_translation$guideline %like% "EUCAST" &
rsi_translation$method == "MIC" &
log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
!is.na(rsi_translation$breakpoint_R))
rsi_translation$method == "MIC" &
log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
!is.na(rsi_translation$breakpoint_R))
old_R <- rsi_translation[eucast_mics, "breakpoint_R", drop = TRUE]
old_S <- rsi_translation[eucast_mics, "breakpoint_S", drop = TRUE]
new_R <- 2 ^ (log2(old_R) - 1)
new_R <- 2^(log2(old_R) - 1)
new_R[new_R < old_S | is.na(as.mic(new_R))] <- old_S[new_R < old_S | is.na(as.mic(new_R))]
rsi_translation[eucast_mics, "breakpoint_R"] <- new_R
eucast_disks <- which(rsi_translation$guideline %like% "EUCAST" &
rsi_translation$method == "DISK" &
rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
!is.na(rsi_translation$breakpoint_R))
rsi_translation$method == "DISK" &
rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
!is.na(rsi_translation$breakpoint_R))
rsi_translation[eucast_disks, "breakpoint_R"] <- rsi_translation[eucast_disks, "breakpoint_R", drop = TRUE] + 1
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -31,8 +31,8 @@ library(tidyverse)
# - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
# - check that current online version is higher than SNOMED_VERSION$current_version
# - if so, click on 'Download Value Set', choose 'TXT'
snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
select(1:2) %>%
snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
select(1:2) %>%
set_names(c("snomed", "mo"))
# save all valid genera, species and subspecies
@ -41,17 +41,21 @@ vctr <- tolower(vctr[vctr %like% "^[a-z]+$"])
# remove all parts of the name that are no valid values in genera, species or subspecies
# this takes ~20 seconds
snomed <- snomed %>%
mutate(fullname = vapply(FUN.VALUE = character(1),
# split on space and/or comma
strsplit(tolower(mo), "[ ,]"),
function(x) trimws(paste0(x[x %in% vctr], collapse = " "))),
# remove " group"
fullname = gsub(" group", "", fullname, fixed = TRUE))
snomed <- snomed %>%
mutate(
fullname = vapply(
FUN.VALUE = character(1),
# split on space and/or comma
strsplit(tolower(mo), "[ ,]"),
function(x) trimws(paste0(x[x %in% vctr], collapse = " "))
),
# remove " group"
fullname = gsub(" group", "", fullname, fixed = TRUE)
)
snomed_keep <- snomed %>%
filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
group_by(fullname_lower = fullname) %>%
snomed_keep <- snomed %>%
filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
group_by(fullname_lower = fullname) %>%
summarise(snomed = list(snomed))
message(nrow(snomed_keep), " MO's will get a SNOMED code.")
@ -65,10 +69,9 @@ microorganisms <- microorganisms %>%
# join new snomed
left_join(snomed_keep) %>%
# remove dummy var
select(-fullname_lower) %>%
select(-fullname_lower) %>%
AMR:::dataset_UTF8_to_ASCII()
# don't forget to update the version number in SNOMED_VERSION in ./R/globals.R!
# usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")

View File

@ -1,68 +1,77 @@
microorganisms <- microorganisms |> bind_rows(
# Toxoplasma
data.frame(mo = "P_TXPL_GOND", # species
fullname = "Toxoplasma gondii",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "Toxoplasma",
species = "gondii",
subspecies = "",
rank = "species",
ref = "Nicolle et al., 1908",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
data.frame(mo = "P_TXPL", # genus
fullname = "Toxoplasma",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "Toxoplasma",
species = "",
subspecies = "",
rank = "genus",
ref = "Nicolle et al., 1909",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
data.frame(mo = "[FAM]_SRCCYSTD", # family
fullname = "Sarcocystidae",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "",
species = "",
subspecies = "",
rank = "family",
ref = "Poche, 1913",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
data.frame(mo = "[ORD]_EUCCCDRD", # order
fullname = "Eucoccidiorida",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "",
genus = "",
species = "",
subspecies = "",
rank = "order",
ref = "Leger et al., 1910",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE),
) |>
microorganisms <- microorganisms |>
bind_rows(
# Toxoplasma
data.frame(
mo = "P_TXPL_GOND", # species
fullname = "Toxoplasma gondii",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "Toxoplasma",
species = "gondii",
subspecies = "",
rank = "species",
ref = "Nicolle et al., 1908",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE
),
data.frame(
mo = "P_TXPL", # genus
fullname = "Toxoplasma",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "Toxoplasma",
species = "",
subspecies = "",
rank = "genus",
ref = "Nicolle et al., 1909",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE
),
data.frame(
mo = "[FAM]_SRCCYSTD", # family
fullname = "Sarcocystidae",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "Sarcocystidae",
genus = "",
species = "",
subspecies = "",
rank = "family",
ref = "Poche, 1913",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE
),
data.frame(
mo = "[ORD]_EUCCCDRD", # order
fullname = "Eucoccidiorida",
kingdom = "(unknown kingdom)",
phylum = "Apicomplexa",
class = "Conoidasida",
order = "Eucoccidiorida",
family = "",
genus = "",
species = "",
subspecies = "",
rank = "order",
ref = "Leger et al., 1910",
species_id = NA_real_,
source = "manually added",
prevalence = 2,
stringsAsFactors = FALSE
),
) |>
arrange(fullname)