mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 19:01:51 +02:00
styled, unit test fix
This commit is contained in:
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -28,10 +28,14 @@ install.packages("data-raw/tinytest_1.3.1.tar.gz", dependencies = c("Depends", "
|
||||
install.packages(getwd(), repos = NULL, type = "source")
|
||||
# install.packages("data-raw/AMR_latest.tar.gz", dependencies = FALSE)
|
||||
|
||||
pkg_suggests <- gsub("[^a-zA-Z0-9]+", "",
|
||||
unlist(strsplit(unlist(packageDescription("AMR",
|
||||
fields = c("Suggests", "Enhances", "LinkingTo"))),
|
||||
split = ", ?")))
|
||||
pkg_suggests <- gsub(
|
||||
"[^a-zA-Z0-9]+", "",
|
||||
unlist(strsplit(unlist(packageDescription("AMR",
|
||||
fields = c("Suggests", "Enhances", "LinkingTo")
|
||||
)),
|
||||
split = ", ?"
|
||||
))
|
||||
)
|
||||
pkg_suggests <- unname(pkg_suggests[!is.na(pkg_suggests)])
|
||||
cat("################################################\n")
|
||||
cat("Packages listed in Suggests/Enhances:", paste(pkg_suggests, collapse = ", "), "\n")
|
||||
@ -49,22 +53,26 @@ if (length(to_install) == 0) {
|
||||
for (i in seq_len(length(to_install))) {
|
||||
cat("Installing package", to_install[i], "\n")
|
||||
tryCatch(install.packages(to_install[i],
|
||||
type = "source",
|
||||
repos = "https://cran.rstudio.com/",
|
||||
dependencies = c("Depends", "Imports", "LinkingTo"),
|
||||
quiet = FALSE),
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message))
|
||||
type = "source",
|
||||
repos = "https://cran.rstudio.com/",
|
||||
dependencies = c("Depends", "Imports", "LinkingTo"),
|
||||
quiet = FALSE
|
||||
),
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message)
|
||||
)
|
||||
if (.Platform$OS.type != "unix" && !to_install[i] %in% rownames(utils::installed.packages())) {
|
||||
tryCatch(install.packages(to_install[i],
|
||||
type = "binary",
|
||||
repos = "https://cran.rstudio.com/",
|
||||
dependencies = c("Depends", "Imports", "LinkingTo"),
|
||||
quiet = FALSE),
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message))
|
||||
type = "binary",
|
||||
repos = "https://cran.rstudio.com/",
|
||||
dependencies = c("Depends", "Imports", "LinkingTo"),
|
||||
quiet = FALSE
|
||||
),
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -76,8 +84,9 @@ if (length(to_update) == 0) {
|
||||
for (i in seq_len(length(to_update))) {
|
||||
cat("Updating package '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
|
||||
tryCatch(update.packages(to_update[i], repos = "https://cran.rstudio.com/", ask = FALSE),
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message))
|
||||
# message = function(m) invisible(),
|
||||
warning = function(w) message(w$message),
|
||||
error = function(e) message(e$message)
|
||||
)
|
||||
cat("Updated to '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -28,9 +28,10 @@
|
||||
|
||||
if (!file.exists("DESCRIPTION") || !"Package: AMR" %in% readLines("DESCRIPTION")) {
|
||||
stop("Be sure to run this script in the root location of the AMR package folder.\n",
|
||||
"Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
|
||||
"Current working directory: ", getwd(),
|
||||
call. = FALSE)
|
||||
"Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
|
||||
"Current working directory: ", getwd(),
|
||||
call. = FALSE
|
||||
)
|
||||
}
|
||||
|
||||
# save old global env to restore later
|
||||
@ -41,34 +42,42 @@ load("R/sysdata.rda", envir = lang_env)
|
||||
|
||||
# replace language objects with updates
|
||||
message("Reading translation file...")
|
||||
lang_env$TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
fill = TRUE,
|
||||
strip.white = TRUE,
|
||||
encoding = "UTF-8",
|
||||
fileEncoding = "UTF-8",
|
||||
na.strings = c(NA, "", NULL),
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = "")
|
||||
lang_env$TRANSLATIONS <- utils::read.delim(
|
||||
file = "data-raw/translations.tsv",
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
fill = TRUE,
|
||||
strip.white = TRUE,
|
||||
encoding = "UTF-8",
|
||||
fileEncoding = "UTF-8",
|
||||
na.strings = c(NA, "", NULL),
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = ""
|
||||
)
|
||||
|
||||
lang_env$LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
|
||||
lapply(lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
|
||||
function(x) list(exonym = x[1], endonym = x[2])))
|
||||
lang_env$LANGUAGES_SUPPORTED_NAMES <- c(
|
||||
list(en = list(exonym = "English", endonym = "English")),
|
||||
lapply(
|
||||
lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
|
||||
function(x) list(exonym = x[1], endonym = x[2])
|
||||
)
|
||||
)
|
||||
|
||||
lang_env$LANGUAGES_SUPPORTED <- names(lang_env$LANGUAGES_SUPPORTED_NAMES)
|
||||
|
||||
# save env to internal package data
|
||||
# usethis::use_data() does not allow to save a list :(
|
||||
message("Saving to internal data...")
|
||||
save(list = names(lang_env),
|
||||
file = "R/sysdata.rda",
|
||||
ascii = FALSE,
|
||||
version = 2,
|
||||
compress = "xz",
|
||||
envir = lang_env)
|
||||
save(
|
||||
list = names(lang_env),
|
||||
file = "R/sysdata.rda",
|
||||
ascii = FALSE,
|
||||
version = 2,
|
||||
compress = "xz",
|
||||
envir = lang_env
|
||||
)
|
||||
|
||||
rm(lang_env)
|
||||
|
||||
|
@ -24,7 +24,7 @@
|
||||
# ==================================================================== #
|
||||
|
||||
# Run this file to update the package using:
|
||||
# source("data-raw/pre-commit-hook.R")
|
||||
# source("data-raw/_pre_commit_hook.R")
|
||||
|
||||
library(dplyr, warn.conflicts = FALSE)
|
||||
devtools::load_all(quiet = TRUE)
|
||||
@ -36,41 +36,54 @@ old_globalenv <- ls(envir = globalenv())
|
||||
# Save internal data to R/sysdata.rda -------------------------------------
|
||||
|
||||
# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
|
||||
EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
|
||||
skip = 10,
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
strip.white = TRUE,
|
||||
na = c(NA, "", NULL)) %>%
|
||||
EUCAST_RULES_DF <- utils::read.delim(
|
||||
file = "data-raw/eucast_rules.tsv",
|
||||
skip = 10,
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
strip.white = TRUE,
|
||||
na = c(NA, "", NULL)
|
||||
) %>%
|
||||
# take the order of the reference.rule_group column in the original data file
|
||||
mutate(reference.rule_group = factor(reference.rule_group,
|
||||
levels = unique(reference.rule_group),
|
||||
ordered = TRUE),
|
||||
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
|
||||
arrange(reference.rule_group,
|
||||
reference.version,
|
||||
sorting_rule,
|
||||
reference.rule) %>%
|
||||
mutate(
|
||||
reference.rule_group = factor(reference.rule_group,
|
||||
levels = unique(reference.rule_group),
|
||||
ordered = TRUE
|
||||
),
|
||||
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)
|
||||
) %>%
|
||||
arrange(
|
||||
reference.rule_group,
|
||||
reference.version,
|
||||
sorting_rule,
|
||||
reference.rule
|
||||
) %>%
|
||||
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
|
||||
select(-sorting_rule)
|
||||
|
||||
TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
fill = TRUE,
|
||||
strip.white = TRUE,
|
||||
encoding = "UTF-8",
|
||||
fileEncoding = "UTF-8",
|
||||
na.strings = c(NA, "", NULL),
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = "")
|
||||
TRANSLATIONS <- utils::read.delim(
|
||||
file = "data-raw/translations.tsv",
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
fill = TRUE,
|
||||
strip.white = TRUE,
|
||||
encoding = "UTF-8",
|
||||
fileEncoding = "UTF-8",
|
||||
na.strings = c(NA, "", NULL),
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = ""
|
||||
)
|
||||
|
||||
LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
|
||||
lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
|
||||
function(x) list(exonym = x[1], endonym = x[2])))
|
||||
LANGUAGES_SUPPORTED_NAMES <- c(
|
||||
list(en = list(exonym = "English", endonym = "English")),
|
||||
lapply(
|
||||
TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
|
||||
function(x) list(exonym = x[1], endonym = x[2])
|
||||
)
|
||||
)
|
||||
|
||||
LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)
|
||||
|
||||
@ -84,43 +97,53 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
MO_staph <- AMR::microorganisms
|
||||
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
|
||||
if (type == "CoNS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
|
||||
"croceilyticus",
|
||||
"debuckii", "devriesei", "edaphicus", "epidermidis",
|
||||
"equorum", "felis", "fleurettii", "gallinarum",
|
||||
"haemolyticus", "hominis", "jettensis", "kloosii",
|
||||
"lentus", "lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
|
||||
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
|
||||
"sciuri", "simulans", "stepanovicii", "succinus",
|
||||
"ureilyticus",
|
||||
"vitulinus", "vitulus", "warneri", "xylosus",
|
||||
"caledonicus", "canis",
|
||||
"durrellii", "lloydii")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
|
||||
"mo", drop = TRUE]
|
||||
MO_staph[which(MO_staph$species %in% c(
|
||||
"coagulase-negative", "argensis", "arlettae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
|
||||
"croceilyticus",
|
||||
"debuckii", "devriesei", "edaphicus", "epidermidis",
|
||||
"equorum", "felis", "fleurettii", "gallinarum",
|
||||
"haemolyticus", "hominis", "jettensis", "kloosii",
|
||||
"lentus", "lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
|
||||
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
|
||||
"sciuri", "simulans", "stepanovicii", "succinus",
|
||||
"ureilyticus",
|
||||
"vitulinus", "vitulus", "warneri", "xylosus",
|
||||
"caledonicus", "canis",
|
||||
"durrellii", "lloydii"
|
||||
) |
|
||||
(MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
|
||||
"mo",
|
||||
drop = TRUE
|
||||
]
|
||||
} else if (type == "CoPS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
|
||||
"agnetis", "argenteus",
|
||||
"cornubiensis",
|
||||
"delphini", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schweitzeri", "simiae",
|
||||
"roterodami")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
|
||||
"mo", drop = TRUE]
|
||||
MO_staph[which(MO_staph$species %in% c(
|
||||
"coagulase-positive", "coagulans",
|
||||
"agnetis", "argenteus",
|
||||
"cornubiensis",
|
||||
"delphini", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schweitzeri", "simiae",
|
||||
"roterodami"
|
||||
) |
|
||||
(MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
|
||||
"mo",
|
||||
drop = TRUE
|
||||
]
|
||||
}
|
||||
}
|
||||
create_MO_fullname_lower <- function() {
|
||||
MO_lookup <- AMR::microorganisms
|
||||
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
|
||||
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
|
||||
MO_lookup$species,
|
||||
MO_lookup$subspecies)))
|
||||
MO_lookup$fullname_lower <- tolower(trimws(paste(
|
||||
MO_lookup$genus,
|
||||
MO_lookup$species,
|
||||
MO_lookup$subspecies
|
||||
)))
|
||||
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
|
||||
MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname", drop = TRUE])
|
||||
MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
|
||||
@ -130,59 +153,103 @@ MO_CONS <- create_species_cons_cops("CoNS")
|
||||
MO_COPS <- create_species_cons_cops("CoPS")
|
||||
MO_STREP_ABCG <- as.mo(MO_lookup[which(MO_lookup$genus == "Streptococcus"), "mo", drop = TRUE], Lancefield = TRUE) %in% c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_GRPC", "B_STRPT_GRPG")
|
||||
MO_FULLNAME_LOWER <- create_MO_fullname_lower()
|
||||
MO_PREVALENT_GENERA <- c("Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
|
||||
"Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
|
||||
"Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
|
||||
"Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
|
||||
"Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
|
||||
"Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
|
||||
"Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
|
||||
"Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
|
||||
"Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
|
||||
"Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
|
||||
"Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
|
||||
"Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
|
||||
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
|
||||
"Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
|
||||
"Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
|
||||
"Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
|
||||
"Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
|
||||
"Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
|
||||
"Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
|
||||
"Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
|
||||
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
|
||||
"Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
|
||||
"Weeksella", "Wuchereria")
|
||||
MO_PREVALENT_GENERA <- c(
|
||||
"Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
|
||||
"Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
|
||||
"Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
|
||||
"Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
|
||||
"Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
|
||||
"Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
|
||||
"Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
|
||||
"Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
|
||||
"Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
|
||||
"Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
|
||||
"Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
|
||||
"Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
|
||||
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
|
||||
"Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
|
||||
"Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
|
||||
"Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
|
||||
"Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
|
||||
"Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
|
||||
"Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
|
||||
"Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
|
||||
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
|
||||
"Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
|
||||
"Weeksella", "Wuchereria"
|
||||
)
|
||||
|
||||
# antibiotic groups
|
||||
# (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv)
|
||||
globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab")
|
||||
AB_AMINOGLYCOSIDES <- antibiotics %>% filter(group %like% "aminoglycoside") %>% pull(ab)
|
||||
AB_AMINOGLYCOSIDES <- antibiotics %>%
|
||||
filter(group %like% "aminoglycoside") %>%
|
||||
pull(ab)
|
||||
AB_AMINOPENICILLINS <- as.ab(c("AMP", "AMX"))
|
||||
AB_ANTIFUNGALS <- AB_lookup %>% filter(group %like% "antifungal") %>% pull(ab)
|
||||
AB_ANTIMYCOBACTERIALS <- AB_lookup %>% filter(group %like% "antimycobacterial") %>% pull(ab)
|
||||
AB_CARBAPENEMS <- antibiotics %>% filter(group %like% "carbapenem") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS <- antibiotics %>% filter(group %like% "cephalosporin") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS_1ST <- antibiotics %>% filter(group %like% "cephalosporin.*1") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS_2ND <- antibiotics %>% filter(group %like% "cephalosporin.*2") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS_3RD <- antibiotics %>% filter(group %like% "cephalosporin.*3") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS_4TH <- antibiotics %>% filter(group %like% "cephalosporin.*4") %>% pull(ab)
|
||||
AB_CEPHALOSPORINS_5TH <- antibiotics %>% filter(group %like% "cephalosporin.*5") %>% pull(ab)
|
||||
AB_ANTIFUNGALS <- AB_lookup %>%
|
||||
filter(group %like% "antifungal") %>%
|
||||
pull(ab)
|
||||
AB_ANTIMYCOBACTERIALS <- AB_lookup %>%
|
||||
filter(group %like% "antimycobacterial") %>%
|
||||
pull(ab)
|
||||
AB_CARBAPENEMS <- antibiotics %>%
|
||||
filter(group %like% "carbapenem") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_1ST <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin.*1") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_2ND <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin.*2") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_3RD <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin.*3") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_4TH <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin.*4") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_5TH <- antibiotics %>%
|
||||
filter(group %like% "cephalosporin.*5") %>%
|
||||
pull(ab)
|
||||
AB_CEPHALOSPORINS_EXCEPT_CAZ <- AB_CEPHALOSPORINS[AB_CEPHALOSPORINS != "CAZ"]
|
||||
AB_FLUOROQUINOLONES <- antibiotics %>% filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>% pull(ab)
|
||||
AB_GLYCOPEPTIDES <- antibiotics %>% filter(group %like% "glycopeptide") %>% pull(ab)
|
||||
AB_FLUOROQUINOLONES <- antibiotics %>%
|
||||
filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>%
|
||||
pull(ab)
|
||||
AB_GLYCOPEPTIDES <- antibiotics %>%
|
||||
filter(group %like% "glycopeptide") %>%
|
||||
pull(ab)
|
||||
AB_LIPOGLYCOPEPTIDES <- as.ab(c("DAL", "ORI", "TLV")) # dalba/orita/tela
|
||||
AB_GLYCOPEPTIDES_EXCEPT_LIPO <- AB_GLYCOPEPTIDES[!AB_GLYCOPEPTIDES %in% AB_LIPOGLYCOPEPTIDES]
|
||||
AB_LINCOSAMIDES <- antibiotics %>% filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>% pull(ab)
|
||||
AB_MACROLIDES <- antibiotics %>% filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>% pull(ab)
|
||||
AB_OXAZOLIDINONES <- antibiotics %>% filter(group %like% "oxazolidinone") %>% pull(ab)
|
||||
AB_PENICILLINS <- antibiotics %>% filter(group %like% "penicillin") %>% pull(ab)
|
||||
AB_POLYMYXINS <- antibiotics %>% filter(group %like% "polymyxin") %>% pull(ab)
|
||||
AB_QUINOLONES <- antibiotics %>% filter(group %like% "quinolone") %>% pull(ab)
|
||||
AB_STREPTOGRAMINS <- antibiotics %>% filter(atc_group2 %like% "streptogramin") %>% pull(ab)
|
||||
AB_TETRACYCLINES <- antibiotics %>% filter(group %like% "tetracycline") %>% pull(ab)
|
||||
AB_LINCOSAMIDES <- antibiotics %>%
|
||||
filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>%
|
||||
pull(ab)
|
||||
AB_MACROLIDES <- antibiotics %>%
|
||||
filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>%
|
||||
pull(ab)
|
||||
AB_OXAZOLIDINONES <- antibiotics %>%
|
||||
filter(group %like% "oxazolidinone") %>%
|
||||
pull(ab)
|
||||
AB_PENICILLINS <- antibiotics %>%
|
||||
filter(group %like% "penicillin") %>%
|
||||
pull(ab)
|
||||
AB_POLYMYXINS <- antibiotics %>%
|
||||
filter(group %like% "polymyxin") %>%
|
||||
pull(ab)
|
||||
AB_QUINOLONES <- antibiotics %>%
|
||||
filter(group %like% "quinolone") %>%
|
||||
pull(ab)
|
||||
AB_STREPTOGRAMINS <- antibiotics %>%
|
||||
filter(atc_group2 %like% "streptogramin") %>%
|
||||
pull(ab)
|
||||
AB_TETRACYCLINES <- antibiotics %>%
|
||||
filter(group %like% "tetracycline") %>%
|
||||
pull(ab)
|
||||
AB_TETRACYCLINES_EXCEPT_TGC <- AB_TETRACYCLINES[AB_TETRACYCLINES != "TGC"]
|
||||
AB_TRIMETHOPRIMS <- antibiotics %>% filter(group %like% "trimethoprim") %>% pull(ab)
|
||||
AB_TRIMETHOPRIMS <- antibiotics %>%
|
||||
filter(group %like% "trimethoprim") %>%
|
||||
pull(ab)
|
||||
AB_UREIDOPENICILLINS <- as.ab(c("PIP", "TZP", "AZL", "MEZ"))
|
||||
AB_BETALACTAMS <- c(AB_PENICILLINS, AB_CEPHALOSPORINS, AB_CARBAPENEMS)
|
||||
# this will be used for documentation:
|
||||
@ -194,15 +261,21 @@ create_AB_lookup <- function() {
|
||||
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
|
||||
c("ab", "atc", "cid", "name",
|
||||
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
|
||||
drop = FALSE]),
|
||||
stringsAsFactors = FALSE)),
|
||||
function(x) {
|
||||
x <- generalise_antibiotic_name(unname(unlist(x)))
|
||||
x[x != ""]
|
||||
}))
|
||||
AB_lookup$generalised_all <- unname(lapply(
|
||||
as.list(as.data.frame(t(AB_lookup[,
|
||||
c(
|
||||
"ab", "atc", "cid", "name",
|
||||
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]
|
||||
),
|
||||
drop = FALSE
|
||||
]),
|
||||
stringsAsFactors = FALSE
|
||||
)),
|
||||
function(x) {
|
||||
x <- generalise_antibiotic_name(unname(unlist(x)))
|
||||
x[x != ""]
|
||||
}
|
||||
))
|
||||
AB_lookup[, colnames(AB_lookup)[colnames(AB_lookup) %like% "^generalised"]]
|
||||
}
|
||||
AB_LOOKUP <- create_AB_lookup()
|
||||
@ -210,48 +283,49 @@ AB_LOOKUP <- create_AB_lookup()
|
||||
# Export to package as internal data ----
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('sysdata.rda')} to {usethis::ui_value('R/')}"))
|
||||
suppressMessages(usethis::use_data(EUCAST_RULES_DF,
|
||||
TRANSLATIONS,
|
||||
LANGUAGES_SUPPORTED_NAMES,
|
||||
LANGUAGES_SUPPORTED,
|
||||
MO_CONS,
|
||||
MO_COPS,
|
||||
MO_STREP_ABCG,
|
||||
MO_FULLNAME_LOWER,
|
||||
MO_PREVALENT_GENERA,
|
||||
AB_LOOKUP,
|
||||
AB_AMINOGLYCOSIDES,
|
||||
AB_AMINOPENICILLINS,
|
||||
AB_ANTIFUNGALS,
|
||||
AB_ANTIMYCOBACTERIALS,
|
||||
AB_CARBAPENEMS,
|
||||
AB_CEPHALOSPORINS,
|
||||
AB_CEPHALOSPORINS_1ST,
|
||||
AB_CEPHALOSPORINS_2ND,
|
||||
AB_CEPHALOSPORINS_3RD,
|
||||
AB_CEPHALOSPORINS_4TH,
|
||||
AB_CEPHALOSPORINS_5TH,
|
||||
AB_CEPHALOSPORINS_EXCEPT_CAZ,
|
||||
AB_FLUOROQUINOLONES,
|
||||
AB_LIPOGLYCOPEPTIDES,
|
||||
AB_GLYCOPEPTIDES,
|
||||
AB_GLYCOPEPTIDES_EXCEPT_LIPO,
|
||||
AB_LINCOSAMIDES,
|
||||
AB_MACROLIDES,
|
||||
AB_OXAZOLIDINONES,
|
||||
AB_PENICILLINS,
|
||||
AB_POLYMYXINS,
|
||||
AB_QUINOLONES,
|
||||
AB_STREPTOGRAMINS,
|
||||
AB_TETRACYCLINES,
|
||||
AB_TETRACYCLINES_EXCEPT_TGC,
|
||||
AB_TRIMETHOPRIMS,
|
||||
AB_UREIDOPENICILLINS,
|
||||
AB_BETALACTAMS,
|
||||
DEFINED_AB_GROUPS,
|
||||
internal = TRUE,
|
||||
overwrite = TRUE,
|
||||
version = 2,
|
||||
compress = "xz"))
|
||||
TRANSLATIONS,
|
||||
LANGUAGES_SUPPORTED_NAMES,
|
||||
LANGUAGES_SUPPORTED,
|
||||
MO_CONS,
|
||||
MO_COPS,
|
||||
MO_STREP_ABCG,
|
||||
MO_FULLNAME_LOWER,
|
||||
MO_PREVALENT_GENERA,
|
||||
AB_LOOKUP,
|
||||
AB_AMINOGLYCOSIDES,
|
||||
AB_AMINOPENICILLINS,
|
||||
AB_ANTIFUNGALS,
|
||||
AB_ANTIMYCOBACTERIALS,
|
||||
AB_CARBAPENEMS,
|
||||
AB_CEPHALOSPORINS,
|
||||
AB_CEPHALOSPORINS_1ST,
|
||||
AB_CEPHALOSPORINS_2ND,
|
||||
AB_CEPHALOSPORINS_3RD,
|
||||
AB_CEPHALOSPORINS_4TH,
|
||||
AB_CEPHALOSPORINS_5TH,
|
||||
AB_CEPHALOSPORINS_EXCEPT_CAZ,
|
||||
AB_FLUOROQUINOLONES,
|
||||
AB_LIPOGLYCOPEPTIDES,
|
||||
AB_GLYCOPEPTIDES,
|
||||
AB_GLYCOPEPTIDES_EXCEPT_LIPO,
|
||||
AB_LINCOSAMIDES,
|
||||
AB_MACROLIDES,
|
||||
AB_OXAZOLIDINONES,
|
||||
AB_PENICILLINS,
|
||||
AB_POLYMYXINS,
|
||||
AB_QUINOLONES,
|
||||
AB_STREPTOGRAMINS,
|
||||
AB_TETRACYCLINES,
|
||||
AB_TETRACYCLINES_EXCEPT_TGC,
|
||||
AB_TRIMETHOPRIMS,
|
||||
AB_UREIDOPENICILLINS,
|
||||
AB_BETALACTAMS,
|
||||
DEFINED_AB_GROUPS,
|
||||
internal = TRUE,
|
||||
overwrite = TRUE,
|
||||
version = 2,
|
||||
compress = "xz"
|
||||
))
|
||||
|
||||
# Export data sets to the repository in different formats -----------------
|
||||
|
||||
@ -273,12 +347,15 @@ write_md5 <- function(object) {
|
||||
close(conn)
|
||||
}
|
||||
changed_md5 <- function(object) {
|
||||
tryCatch({
|
||||
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
|
||||
compared <- md5(object) != readLines(con = conn)
|
||||
close(conn)
|
||||
compared
|
||||
}, error = function(e) TRUE)
|
||||
tryCatch(
|
||||
{
|
||||
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
|
||||
compared <- md5(object) != readLines(con = conn)
|
||||
close(conn)
|
||||
compared
|
||||
},
|
||||
error = function(e) TRUE
|
||||
)
|
||||
}
|
||||
|
||||
# give official names to ABs and MOs
|
||||
@ -306,7 +383,7 @@ if (changed_md5(microorganisms)) {
|
||||
max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
|
||||
mo <- microorganisms
|
||||
mo$snomed <- max_50_snomed
|
||||
mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
|
||||
mo <- dplyr::mutate_if(mo, ~ !is.numeric(.), as.character)
|
||||
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
|
||||
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
|
||||
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
|
||||
@ -328,7 +405,7 @@ if (changed_md5(microorganisms.old)) {
|
||||
try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
|
||||
ab <- dplyr::mutate_if(antibiotics, ~ !is.numeric(.), as.character)
|
||||
if (changed_md5(ab)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('data-raw/')}"))
|
||||
write_md5(ab)
|
||||
@ -342,7 +419,7 @@ if (changed_md5(ab)) {
|
||||
try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
|
||||
av <- dplyr::mutate_if(antivirals, ~ !is.numeric(.), as.character)
|
||||
if (changed_md5(av)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('data-raw/')}"))
|
||||
write_md5(av)
|
||||
@ -357,9 +434,11 @@ if (changed_md5(av)) {
|
||||
}
|
||||
|
||||
# give official names to ABs and MOs
|
||||
intrinsicR <- data.frame(microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
|
||||
antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
|
||||
stringsAsFactors = FALSE)
|
||||
intrinsicR <- data.frame(
|
||||
microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
|
||||
antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
if (changed_md5(intrinsicR)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('intrinsic_resistant')} to {usethis::ui_value('data-raw/')}"))
|
||||
write_md5(intrinsicR)
|
||||
@ -394,4 +473,25 @@ rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
|
||||
rm(current_globalenv)
|
||||
|
||||
devtools::load_all(quiet = TRUE)
|
||||
devtools::document()
|
||||
|
||||
|
||||
# Document pkg ------------------------------------------------------------
|
||||
usethis::ui_info("Documenting package")
|
||||
suppressMessages(devtools::document(quiet = TRUE))
|
||||
|
||||
|
||||
# Style pkg ---------------------------------------------------------------
|
||||
usethis::ui_info("Styling package")
|
||||
invisible(capture.output(styler::style_pkg(
|
||||
style = styler::tidyverse_style,
|
||||
filetype = c("R", "Rmd")
|
||||
)))
|
||||
invisible(capture.output(styler::style_dir(
|
||||
path = "inst", # unit tests
|
||||
style = styler::tidyverse_style,
|
||||
filetype = c("R", "Rmd")
|
||||
)))
|
||||
|
||||
|
||||
# Finished ----------------------------------------------------------------
|
||||
usethis::ui_info("All done")
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -30,8 +30,9 @@
|
||||
# 2. Download the CSV from https://loinc.org/download/loinc-table-file-csv/ (Loinc_2.67_Text_2.67.zip)
|
||||
# 3. Read Loinc.csv that's in this zip file
|
||||
loinc_df <- read.csv("data-raw/Loinc.csv",
|
||||
row.names = NULL,
|
||||
stringsAsFactors = FALSE)
|
||||
row.names = NULL,
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
|
||||
# 4. Clean and add
|
||||
library(dplyr)
|
||||
@ -39,7 +40,10 @@ library(cleaner)
|
||||
library(AMR)
|
||||
loinc_df %>% freq(CLASS) # to find the drugs
|
||||
loinc_df <- loinc_df %>% filter(CLASS == "DRUG/TOX")
|
||||
ab_names <- antibiotics %>% pull(name) %>% paste0(collapse = "|") %>% paste0("(", ., ")")
|
||||
ab_names <- antibiotics %>%
|
||||
pull(name) %>%
|
||||
paste0(collapse = "|") %>%
|
||||
paste0("(", ., ")")
|
||||
|
||||
antibiotics$loinc <- as.list(rep(NA_character_, nrow(antibiotics)))
|
||||
for (i in seq_len(nrow(antibiotics))) {
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -34,10 +34,10 @@
|
||||
#
|
||||
# All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes.
|
||||
#
|
||||
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
|
||||
# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
|
||||
# copy of the software and associated documentation files (the "Software"), to deal in the Software
|
||||
# without restriction, including without limitation the rights to use, copy, modify, merge, publish,
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
# is furnished to do so', given that a copyright notice is given in the software.
|
||||
#
|
||||
# Copyright notice on {date}, the day this code was downloaded, as found on
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -31,11 +31,10 @@ library(AMR)
|
||||
|
||||
# USE THIS FUNCTION TO READ THE EUCAST EXCEL FILE THAT CONTAINS THE BREAKPOINT TABLES
|
||||
|
||||
read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
|
||||
read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
message("\nGetting sheet: ", sheet)
|
||||
sheet.bak <- sheet
|
||||
|
||||
|
||||
uncertainties <- NULL
|
||||
add_uncertainties <- function(old, new) {
|
||||
if (is.null(old)) {
|
||||
@ -44,55 +43,64 @@ read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
bind_rows(old, new)
|
||||
}
|
||||
}
|
||||
|
||||
raw_data <- read.xlsx(xlsxFile = file,
|
||||
sheet = sheet,
|
||||
colNames = FALSE,
|
||||
skipEmptyRows = FALSE,
|
||||
skipEmptyCols = FALSE,
|
||||
fillMergedCells = TRUE,
|
||||
na.strings = c("", "-", "NA", "IE", "IP"))
|
||||
probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~sum(!is.na(.))) %>% unlist() %>% max())
|
||||
|
||||
raw_data <- read.xlsx(
|
||||
xlsxFile = file,
|
||||
sheet = sheet,
|
||||
colNames = FALSE,
|
||||
skipEmptyRows = FALSE,
|
||||
skipEmptyCols = FALSE,
|
||||
fillMergedCells = TRUE,
|
||||
na.strings = c("", "-", "NA", "IE", "IP")
|
||||
)
|
||||
probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~ sum(!is.na(.))) %>% unlist() %>% max())
|
||||
if (probable_rows == 0) {
|
||||
message("NO ROWS FOUND")
|
||||
message("------------------------")
|
||||
return(NULL)
|
||||
}
|
||||
|
||||
|
||||
# in the info header in the Excel file, EUCAST mentions which genera are targeted
|
||||
if (sheet %like% "anaerob.*Gram.*posi") {
|
||||
sheet <- paste0(c("Actinomyces", "Bifidobacterium", "Clostridioides",
|
||||
"Clostridium", "Cutibacterium", "Eggerthella",
|
||||
"Eubacterium", "Lactobacillus", "Propionibacterium",
|
||||
"Staphylococcus saccharolyticus"),
|
||||
collapse = "_")
|
||||
sheet <- paste0(c(
|
||||
"Actinomyces", "Bifidobacterium", "Clostridioides",
|
||||
"Clostridium", "Cutibacterium", "Eggerthella",
|
||||
"Eubacterium", "Lactobacillus", "Propionibacterium",
|
||||
"Staphylococcus saccharolyticus"
|
||||
),
|
||||
collapse = "_"
|
||||
)
|
||||
} else if (sheet %like% "anaerob.*Gram.*nega") {
|
||||
sheet <- paste0(c("Bacteroides",
|
||||
"Bilophila",
|
||||
"Fusobacterium",
|
||||
"Mobiluncus",
|
||||
"Parabacteroides",
|
||||
"Porphyromonas",
|
||||
"Prevotella"),
|
||||
collapse = "_")
|
||||
sheet <- paste0(c(
|
||||
"Bacteroides",
|
||||
"Bilophila",
|
||||
"Fusobacterium",
|
||||
"Mobiluncus",
|
||||
"Parabacteroides",
|
||||
"Porphyromonas",
|
||||
"Prevotella"
|
||||
),
|
||||
collapse = "_"
|
||||
)
|
||||
} else if (sheet == "Streptococcus A,B,C,G") {
|
||||
sheet <- paste0(microorganisms %>%
|
||||
filter(genus == "Streptococcus") %>%
|
||||
mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
|
||||
filter(lancefield %like% "^Streptococcus group") %>%
|
||||
pull(fullname),
|
||||
collapse = "_")
|
||||
filter(genus == "Streptococcus") %>%
|
||||
mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
|
||||
filter(lancefield %like% "^Streptococcus group") %>%
|
||||
pull(fullname),
|
||||
collapse = "_"
|
||||
)
|
||||
} else if (sheet %like% "PK.*PD") {
|
||||
sheet <- "UNKNOWN"
|
||||
}
|
||||
mo_sheet <- paste0(suppressMessages(as.mo(unlist(strsplit(sheet, "_")))), collapse = "|")
|
||||
if (!is.null(mo_uncertainties())) uncertainties <- add_uncertainties(uncertainties, mo_uncertainties())
|
||||
|
||||
|
||||
set_columns_names <- function(x, cols) {
|
||||
colnames(x) <- cols[1:length(colnames(x))]
|
||||
x
|
||||
}
|
||||
|
||||
|
||||
get_mo <- function(x) {
|
||||
for (i in seq_len(length(x))) {
|
||||
y <- trimws(unlist(strsplit(x[i], "(,|and)")))
|
||||
@ -104,76 +112,91 @@ read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
MICs_with_trailing_superscript <- c(seq(from = 0.0011, to = 0.0019, by = 0.0001),
|
||||
seq(from = 0.031, to = 0.039, by = 0.001),
|
||||
seq(from = 0.061, to = 0.069, by = 0.001),
|
||||
seq(from = 0.1251, to = 0.1259, by = 0.0001),
|
||||
seq(from = 0.251, to = 0.259, by = 0.001),
|
||||
seq(from = 0.51, to = 0.59, by = 0.01),
|
||||
seq(from = 11, to = 19, by = 1),
|
||||
seq(from = 161, to = 169, by = 01),
|
||||
seq(from = 21, to = 29, by = 1),
|
||||
seq(from = 321, to = 329, by = 1),
|
||||
seq(from = 41, to = 49, by = 1),
|
||||
seq(from = 81, to = 89, by = 1))
|
||||
|
||||
MICs_with_trailing_superscript <- c(
|
||||
seq(from = 0.0011, to = 0.0019, by = 0.0001),
|
||||
seq(from = 0.031, to = 0.039, by = 0.001),
|
||||
seq(from = 0.061, to = 0.069, by = 0.001),
|
||||
seq(from = 0.1251, to = 0.1259, by = 0.0001),
|
||||
seq(from = 0.251, to = 0.259, by = 0.001),
|
||||
seq(from = 0.51, to = 0.59, by = 0.01),
|
||||
seq(from = 11, to = 19, by = 1),
|
||||
seq(from = 161, to = 169, by = 01),
|
||||
seq(from = 21, to = 29, by = 1),
|
||||
seq(from = 321, to = 329, by = 1),
|
||||
seq(from = 41, to = 49, by = 1),
|
||||
seq(from = 81, to = 89, by = 1)
|
||||
)
|
||||
has_superscript <- function(x) {
|
||||
# because due to floating point error, 0.1252 is not in:
|
||||
# because due to floating point error, 0.1252 is not in:
|
||||
# seq(from = 0.1251, to = 0.1259, by = 0.0001)
|
||||
sapply(x, function(x) any(near(x, MICs_with_trailing_superscript)))
|
||||
}
|
||||
|
||||
|
||||
has_zone_diameters <- rep(any(unlist(raw_data) %like% "zone diameter"), nrow(raw_data))
|
||||
|
||||
cleaned <- raw_data %>%
|
||||
as_tibble() %>%
|
||||
set_columns_names(LETTERS) %>%
|
||||
transmute(drug = A,
|
||||
MIC_S = B,
|
||||
MIC_R = C,
|
||||
disk_dose = ifelse(has_zone_diameters, E, NA_character_),
|
||||
disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
|
||||
disk_R = ifelse(has_zone_diameters, G, NA_character_)) %>%
|
||||
filter(!is.na(drug),
|
||||
!(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
|
||||
MIC_S %unlike% "(MIC|S ≤|note)",
|
||||
MIC_S %unlike% "^[-]",
|
||||
drug != MIC_S,) %>%
|
||||
mutate(administration = case_when(drug %like% "[( ]oral" ~ "oral",
|
||||
drug %like% "[( ]iv" ~ "iv",
|
||||
TRUE ~ NA_character_),
|
||||
uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
|
||||
systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
|
||||
mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)) %>%
|
||||
cleaned <- raw_data %>%
|
||||
as_tibble() %>%
|
||||
set_columns_names(LETTERS) %>%
|
||||
transmute(
|
||||
drug = A,
|
||||
MIC_S = B,
|
||||
MIC_R = C,
|
||||
disk_dose = ifelse(has_zone_diameters, E, NA_character_),
|
||||
disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
|
||||
disk_R = ifelse(has_zone_diameters, G, NA_character_)
|
||||
) %>%
|
||||
filter(
|
||||
!is.na(drug),
|
||||
!(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
|
||||
MIC_S %unlike% "(MIC|S ≤|note)",
|
||||
MIC_S %unlike% "^[-]",
|
||||
drug != MIC_S,
|
||||
) %>%
|
||||
mutate(
|
||||
administration = case_when(
|
||||
drug %like% "[( ]oral" ~ "oral",
|
||||
drug %like% "[( ]iv" ~ "iv",
|
||||
TRUE ~ NA_character_
|
||||
),
|
||||
uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
|
||||
systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
|
||||
mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)
|
||||
) %>%
|
||||
# clean disk doses
|
||||
mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>%
|
||||
mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>%
|
||||
# clean MIC and disk values
|
||||
mutate(MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
|
||||
MIC_R = gsub(".,.", "", MIC_R),
|
||||
disk_S = gsub(".,.", "", disk_S),
|
||||
disk_R = gsub(".,.", "", disk_R),
|
||||
MIC_S = clean_double(MIC_S), # make them valid numeric values
|
||||
MIC_R = clean_double(MIC_R),
|
||||
disk_S = clean_integer(disk_S),
|
||||
disk_R = clean_integer(disk_R),
|
||||
# invalid MIC values have a superscript text, delete those
|
||||
MIC_S = ifelse(has_superscript(MIC_S),
|
||||
substr(MIC_S, 1, nchar(MIC_S) - 1),
|
||||
MIC_S),
|
||||
MIC_R = ifelse(has_superscript(MIC_R),
|
||||
substr(MIC_R, 1, nchar(MIC_R) - 1),
|
||||
MIC_R),
|
||||
# and some are just awful
|
||||
MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
|
||||
MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
|
||||
) %>%
|
||||
mutate(
|
||||
MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
|
||||
MIC_R = gsub(".,.", "", MIC_R),
|
||||
disk_S = gsub(".,.", "", disk_S),
|
||||
disk_R = gsub(".,.", "", disk_R),
|
||||
MIC_S = clean_double(MIC_S), # make them valid numeric values
|
||||
MIC_R = clean_double(MIC_R),
|
||||
disk_S = clean_integer(disk_S),
|
||||
disk_R = clean_integer(disk_R),
|
||||
# invalid MIC values have a superscript text, delete those
|
||||
MIC_S = ifelse(has_superscript(MIC_S),
|
||||
substr(MIC_S, 1, nchar(MIC_S) - 1),
|
||||
MIC_S
|
||||
),
|
||||
MIC_R = ifelse(has_superscript(MIC_R),
|
||||
substr(MIC_R, 1, nchar(MIC_R) - 1),
|
||||
MIC_R
|
||||
),
|
||||
# and some are just awful
|
||||
MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
|
||||
MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
|
||||
) %>%
|
||||
# clean drug names
|
||||
mutate(drug = gsub(" ?[(, ].*$", "", drug),
|
||||
drug = gsub("[1-9]+$", "", drug),
|
||||
ab = as.ab(drug)) %>%
|
||||
select(ab, mo, everything(), -drug) %>%
|
||||
mutate(
|
||||
drug = gsub(" ?[(, ].*$", "", drug),
|
||||
drug = gsub("[1-9]+$", "", drug),
|
||||
ab = as.ab(drug)
|
||||
) %>%
|
||||
select(ab, mo, everything(), -drug) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
|
||||
# new row for every different MO mentioned
|
||||
for (i in 1:nrow(cleaned)) {
|
||||
mo <- cleaned[i, "mo", drop = TRUE]
|
||||
@ -181,37 +204,44 @@ read_EUCAST <- function(sheet, file, guideline_name) {
|
||||
mo_vect <- unlist(strsplit(mo, "|", fixed = TRUE))
|
||||
cleaned[i, "mo"] <- mo_vect[1]
|
||||
for (j in seq_len(length(mo_vect))) {
|
||||
cleaned <- bind_rows(cleaned, cleaned[i , , drop = FALSE])
|
||||
cleaned <- bind_rows(cleaned, cleaned[i, , drop = FALSE])
|
||||
cleaned[nrow(cleaned), "mo"] <- mo_vect[j]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleaned <- cleaned %>%
|
||||
distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>%
|
||||
arrange(ab, mo) %>%
|
||||
|
||||
cleaned <- cleaned %>%
|
||||
distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>%
|
||||
arrange(ab, mo) %>%
|
||||
mutate_at(c("MIC_S", "MIC_R", "disk_S", "disk_R"), as.double) %>%
|
||||
pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>%
|
||||
mutate(method = ifelse(type %like% "MIC", "MIC", "DISK"),
|
||||
type = gsub("^.*_", "breakpoint_", type)) %>%
|
||||
pivot_wider(names_from = type, values_from = value) %>%
|
||||
mutate(guideline = guideline_name,
|
||||
disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
|
||||
mo = ifelse(mo == "", mo_sheet, mo)) %>%
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>%
|
||||
pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>%
|
||||
mutate(
|
||||
method = ifelse(type %like% "MIC", "MIC", "DISK"),
|
||||
type = gsub("^.*_", "breakpoint_", type)
|
||||
) %>%
|
||||
pivot_wider(names_from = type, values_from = value) %>%
|
||||
mutate(
|
||||
guideline = guideline_name,
|
||||
disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
|
||||
mo = ifelse(mo == "", mo_sheet, mo)
|
||||
) %>%
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>%
|
||||
# comply with rsi_translation for now
|
||||
transmute(guideline,
|
||||
method,
|
||||
site = case_when(uti ~ "UTI",
|
||||
systemic ~ "Systemic",
|
||||
TRUE ~ administration),
|
||||
mo, ab,
|
||||
ref_tbl = sheet.bak,
|
||||
disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
|
||||
breakpoint_S,
|
||||
breakpoint_R) %>%
|
||||
method,
|
||||
site = case_when(
|
||||
uti ~ "UTI",
|
||||
systemic ~ "Systemic",
|
||||
TRUE ~ administration
|
||||
),
|
||||
mo, ab,
|
||||
ref_tbl = sheet.bak,
|
||||
disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
|
||||
breakpoint_S,
|
||||
breakpoint_R
|
||||
) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
|
||||
if (!is.null(uncertainties)) {
|
||||
print(uncertainties %>% distinct(input, mo, .keep_all = TRUE))
|
||||
}
|
||||
@ -231,24 +261,33 @@ guideline_name <- "EUCAST 2021"
|
||||
sheets_to_analyse <- sheets[!sheets %in% c("Content", "Changes", "Notes", "Guidance", "Dosages", "Technical uncertainty", "Topical agents")]
|
||||
|
||||
# takes the longest time:
|
||||
new_EUCAST <- read_EUCAST(sheet = sheets_to_analyse[1],
|
||||
file = file,
|
||||
guideline_name = guideline_name)
|
||||
new_EUCAST <- read_EUCAST(
|
||||
sheet = sheets_to_analyse[1],
|
||||
file = file,
|
||||
guideline_name = guideline_name
|
||||
)
|
||||
for (i in 2:length(sheets_to_analyse)) {
|
||||
tryCatch(
|
||||
new_EUCAST <<- bind_rows(new_EUCAST,
|
||||
read_EUCAST(sheet = sheets_to_analyse[i],
|
||||
file = file,
|
||||
guideline_name = guideline_name))
|
||||
, error = function(e) message(e$message))
|
||||
new_EUCAST <<- bind_rows(
|
||||
new_EUCAST,
|
||||
read_EUCAST(
|
||||
sheet = sheets_to_analyse[i],
|
||||
file = file,
|
||||
guideline_name = guideline_name
|
||||
)
|
||||
),
|
||||
error = function(e) message(e$message)
|
||||
)
|
||||
}
|
||||
|
||||
# 2021-07-12 fix for Morganellaceae (check other lines too next time)
|
||||
morg <- rsi_translation %>%
|
||||
as_tibble() %>%
|
||||
filter(ab == "IPM",
|
||||
guideline == "EUCAST 2021",
|
||||
mo == as.mo("Enterobacterales")) %>%
|
||||
filter(
|
||||
ab == "IPM",
|
||||
guideline == "EUCAST 2021",
|
||||
mo == as.mo("Enterobacterales")
|
||||
) %>%
|
||||
mutate(mo = as.mo("Morganellaceae"))
|
||||
morg[which(morg$method == "MIC"), "breakpoint_S"] <- 0.001
|
||||
morg[which(morg$method == "MIC"), "breakpoint_R"] <- 4
|
||||
@ -258,5 +297,5 @@ morg[which(morg$method == "DISK"), "breakpoint_R"] <- 19
|
||||
rsi_translation <- rsi_translation %>%
|
||||
bind_rows(morg) %>%
|
||||
bind_rows(morg %>%
|
||||
mutate(guideline = "EUCAST 2020")) %>%
|
||||
mutate(guideline = "EUCAST 2020")) %>%
|
||||
arrange(desc(guideline), ab, mo, method)
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -33,34 +33,43 @@ library(dplyr)
|
||||
library(readxl)
|
||||
DRGLST <- read_excel("DRGLST.xlsx")
|
||||
abx <- DRGLST %>%
|
||||
select(ab = WHON5_CODE,
|
||||
name = ANTIBIOTIC) %>%
|
||||
select(
|
||||
ab = WHON5_CODE,
|
||||
name = ANTIBIOTIC
|
||||
) %>%
|
||||
# remove the ones without WHONET code
|
||||
filter(!is.na(ab)) %>%
|
||||
distinct(name, .keep_all = TRUE) %>%
|
||||
# add the ones without WHONET code
|
||||
bind_rows(
|
||||
DRGLST %>%
|
||||
select(ab = WHON5_CODE,
|
||||
name = ANTIBIOTIC) %>%
|
||||
select(
|
||||
ab = WHON5_CODE,
|
||||
name = ANTIBIOTIC
|
||||
) %>%
|
||||
filter(is.na(ab)) %>%
|
||||
distinct(name, .keep_all = TRUE)
|
||||
# add new ab code later
|
||||
# add new ab code later
|
||||
) %>%
|
||||
arrange(name)
|
||||
|
||||
# add old ATC codes
|
||||
ab_old <- AMR::antibiotics %>%
|
||||
mutate(official = gsub("( and |, )", "/", official),
|
||||
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
|
||||
ifelse(is.na(certe), "", certe),
|
||||
ifelse(is.na(umcg), "", umcg),
|
||||
sep = "|")))
|
||||
mutate(
|
||||
official = gsub("( and |, )", "/", official),
|
||||
abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
|
||||
ifelse(is.na(certe), "", certe),
|
||||
ifelse(is.na(umcg), "", umcg),
|
||||
sep = "|"
|
||||
))
|
||||
)
|
||||
for (i in 1:nrow(ab_old)) {
|
||||
abbr <- ab_old[i, "abbr"]
|
||||
abbr <- strsplit(abbr, "|", fixed = TRUE) %>% unlist() %>% unique()
|
||||
abbr <- strsplit(abbr, "|", fixed = TRUE) %>%
|
||||
unlist() %>%
|
||||
unique()
|
||||
abbr <- abbr[abbr != ""]
|
||||
#print(abbr)
|
||||
# print(abbr)
|
||||
if (length(abbr) == 0) {
|
||||
ab_old[i, "abbr"] <- NA_character_
|
||||
} else {
|
||||
@ -72,50 +81,54 @@ for (i in 1:nrow(ab_old)) {
|
||||
abx_atc1 <- abx %>%
|
||||
mutate(name_lower = tolower(name)) %>%
|
||||
left_join(ab_old %>%
|
||||
select(ears_net, atc), by = c(ab = "ears_net")) %>%
|
||||
select(ears_net, atc), by = c(ab = "ears_net")) %>%
|
||||
rename(atc1 = atc) %>%
|
||||
left_join(ab_old %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
rename(atc2 = atc) %>%
|
||||
left_join(ab_old %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
rename(atc3 = atc) %>%
|
||||
left_join(ab_old %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
rename(atc4 = atc) %>%
|
||||
left_join(ab_old %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
rename(atc5 = atc) %>%
|
||||
left_join(ab_old %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
mutate(official = gsub("ine$", "in", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
|
||||
mutate(official = gsub("f", "ph", official)) %>%
|
||||
mutate(official = gsub("t", "th", official)) %>%
|
||||
mutate(official = gsub("ine$", "in", official)) %>%
|
||||
transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
|
||||
rename(atc6 = atc) %>%
|
||||
mutate(atc = case_when(!is.na(atc1) ~ atc1,
|
||||
!is.na(atc2) ~ atc2,
|
||||
!is.na(atc3) ~ atc3,
|
||||
!is.na(atc4) ~ atc4,
|
||||
!is.na(atc4) ~ atc5,
|
||||
TRUE ~ atc6)) %>%
|
||||
mutate(atc = case_when(
|
||||
!is.na(atc1) ~ atc1,
|
||||
!is.na(atc2) ~ atc2,
|
||||
!is.na(atc3) ~ atc3,
|
||||
!is.na(atc4) ~ atc4,
|
||||
!is.na(atc4) ~ atc5,
|
||||
TRUE ~ atc6
|
||||
)) %>%
|
||||
distinct(ab, name, .keep_all = TRUE) %>%
|
||||
select(ab, atc, name)
|
||||
|
||||
abx_atc2 <- ab_old %>%
|
||||
filter(!atc %in% abx_atc1$atc,
|
||||
is.na(ears_net),
|
||||
!is.na(atc_group1),
|
||||
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
|
||||
official %unlike% "(combinations| with )") %>%
|
||||
filter(
|
||||
!atc %in% abx_atc1$atc,
|
||||
is.na(ears_net),
|
||||
!is.na(atc_group1),
|
||||
atc_group1 %unlike% ("virus|vaccin|viral|immun"),
|
||||
official %unlike% "(combinations| with )"
|
||||
) %>%
|
||||
mutate(ab = NA_character_) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE) %>%
|
||||
select(ab, atc, name = official)
|
||||
@ -125,12 +138,15 @@ abx2 <- bind_rows(abx_atc1, abx_atc2)
|
||||
rm(abx_atc1)
|
||||
rm(abx_atc2)
|
||||
|
||||
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub("[/0-9-]",
|
||||
" ",
|
||||
abx2$name[is.na(abx2$ab)]),
|
||||
minlength = 3,
|
||||
method = "left.kept",
|
||||
strict = TRUE))
|
||||
abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub(
|
||||
"[/0-9-]",
|
||||
" ",
|
||||
abx2$name[is.na(abx2$ab)]
|
||||
),
|
||||
minlength = 3,
|
||||
method = "left.kept",
|
||||
strict = TRUE
|
||||
))
|
||||
|
||||
n_distinct(abx2$ab)
|
||||
|
||||
@ -150,7 +166,9 @@ for (i in 2:nrow(abx2)) {
|
||||
abx2[i, "ab"] <- paste0(abx2[i, "ab", drop = TRUE], abx2[i, "seqnr", drop = TRUE])
|
||||
}
|
||||
}
|
||||
abx2 <- abx2 %>% select(-seqnr) %>% arrange(name)
|
||||
abx2 <- abx2 %>%
|
||||
select(-seqnr) %>%
|
||||
arrange(name)
|
||||
|
||||
# everything unique??
|
||||
nrow(abx2) == n_distinct(abx2$ab)
|
||||
@ -158,8 +176,10 @@ nrow(abx2) == n_distinct(abx2$ab)
|
||||
# get ATC properties
|
||||
abx2 <- abx2 %>%
|
||||
left_join(ab_old %>%
|
||||
select(atc, abbr, atc_group1, atc_group2,
|
||||
oral_ddd, oral_units, iv_ddd, iv_units))
|
||||
select(
|
||||
atc, abbr, atc_group1, atc_group2,
|
||||
oral_ddd, oral_units, iv_ddd, iv_units
|
||||
))
|
||||
|
||||
abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixed = TRUE)))
|
||||
|
||||
@ -171,29 +191,41 @@ get_CID <- function(ab) {
|
||||
p$tick()$print()
|
||||
|
||||
CID[i] <- tryCatch(
|
||||
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(ab[i], reserved = TRUE),
|
||||
"/cids/TXT?name_type=complete"),
|
||||
showProgress = FALSE)[[1]][1],
|
||||
error = function(e) NA_integer_)
|
||||
data.table::fread(paste0(
|
||||
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(ab[i], reserved = TRUE),
|
||||
"/cids/TXT?name_type=complete"
|
||||
),
|
||||
showProgress = FALSE
|
||||
)[[1]][1],
|
||||
error = function(e) NA_integer_
|
||||
)
|
||||
if (is.na(CID[i])) {
|
||||
# try with removing the text in brackets
|
||||
CID[i] <- tryCatch(
|
||||
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
|
||||
"/cids/TXT?name_type=complete"),
|
||||
showProgress = FALSE)[[1]][1],
|
||||
error = function(e) NA_integer_)
|
||||
data.table::fread(paste0(
|
||||
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
|
||||
"/cids/TXT?name_type=complete"
|
||||
),
|
||||
showProgress = FALSE
|
||||
)[[1]][1],
|
||||
error = function(e) NA_integer_
|
||||
)
|
||||
}
|
||||
if (is.na(CID[i])) {
|
||||
# try match on word and take the lowest CID value (sorted)
|
||||
ab[i] <- gsub("[^a-z0-9]+", " ", ab[i], ignore.case = TRUE)
|
||||
CID[i] <- tryCatch(
|
||||
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(ab[i], reserved = TRUE),
|
||||
"/cids/TXT?name_type=word"),
|
||||
showProgress = FALSE)[[1]][1],
|
||||
error = function(e) NA_integer_)
|
||||
data.table::fread(paste0(
|
||||
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
|
||||
URLencode(ab[i], reserved = TRUE),
|
||||
"/cids/TXT?name_type=word"
|
||||
),
|
||||
showProgress = FALSE
|
||||
)[[1]][1],
|
||||
error = function(e) NA_integer_
|
||||
)
|
||||
}
|
||||
Sys.sleep(0.1)
|
||||
}
|
||||
@ -203,15 +235,15 @@ get_CID <- function(ab) {
|
||||
# get CIDs (2-3 min)
|
||||
CIDs <- get_CID(abx2$name)
|
||||
# These could not be found:
|
||||
abx2[is.na(CIDs),] %>% View()
|
||||
abx2[is.na(CIDs), ] %>% View()
|
||||
|
||||
# returns list with synonyms (brand names), with CIDs as names
|
||||
get_synonyms <- function(CID, clean = TRUE) {
|
||||
synonyms <- rep(NA_character_, length(CID))
|
||||
#p <- progress_ticker(n = length(CID), min_time = 0)
|
||||
# p <- progress_ticker(n = length(CID), min_time = 0)
|
||||
|
||||
for (i in 1:length(CID)) {
|
||||
#p$tick()$print()
|
||||
# p$tick()$print()
|
||||
|
||||
synonyms_txt <- ""
|
||||
|
||||
@ -220,27 +252,37 @@ get_synonyms <- function(CID, clean = TRUE) {
|
||||
}
|
||||
|
||||
synonyms_txt <- tryCatch(
|
||||
data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
|
||||
CID[i],
|
||||
"/synonyms/TXT"),
|
||||
sep = "\n",
|
||||
showProgress = FALSE)[[1]],
|
||||
error = function(e) NA_character_)
|
||||
data.table::fread(paste0(
|
||||
"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
|
||||
CID[i],
|
||||
"/synonyms/TXT"
|
||||
),
|
||||
sep = "\n",
|
||||
showProgress = FALSE
|
||||
)[[1]],
|
||||
error = function(e) NA_character_
|
||||
)
|
||||
|
||||
Sys.sleep(0.1)
|
||||
|
||||
if (clean == TRUE) {
|
||||
# remove text between brackets
|
||||
synonyms_txt <- trimws(gsub("[(].*[)]", "",
|
||||
gsub("[[].*[]]", "",
|
||||
gsub("[(].*[]]", "",
|
||||
gsub("[[].*[)]", "", synonyms_txt)))))
|
||||
synonyms_txt <- trimws(gsub(
|
||||
"[(].*[)]", "",
|
||||
gsub(
|
||||
"[[].*[]]", "",
|
||||
gsub(
|
||||
"[(].*[]]", "",
|
||||
gsub("[[].*[)]", "", synonyms_txt)
|
||||
)
|
||||
)
|
||||
))
|
||||
synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
|
||||
# only length 6 to 20 and no txt with reading marks or numbers and must start with capital letter (= brand)
|
||||
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)
|
||||
& !grepl("[-&{},_0-9/]", synonyms_txt)
|
||||
& grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
|
||||
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
|
||||
synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) &
|
||||
!grepl("[-&{},_0-9/]", synonyms_txt) &
|
||||
grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
|
||||
synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
|
||||
}
|
||||
synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]))
|
||||
synonyms[i] <- list(sort(synonyms_txt))
|
||||
@ -251,52 +293,56 @@ get_synonyms <- function(CID, clean = TRUE) {
|
||||
|
||||
# get brand names from PubChem (2-3 min)
|
||||
synonyms <- get_synonyms(CIDs)
|
||||
synonyms <- lapply(synonyms,
|
||||
function(x) {
|
||||
if (length(x) == 0 | all(is.na(x))) {
|
||||
""
|
||||
} else {
|
||||
x
|
||||
}})
|
||||
synonyms <- lapply(
|
||||
synonyms,
|
||||
function(x) {
|
||||
if (length(x) == 0 | all(is.na(x))) {
|
||||
""
|
||||
} else {
|
||||
x
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# add them to data set
|
||||
antibiotics <- abx2 %>%
|
||||
left_join(DRGLST %>%
|
||||
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
|
||||
distinct(ab, .keep_all = TRUE), by = "ab") %>%
|
||||
select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
|
||||
distinct(ab, .keep_all = TRUE), by = "ab") %>%
|
||||
transmute(ab,
|
||||
atc,
|
||||
cid = CIDs,
|
||||
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
|
||||
name = name %>%
|
||||
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
|
||||
gsub("edta", "EDTA", ., ignore.case = TRUE),
|
||||
group = case_when(
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
|
||||
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
|
||||
TRUE ~ "Other antibacterials"
|
||||
),
|
||||
atc_group1, atc_group2,
|
||||
abbreviations = unname(abbr),
|
||||
synonyms = unname(synonyms),
|
||||
oral_ddd, oral_units,
|
||||
iv_ddd, iv_units) %>%
|
||||
atc,
|
||||
cid = CIDs,
|
||||
# no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
|
||||
name = name %>%
|
||||
gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
|
||||
gsub("edta", "EDTA", ., ignore.case = TRUE),
|
||||
group = case_when(
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
|
||||
name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
|
||||
paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
|
||||
TRUE ~ "Other antibacterials"
|
||||
),
|
||||
atc_group1, atc_group2,
|
||||
abbreviations = unname(abbr),
|
||||
synonyms = unname(synonyms),
|
||||
oral_ddd, oral_units,
|
||||
iv_ddd, iv_units
|
||||
) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
# some exceptions
|
||||
@ -329,13 +375,15 @@ antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <-
|
||||
antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
|
||||
# Add cefoxitin screening
|
||||
class(antibiotics$ab) <- "character"
|
||||
antibiotics <- rbind(antibiotics,data.frame(ab = "FOX1", atc = NA, cid = NA,
|
||||
name = "Cefoxitin screening",
|
||||
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
|
||||
abbreviations = "cfsc", synonyms = NA,
|
||||
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
|
||||
loinc = NA,
|
||||
stringsAsFactors = FALSE))
|
||||
antibiotics <- rbind(antibiotics, data.frame(
|
||||
ab = "FOX1", atc = NA, cid = NA,
|
||||
name = "Cefoxitin screening",
|
||||
group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
|
||||
abbreviations = "cfsc", synonyms = NA,
|
||||
oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
|
||||
loinc = NA,
|
||||
stringsAsFactors = FALSE
|
||||
))
|
||||
# More GLIMS codes
|
||||
antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]], "amf"))
|
||||
antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
|
||||
@ -520,27 +568,33 @@ antibiotics[which(antibiotics$ab == "RFP"), "abbreviations"][[1]] <- list(sort(c
|
||||
antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]], "RET")))
|
||||
antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]], "TVN")))
|
||||
|
||||
antibiotics <- antibiotics %>%
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "GEH") %>%
|
||||
mutate(ab = "AMH",
|
||||
name = "Amphotericin B-high",
|
||||
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")))) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "GEH") %>%
|
||||
mutate(ab = "TOH",
|
||||
name = "Tobramycin-high",
|
||||
abbreviations = list(c("tohl", "tobra high", "tobramycin high")))) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "BUT") %>%
|
||||
mutate(ab = "CIX",
|
||||
atc = "D01AE14",
|
||||
name = "Ciclopirox",
|
||||
group = "Antifungals/antimycotics",
|
||||
atc_group1 = "Antifungals for topical use",
|
||||
atc_group2 = "Other antifungals for topical use",
|
||||
abbreviations = list(c("cipx"))))
|
||||
antibiotics <- antibiotics %>%
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "GEH") %>%
|
||||
mutate(
|
||||
ab = "AMH",
|
||||
name = "Amphotericin B-high",
|
||||
abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high"))
|
||||
)) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "GEH") %>%
|
||||
mutate(
|
||||
ab = "TOH",
|
||||
name = "Tobramycin-high",
|
||||
abbreviations = list(c("tohl", "tobra high", "tobramycin high"))
|
||||
)) %>%
|
||||
rbind(antibiotics %>%
|
||||
filter(ab == "BUT") %>%
|
||||
mutate(
|
||||
ab = "CIX",
|
||||
atc = "D01AE14",
|
||||
name = "Ciclopirox",
|
||||
group = "Antifungals/antimycotics",
|
||||
atc_group1 = "Antifungals for topical use",
|
||||
atc_group2 = "Other antifungals for topical use",
|
||||
abbreviations = list(c("cipx"))
|
||||
))
|
||||
antibiotics[which(antibiotics$ab == "SSS"), "name"] <- "Sulfonamide"
|
||||
# ESBL E-test codes:
|
||||
antibiotics[which(antibiotics$ab == "CCV"), "abbreviations"][[1]] <- list(c("xtzl"))
|
||||
@ -600,13 +654,13 @@ antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin"
|
||||
antibiotics[which(antibiotics$ab == "PEN"), "atc"] <- "J01CE01"
|
||||
|
||||
# WHONET cleanup
|
||||
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
|
||||
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
|
||||
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
|
||||
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
|
||||
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
|
||||
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
|
||||
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
|
||||
antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
|
||||
antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
|
||||
antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
|
||||
antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
|
||||
antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
|
||||
antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
|
||||
antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
|
||||
antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]], "Bicozamycin"))))
|
||||
antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]], "Cefcatacol"))))
|
||||
antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]], "Tuberactinomycin"))))
|
||||
@ -617,7 +671,7 @@ antibiotics[which(antibiotics$ab == "PMR"), "abbreviations"][[1]] <- list(sort(u
|
||||
|
||||
|
||||
# set cephalosporins groups for the ones that could not be determined automatically:
|
||||
antibiotics <- antibiotics %>%
|
||||
antibiotics <- antibiotics %>%
|
||||
mutate(group = case_when(
|
||||
name == "Cefcapene" ~ "Cephalosporins (3rd gen.)",
|
||||
name == "Cefcapene pivoxil" ~ "Cephalosporins (3rd gen.)",
|
||||
@ -650,21 +704,24 @@ antibiotics <- antibiotics %>%
|
||||
name == "Ceftolozane/enzyme inhibitor" ~ "Cephalosporins (5th gen.)",
|
||||
name == "Ceftolozane/tazobactam" ~ "Cephalosporins (5th gen.)",
|
||||
name == "Cefuroxime axetil" ~ "Cephalosporins (2nd gen.)",
|
||||
TRUE ~ group))
|
||||
TRUE ~ group
|
||||
))
|
||||
antibiotics[which(antibiotics$ab %in% c("CYC", "LNZ", "THA", "TZD")), "group"] <- "Oxazolidinones"
|
||||
|
||||
# add pretomanid
|
||||
antibiotics <- antibiotics %>%
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
bind_rows(antibiotics %>%
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
filter(ab == "SMF") %>%
|
||||
mutate(ab = "PMD",
|
||||
atc = "J04AK08",
|
||||
cid = 456199,
|
||||
name = "Pretomanid",
|
||||
abbreviations = list(""),
|
||||
oral_ddd = NA_real_))
|
||||
mutate(ab = as.character(ab)) %>%
|
||||
filter(ab == "SMF") %>%
|
||||
mutate(
|
||||
ab = "PMD",
|
||||
atc = "J04AK08",
|
||||
cid = 456199,
|
||||
name = "Pretomanid",
|
||||
abbreviations = list(""),
|
||||
oral_ddd = NA_real_
|
||||
))
|
||||
|
||||
|
||||
|
||||
@ -675,25 +732,24 @@ antibiotics <- antibiotics %>%
|
||||
updated_atc <- as.list(antibiotics$atc)
|
||||
|
||||
get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
|
||||
|
||||
ab_name <- gsub("/", " and ", tolower(ab_name), fixed = TRUE)
|
||||
|
||||
|
||||
# we will do a search on their website, which means:
|
||||
|
||||
|
||||
# go to the url
|
||||
atc_tbl <- read_html(url) %>%
|
||||
atc_tbl <- read_html(url) %>%
|
||||
# get all forms
|
||||
html_form() %>%
|
||||
# get the second form (the first form is a global website form)
|
||||
.[[2]] %>%
|
||||
.[[2]] %>%
|
||||
# set the name input box to our search parameter
|
||||
html_form_set(name = ab_name) %>%
|
||||
html_form_set(name = ab_name) %>%
|
||||
# hit Submit
|
||||
html_form_submit() %>%
|
||||
html_form_submit() %>%
|
||||
# read the resulting page
|
||||
read_html() %>%
|
||||
read_html() %>%
|
||||
# retrieve the table on it
|
||||
html_node("table") %>%
|
||||
html_node("table") %>%
|
||||
# transform it to an R data set
|
||||
html_table(header = FALSE)
|
||||
# and get the ATCs (first column) of only exact hits
|
||||
@ -702,9 +758,10 @@ get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
|
||||
|
||||
# this takes around 4 minutes (some are skipped and go faster)
|
||||
for (i in seq_len(nrow(antibiotics))) {
|
||||
message(percentage(i / nrow(antibiotics), digits = 1),
|
||||
" - Downloading ", antibiotics$name[i],
|
||||
appendLF = FALSE)
|
||||
message(percentage(i / nrow(antibiotics), digits = 1),
|
||||
" - Downloading ", antibiotics$name[i],
|
||||
appendLF = FALSE
|
||||
)
|
||||
atcs <- get_atcs(antibiotics$name[i])
|
||||
if (length(atcs) > 0) {
|
||||
updated_atc[[i]] <- atcs
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -28,8 +28,10 @@
|
||||
get_atc_table <- function(atc_group) {
|
||||
# give as input J0XXX, like atc_group = "J05AB"
|
||||
downloaded <- read_html(paste0("https://www.whocc.no/atc_ddd_index/?code=", atc_group, "&showdescription=no"))
|
||||
table_title <- downloaded %>% html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>% html_text()
|
||||
table_content <- downloaded %>%
|
||||
table_title <- downloaded %>%
|
||||
html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>%
|
||||
html_text()
|
||||
table_content <- downloaded %>%
|
||||
html_nodes("table") %>%
|
||||
html_table(header = TRUE) %>%
|
||||
# returns list, so make data.frame out of it
|
||||
@ -37,8 +39,8 @@ get_atc_table <- function(atc_group) {
|
||||
# select right columns
|
||||
select(atc = ATC.code, name = Name, ddd = DDD, unit = U, ddd_type = Adm.R) %>%
|
||||
# fill empty rows
|
||||
mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>%
|
||||
pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>%
|
||||
mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>%
|
||||
pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>%
|
||||
mutate(atc_group = table_title)
|
||||
if (!"ddd_O" %in% colnames(table_content)) {
|
||||
table_content <- table_content %>% mutate(ddd_O = NA_real_, unit_O = NA_character_)
|
||||
@ -46,9 +48,10 @@ get_atc_table <- function(atc_group) {
|
||||
if (!"ddd_P" %in% colnames(table_content)) {
|
||||
table_content <- table_content %>% mutate(ddd_P = NA_real_, unit_P = NA_character_)
|
||||
}
|
||||
table_content %>% select(atc, name, atc_group,
|
||||
oral_ddd = ddd_O, oral_units = unit_O,
|
||||
iv_ddd = ddd_P, iv_units = unit_P)
|
||||
table_content %>% select(atc, name, atc_group,
|
||||
oral_ddd = ddd_O, oral_units = unit_O,
|
||||
iv_ddd = ddd_P, iv_units = unit_P
|
||||
)
|
||||
}
|
||||
|
||||
# these are the relevant groups for input: https://www.whocc.no/atc_ddd_index/?code=J05A (J05 only contains J05A)
|
||||
@ -62,32 +65,38 @@ for (i in 2:length(atc_groups)) {
|
||||
}
|
||||
|
||||
# arrange on name, untibble it
|
||||
antivirals <- antivirals %>% arrange(name) %>% as.data.frame(stringsAsFactors = FALSE)
|
||||
antivirals <- antivirals %>%
|
||||
arrange(name) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
# add PubChem Compound ID (cid) and their trade names - functions are in file to create `antibiotics` data set
|
||||
CIDs <- get_CID(antivirals$name)
|
||||
# these could not be found:
|
||||
antivirals[is.na(CIDs),] %>% View()
|
||||
# get brand names from PubChem
|
||||
antivirals[is.na(CIDs), ] %>% View()
|
||||
# get brand names from PubChem
|
||||
synonyms <- get_synonyms(CIDs)
|
||||
synonyms <- lapply(synonyms,
|
||||
function(x) {
|
||||
if (length(x) == 0 | all(is.na(x))) {
|
||||
""
|
||||
} else {
|
||||
x
|
||||
}})
|
||||
synonyms <- lapply(
|
||||
synonyms,
|
||||
function(x) {
|
||||
if (length(x) == 0 | all(is.na(x))) {
|
||||
""
|
||||
} else {
|
||||
x
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
antivirals <- antivirals %>%
|
||||
transmute(atc,
|
||||
cid = CIDs,
|
||||
name,
|
||||
atc_group,
|
||||
synonyms = unname(synonyms),
|
||||
oral_ddd,
|
||||
oral_units,
|
||||
iv_ddd,
|
||||
iv_units)
|
||||
cid = CIDs,
|
||||
name,
|
||||
atc_group,
|
||||
synonyms = unname(synonyms),
|
||||
oral_ddd,
|
||||
oral_units,
|
||||
iv_ddd,
|
||||
iv_units
|
||||
)
|
||||
|
||||
# save it
|
||||
usethis::use_data(antivirals, overwrite = TRUE)
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -32,37 +32,45 @@ library(cleaner)
|
||||
# download the PDF file, open in Acrobat Pro and export as Excel workbook
|
||||
breakpoints_version <- 11
|
||||
|
||||
dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>%
|
||||
format_names(snake_case = TRUE, penicillins = "drug") %>%
|
||||
filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>%
|
||||
filter(!is.na(standard_dosage)) %>%
|
||||
dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>%
|
||||
format_names(snake_case = TRUE, penicillins = "drug") %>%
|
||||
filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>%
|
||||
filter(!is.na(standard_dosage)) %>%
|
||||
# keep only one drug in the table
|
||||
arrange(desc(drug)) %>%
|
||||
mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
|
||||
#distinct(drug, .keep_all = TRUE) %>%
|
||||
arrange(drug) %>%
|
||||
mutate(ab = as.ab(drug),
|
||||
ab_name = ab_name(ab, language = NULL))
|
||||
arrange(desc(drug)) %>%
|
||||
mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
|
||||
# distinct(drug, .keep_all = TRUE) %>%
|
||||
arrange(drug) %>%
|
||||
mutate(
|
||||
ab = as.ab(drug),
|
||||
ab_name = ab_name(ab, language = NULL)
|
||||
)
|
||||
|
||||
dosage_source <- bind_rows(
|
||||
# oral
|
||||
dosage_source %>%
|
||||
dosage_source %>%
|
||||
filter(standard_dosage %like% " oral") %>%
|
||||
mutate(standard_dosage = gsub("oral.*", "oral", standard_dosage),
|
||||
high_dosage = if_else(high_dosage %like% "oral",
|
||||
gsub("oral.*", "oral", high_dosage),
|
||||
NA_character_)),
|
||||
mutate(
|
||||
standard_dosage = gsub("oral.*", "oral", standard_dosage),
|
||||
high_dosage = if_else(high_dosage %like% "oral",
|
||||
gsub("oral.*", "oral", high_dosage),
|
||||
NA_character_
|
||||
)
|
||||
),
|
||||
# iv
|
||||
dosage_source %>%
|
||||
dosage_source %>%
|
||||
filter(standard_dosage %like% " iv") %>%
|
||||
mutate(standard_dosage = gsub(".* or ", "", standard_dosage),
|
||||
high_dosage = if_else(high_dosage %like% "( or | iv)",
|
||||
gsub(".* or ", "", high_dosage),
|
||||
NA_character_)),
|
||||
mutate(
|
||||
standard_dosage = gsub(".* or ", "", standard_dosage),
|
||||
high_dosage = if_else(high_dosage %like% "( or | iv)",
|
||||
gsub(".* or ", "", high_dosage),
|
||||
NA_character_
|
||||
)
|
||||
),
|
||||
# im
|
||||
dosage_source %>%
|
||||
dosage_source %>%
|
||||
filter(standard_dosage %like% " im")
|
||||
) %>%
|
||||
) %>%
|
||||
arrange(drug)
|
||||
|
||||
|
||||
@ -71,34 +79,36 @@ get_dosage_lst <- function(col_data) {
|
||||
# remove new lines
|
||||
gsub(" ?(\n|\t)+ ?", " ", .) %>%
|
||||
# keep only the first suggestion, replace all after 'or' and more informative texts
|
||||
gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>%
|
||||
gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>%
|
||||
# remove (1 MU)
|
||||
gsub(" [(][0-9] [A-Z]+[)]", "", .) %>%
|
||||
gsub(" [(][0-9] [A-Z]+[)]", "", .) %>%
|
||||
# remove parentheses
|
||||
gsub("[)(]", "", .) %>%
|
||||
gsub("[)(]", "", .) %>%
|
||||
# remove drug names
|
||||
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
|
||||
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
|
||||
gsub(" (acid|dose)", "", .)# %>%
|
||||
# keep lowest value only (25-30 mg -> 25 mg)
|
||||
# gsub("[-].*? ", " ", .)
|
||||
|
||||
dosage_lst <- lapply(strsplit(standard, " x "),
|
||||
function(x) {
|
||||
dose <- x[1]
|
||||
if (dose %like% "under") {
|
||||
dose <- NA_character_
|
||||
}
|
||||
admin <- x[2]
|
||||
|
||||
list(
|
||||
dose = trimws(dose),
|
||||
dose_times = gsub("^([0-9.]+).*", "\\1", admin),
|
||||
administration = clean_character(admin),
|
||||
notes = "",
|
||||
original_txt = ""
|
||||
)
|
||||
})
|
||||
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
|
||||
gsub(" [a-z]{5,99}( |$)", " ", .) %>%
|
||||
gsub(" (acid|dose)", "", .) # %>%
|
||||
# keep lowest value only (25-30 mg -> 25 mg)
|
||||
# gsub("[-].*? ", " ", .)
|
||||
|
||||
dosage_lst <- lapply(
|
||||
strsplit(standard, " x "),
|
||||
function(x) {
|
||||
dose <- x[1]
|
||||
if (dose %like% "under") {
|
||||
dose <- NA_character_
|
||||
}
|
||||
admin <- x[2]
|
||||
|
||||
list(
|
||||
dose = trimws(dose),
|
||||
dose_times = gsub("^([0-9.]+).*", "\\1", admin),
|
||||
administration = clean_character(admin),
|
||||
notes = "",
|
||||
original_txt = ""
|
||||
)
|
||||
}
|
||||
)
|
||||
for (i in seq_len(length(col_data))) {
|
||||
dosage_lst[[i]]$original_txt <- gsub("\n", " ", col_data[i])
|
||||
if (col_data[i] %like% " (or|with|loading|depending|over) ") {
|
||||
@ -147,12 +157,15 @@ dosage <- bind_rows(
|
||||
notes = sapply(uti, function(x) x$notes),
|
||||
original_txt = sapply(uti, function(x) x$original_txt),
|
||||
stringsAsFactors = FALSE
|
||||
)) %>%
|
||||
mutate(eucast_version = breakpoints_version,
|
||||
dose_times = as.integer(dose_times),
|
||||
administration = gsub("([a-z]+) .*", "\\1", administration)) %>%
|
||||
arrange(name, administration, type) %>%
|
||||
filter(!is.na(dose), dose != ".") %>%
|
||||
)
|
||||
) %>%
|
||||
mutate(
|
||||
eucast_version = breakpoints_version,
|
||||
dose_times = as.integer(dose_times),
|
||||
administration = gsub("([a-z]+) .*", "\\1", administration)
|
||||
) %>%
|
||||
arrange(name, administration, type) %>%
|
||||
filter(!is.na(dose), dose != ".") %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
rownames(dosage) <- NULL
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -25,74 +25,118 @@
|
||||
|
||||
patients <- unlist(lapply(LETTERS, paste0, 1:10))
|
||||
|
||||
patients_table <- data.frame(patient_id = patients,
|
||||
gender = c(rep("M", 135),
|
||||
rep("F", 125)))
|
||||
patients_table <- data.frame(
|
||||
patient_id = patients,
|
||||
gender = c(
|
||||
rep("M", 135),
|
||||
rep("F", 125)
|
||||
)
|
||||
)
|
||||
|
||||
dates <- seq(as.Date("2011-01-01"), as.Date("2020-01-01"), by = "day")
|
||||
|
||||
bacteria_a <- c("E. coli", "S. aureus",
|
||||
"S. pneumoniae", "K. pneumoniae")
|
||||
bacteria_a <- c(
|
||||
"E. coli", "S. aureus",
|
||||
"S. pneumoniae", "K. pneumoniae"
|
||||
)
|
||||
|
||||
bacteria_b <- c("esccol", "staaur", "strpne", "klepne")
|
||||
|
||||
bacteria_c <- c("Escherichia coli", "Staphylococcus aureus",
|
||||
"Streptococcus pneumoniae", "Klebsiella pneumoniae")
|
||||
bacteria_c <- c(
|
||||
"Escherichia coli", "Staphylococcus aureus",
|
||||
"Streptococcus pneumoniae", "Klebsiella pneumoniae"
|
||||
)
|
||||
|
||||
ab_interpretations <- c("S", "I", "R")
|
||||
|
||||
ab_interpretations_messy = c("R", "< 0.5 S", "I")
|
||||
ab_interpretations_messy <- c("R", "< 0.5 S", "I")
|
||||
|
||||
sample_size <- 1000
|
||||
|
||||
data_a <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "A",
|
||||
bacteria = sample(bacteria_a, size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)),
|
||||
AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)),
|
||||
AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)),
|
||||
CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)),
|
||||
GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)))
|
||||
data_a <- data.frame(
|
||||
date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "A",
|
||||
bacteria = sample(bacteria_a,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)
|
||||
),
|
||||
AMX = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)
|
||||
),
|
||||
AMC = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)
|
||||
),
|
||||
CIP = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)
|
||||
),
|
||||
GEN = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
data_b <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "B",
|
||||
bacteria = sample(bacteria_b, size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)),
|
||||
AMX = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)),
|
||||
AMC = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)),
|
||||
CIP = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)),
|
||||
GEN = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)))
|
||||
data_b <- data.frame(
|
||||
date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "B",
|
||||
bacteria = sample(bacteria_b,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)
|
||||
),
|
||||
AMX = sample(ab_interpretations_messy,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)
|
||||
),
|
||||
AMC = sample(ab_interpretations_messy,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)
|
||||
),
|
||||
CIP = sample(ab_interpretations_messy,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)
|
||||
),
|
||||
GEN = sample(ab_interpretations_messy,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)
|
||||
)
|
||||
)
|
||||
|
||||
data_c <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "C",
|
||||
bacteria = sample(bacteria_c, size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)),
|
||||
AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)),
|
||||
AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)),
|
||||
CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)),
|
||||
GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)))
|
||||
data_c <- data.frame(
|
||||
date = sample(dates, size = sample_size, replace = TRUE),
|
||||
hospital = "C",
|
||||
bacteria = sample(bacteria_c,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.50, 0.25, 0.15, 0.10)
|
||||
),
|
||||
AMX = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.60, 0.05, 0.35)
|
||||
),
|
||||
AMC = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.75, 0.10, 0.15)
|
||||
),
|
||||
CIP = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.80, 0.00, 0.20)
|
||||
),
|
||||
GEN = sample(ab_interpretations,
|
||||
size = sample_size, replace = TRUE,
|
||||
prob = c(0.92, 0.00, 0.08)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
example_isolates_unclean <- data_a %>%
|
||||
example_isolates_unclean <- data_a %>%
|
||||
bind_rows(data_b, data_c)
|
||||
|
||||
example_isolates_unclean$patient_id <- sample(patients, size = nrow(example_isolates_unclean), replace = TRUE)
|
||||
|
||||
example_isolates_unclean <- example_isolates_unclean %>%
|
||||
select(patient_id, hospital, date, bacteria, everything()) %>%
|
||||
example_isolates_unclean <- example_isolates_unclean %>%
|
||||
select(patient_id, hospital, date, bacteria, everything()) %>%
|
||||
dataset_UTF8_to_ASCII()
|
||||
|
||||
usethis::use_data(example_isolates_unclean, overwrite = TRUE, internal = FALSE, version = 2, compress = "xz")
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -32,19 +32,22 @@ for (i in seq_len(nrow(antibiotics))) {
|
||||
}
|
||||
|
||||
int_resis <- eucast_rules(int_resis,
|
||||
eucast_rules_df = subset(AMR:::EUCAST_RULES_DF,
|
||||
is.na(have_these_values) & reference.version == 3.3),
|
||||
info = FALSE)
|
||||
eucast_rules_df = subset(
|
||||
AMR:::EUCAST_RULES_DF,
|
||||
is.na(have_these_values) & reference.version == 3.3
|
||||
),
|
||||
info = FALSE
|
||||
)
|
||||
|
||||
int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>%
|
||||
int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>%
|
||||
tidyr::pivot_longer(-mo) %>%
|
||||
filter(value == "R") %>%
|
||||
filter(value == "R") %>%
|
||||
select(mo, ab = name)
|
||||
|
||||
# remove lab drugs
|
||||
untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE]
|
||||
int_resis2 <- int_resis2 %>%
|
||||
filter(!ab %in% untreatable) %>%
|
||||
int_resis2 <- int_resis2 %>%
|
||||
filter(!ab %in% untreatable) %>%
|
||||
arrange(mo, ab)
|
||||
|
||||
intrinsic_resistant <- as.data.frame(int_resis2, stringsAsFactors = FALSE)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -39,7 +39,7 @@ test_mo <- microorganisms$mo
|
||||
|
||||
get_author_year <- function(ref) {
|
||||
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
|
||||
|
||||
|
||||
authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
authors2 <- gsub(" ?\\(Approved Lists [0-9]+\\) ?", " () ", authors2)
|
||||
authors2 <- gsub(" [)(]+ $", "", authors2)
|
||||
@ -47,14 +47,16 @@ get_author_year <- function(ref) {
|
||||
authors2 <- trimws(gsub("^[(](.*)[)]$", "\\1", authors2))
|
||||
# only take part after brackets if there's a name
|
||||
authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
|
||||
gsub(".*[)] (.*)", "\\1", authors2),
|
||||
authors2)
|
||||
gsub(".*[)] (.*)", "\\1", authors2),
|
||||
authors2
|
||||
)
|
||||
# get year from last 4 digits
|
||||
lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
|
||||
lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
|
||||
# can never be later than now
|
||||
lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
|
||||
NA,
|
||||
lastyear)
|
||||
lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
|
||||
NA,
|
||||
lastyear
|
||||
)
|
||||
# get authors without last year
|
||||
authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
|
||||
# remove nonsense characters from names
|
||||
@ -72,18 +74,19 @@ get_author_year <- function(ref) {
|
||||
authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
|
||||
# combine author and year if year is available
|
||||
ref <- ifelse(!is.na(lastyear),
|
||||
paste0(authors, ", ", lastyear),
|
||||
authors)
|
||||
paste0(authors, ", ", lastyear),
|
||||
authors
|
||||
)
|
||||
# fix beginning and ending
|
||||
ref <- gsub(", $", "", ref)
|
||||
ref <- gsub("^, ", "", ref)
|
||||
ref <- gsub("^(emend|et al.,?)", "", ref)
|
||||
ref <- trimws(ref)
|
||||
ref <- gsub("'", "", ref)
|
||||
|
||||
|
||||
# a lot start with a lowercase character - fix that
|
||||
ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
|
||||
# specific one for the French that are named dOrbigny
|
||||
# specific one for the French that are named dOrbigny
|
||||
ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)])
|
||||
ref <- gsub(" +", " ", ref)
|
||||
ref
|
||||
@ -92,21 +95,23 @@ get_author_year <- function(ref) {
|
||||
df_remove_nonASCII <- function(df) {
|
||||
# Remove non-ASCII characters (these are not allowed by CRAN)
|
||||
df %>%
|
||||
mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
|
||||
mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
|
||||
# also remove invalid characters
|
||||
mutate_if(is.character, ~gsub("[\"'`]+", "", .)) %>%
|
||||
mutate_if(is.character, ~ gsub("[\"'`]+", "", .)) %>%
|
||||
AMR:::dataset_UTF8_to_ASCII()
|
||||
}
|
||||
|
||||
abbreviate_mo <- function(x, minlength = 5, prefix = "", ...) {
|
||||
# keep a starting Latin ae
|
||||
suppressWarnings(
|
||||
gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
|
||||
abbreviate(minlength = minlength,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides", ...) %>%
|
||||
paste0(prefix, .) %>%
|
||||
toupper() %>%
|
||||
gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
|
||||
abbreviate(
|
||||
minlength = minlength,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides", ...
|
||||
) %>%
|
||||
paste0(prefix, .) %>%
|
||||
toupper() %>%
|
||||
gsub("(\u00C6|\u00E6)+", "AE", .)
|
||||
)
|
||||
}
|
||||
@ -119,63 +124,74 @@ taxonomy <- read_csv(file_location)
|
||||
|
||||
new_synonyms <- taxonomy %>%
|
||||
left_join(taxonomy,
|
||||
by = c("record_lnk" = "record_no"),
|
||||
suffix = c("", ".new")) %>%
|
||||
filter(!is.na(record_lnk)) %>%
|
||||
mutate_all(~ifelse(is.na(.), "", .)) %>%
|
||||
transmute(fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
|
||||
fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
|
||||
ref = get_author_year(authors),
|
||||
prevalence = 0) %>%
|
||||
distinct(fullname, .keep_all = TRUE) %>%
|
||||
filter(fullname != fullname_new) %>%
|
||||
# this part joins this table to itself to correct for entries that had >1 renames,
|
||||
by = c("record_lnk" = "record_no"),
|
||||
suffix = c("", ".new")
|
||||
) %>%
|
||||
filter(!is.na(record_lnk)) %>%
|
||||
mutate_all(~ ifelse(is.na(.), "", .)) %>%
|
||||
transmute(
|
||||
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
|
||||
fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
|
||||
ref = get_author_year(authors),
|
||||
prevalence = 0
|
||||
) %>%
|
||||
distinct(fullname, .keep_all = TRUE) %>%
|
||||
filter(fullname != fullname_new) %>%
|
||||
# this part joins this table to itself to correct for entries that had >1 renames,
|
||||
# such as:
|
||||
# Bacteroides tectum -> Bacteroides tectus -> Bacteroides pyogenes
|
||||
left_join(., .,
|
||||
by = c("fullname_new" = "fullname"),
|
||||
suffix = c("", ".2")) %>%
|
||||
mutate(fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
|
||||
ref = ifelse(!is.na(ref.2), ref.2, ref)) %>%
|
||||
select(-ends_with(".2"))
|
||||
left_join(., .,
|
||||
by = c("fullname_new" = "fullname"),
|
||||
suffix = c("", ".2")
|
||||
) %>%
|
||||
mutate(
|
||||
fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
|
||||
ref = ifelse(!is.na(ref.2), ref.2, ref)
|
||||
) %>%
|
||||
select(-ends_with(".2"))
|
||||
|
||||
mo_became_synonym <- microorganisms %>%
|
||||
mo_became_synonym <- microorganisms %>%
|
||||
filter(fullname %in% new_synonyms$fullname)
|
||||
|
||||
updated_microorganisms <- taxonomy %>%
|
||||
filter(is.na(record_lnk)) %>%
|
||||
mutate_all(~ifelse(is.na(.), "", .)) %>%
|
||||
transmute(mo = "",
|
||||
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
|
||||
kingdom = "Bacteria",
|
||||
phylum = "",
|
||||
class = "",
|
||||
order = "",
|
||||
family = "",
|
||||
genus = trimws(genus_name),
|
||||
species = trimws(replace_na(sp_epithet, "")),
|
||||
subspecies = trimws(replace_na(subsp_epithet, "")),
|
||||
rank = case_when(subspecies == "" & species == "" ~ "genus",
|
||||
subspecies == "" ~ "species",
|
||||
TRUE ~ "subsp."),
|
||||
ref = get_author_year(authors),
|
||||
species_id = as.character(record_no),
|
||||
source = "LPSN",
|
||||
prevalence = 0,
|
||||
snomed = NA)
|
||||
filter(is.na(record_lnk)) %>%
|
||||
mutate_all(~ ifelse(is.na(.), "", .)) %>%
|
||||
transmute(
|
||||
mo = "",
|
||||
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
|
||||
kingdom = "Bacteria",
|
||||
phylum = "",
|
||||
class = "",
|
||||
order = "",
|
||||
family = "",
|
||||
genus = trimws(genus_name),
|
||||
species = trimws(replace_na(sp_epithet, "")),
|
||||
subspecies = trimws(replace_na(subsp_epithet, "")),
|
||||
rank = case_when(
|
||||
subspecies == "" & species == "" ~ "genus",
|
||||
subspecies == "" ~ "species",
|
||||
TRUE ~ "subsp."
|
||||
),
|
||||
ref = get_author_year(authors),
|
||||
species_id = as.character(record_no),
|
||||
source = "LPSN",
|
||||
prevalence = 0,
|
||||
snomed = NA
|
||||
)
|
||||
|
||||
new_microorganisms <- updated_microorganisms %>%
|
||||
new_microorganisms <- updated_microorganisms %>%
|
||||
filter(!fullname %in% microorganisms$fullname)
|
||||
|
||||
genera_with_mo_code <- updated_microorganisms %>%
|
||||
filter(genus %in% (microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% pull(genus))) %>%
|
||||
distinct(genus) %>%
|
||||
distinct(genus) %>%
|
||||
left_join(microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% select(mo, genus),
|
||||
by = "genus")
|
||||
|
||||
genera_without_mo_code <- updated_microorganisms %>%
|
||||
filter(!genus %in% genera_with_mo_code$genus) %>%
|
||||
pull(genus) %>%
|
||||
by = "genus"
|
||||
)
|
||||
|
||||
genera_without_mo_code <- updated_microorganisms %>%
|
||||
filter(!genus %in% genera_with_mo_code$genus) %>%
|
||||
pull(genus) %>%
|
||||
unique()
|
||||
|
||||
genera_without_mo_code_abbr <- genera_without_mo_code %>% abbreviate_mo(5, prefix = "B_")
|
||||
@ -184,65 +200,73 @@ genera_without_mo_code_abbr[genera_without_mo_code_abbr %in% microorganisms$mo]
|
||||
# all unique??
|
||||
sum(genera_without_mo_code_abbr %in% microorganisms$mo) == 0
|
||||
|
||||
genus_abb <- tibble(genus = genera_without_mo_code,
|
||||
abbr = genera_without_mo_code_abbr) %>%
|
||||
genus_abb <- tibble(
|
||||
genus = genera_without_mo_code,
|
||||
abbr = genera_without_mo_code_abbr
|
||||
) %>%
|
||||
bind_rows(microorganisms %>%
|
||||
filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
|
||||
transmute(genus, abbr = as.character(mo))) %>%
|
||||
filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
|
||||
transmute(genus, abbr = as.character(mo))) %>%
|
||||
arrange(genus)
|
||||
|
||||
|
||||
# Update taxonomy ---------------------------------------------------------
|
||||
|
||||
# fill in the taxonomy of new genera
|
||||
updated_taxonomy <- tibble(phylum = character(0),
|
||||
class = character(0),
|
||||
order = character(0),
|
||||
family = character(0),
|
||||
genus = character(0))
|
||||
updated_taxonomy <- tibble(
|
||||
phylum = character(0),
|
||||
class = character(0),
|
||||
order = character(0),
|
||||
family = character(0),
|
||||
genus = character(0)
|
||||
)
|
||||
for (page in LETTERS) {
|
||||
message("Downloading page ", page, "... ", appendLF = FALSE)
|
||||
url <- paste0("https://lpsn.dsmz.de/genus?page=", page)
|
||||
|
||||
x <- xml2::read_html(url) %>%
|
||||
rvest::html_node(".main-list") %>%
|
||||
|
||||
x <- xml2::read_html(url) %>%
|
||||
rvest::html_node(".main-list") %>%
|
||||
# evety list element with a set <id> attribute
|
||||
rvest::html_nodes("li[id]")
|
||||
for (i in seq_len(length(x))) {
|
||||
txt <- x %>%
|
||||
magrittr::extract2(i) %>%
|
||||
txt <- x %>%
|
||||
magrittr::extract2(i) %>%
|
||||
rvest::html_text() %>%
|
||||
gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>%
|
||||
gsub("Candidatus ", "", ., fixed = TRUE) %>%
|
||||
gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>%
|
||||
gsub("Candidatus ", "", ., fixed = TRUE) %>%
|
||||
gsub("[ \t\r\n\"]+", "|", .) %>%
|
||||
gsub("\\|ShowHide.*", "", .) %>%
|
||||
gsub("[\\[\\]]", "", ., fixed = TRUE) %>%
|
||||
gsub("^\\|", "", .) %>%
|
||||
strsplit("|", fixed = TRUE) %>%
|
||||
gsub("\\|ShowHide.*", "", .) %>%
|
||||
gsub("[\\[\\]]", "", ., fixed = TRUE) %>%
|
||||
gsub("^\\|", "", .) %>%
|
||||
strsplit("|", fixed = TRUE) %>%
|
||||
unlist()
|
||||
txt[txt == "NA"] <- ""
|
||||
txt <- gsub("[^A-Za-z]+", "", txt)
|
||||
updated_taxonomy <- updated_taxonomy %>%
|
||||
bind_rows(tibble(phylum = txt[2],
|
||||
class = txt[3],
|
||||
order = txt[4],
|
||||
family = txt[5],
|
||||
genus = txt[6]))
|
||||
updated_taxonomy <- updated_taxonomy %>%
|
||||
bind_rows(tibble(
|
||||
phylum = txt[2],
|
||||
class = txt[3],
|
||||
order = txt[4],
|
||||
family = txt[5],
|
||||
genus = txt[6]
|
||||
))
|
||||
}
|
||||
message(length(x), " entries (total ", nrow(updated_taxonomy), ")")
|
||||
}
|
||||
|
||||
# Create new microorganisms -----------------------------------------------
|
||||
|
||||
new_microorganisms <- new_microorganisms %>%
|
||||
left_join(genus_abb, by = "genus") %>%
|
||||
group_by(genus) %>%
|
||||
mutate(species_abb = abbreviate_mo(species, 4)) %>%
|
||||
group_by(genus, species) %>%
|
||||
mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>%
|
||||
ungroup() %>%
|
||||
mutate(mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
|
||||
mo = gsub("_+$", "", mo)) %>%
|
||||
new_microorganisms <- new_microorganisms %>%
|
||||
left_join(genus_abb, by = "genus") %>%
|
||||
group_by(genus) %>%
|
||||
mutate(species_abb = abbreviate_mo(species, 4)) %>%
|
||||
group_by(genus, species) %>%
|
||||
mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>%
|
||||
ungroup() %>%
|
||||
mutate(
|
||||
mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
|
||||
mo = gsub("_+$", "", mo)
|
||||
) %>%
|
||||
select(-matches("abb"))
|
||||
|
||||
# add taxonomy new microorganisms
|
||||
@ -256,128 +280,150 @@ MOs$mo[which(duplicated(MOs$mo))] <- paste0(MOs$mo[which(duplicated(MOs$mo))], 1
|
||||
# all unique?
|
||||
!any(duplicated(MOs$mo))
|
||||
|
||||
MOs <- MOs %>%
|
||||
MOs <- MOs %>%
|
||||
# remove entries that are now a synonym
|
||||
filter(!fullname %in% new_synonyms$fullname) %>%
|
||||
filter(!fullname %in% new_synonyms$fullname) %>%
|
||||
# update the taxonomy
|
||||
left_join(updated_taxonomy, by = "genus", suffix = c("", ".new")) %>%
|
||||
mutate(phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
|
||||
class = ifelse(!is.na(class.new), class.new, class),
|
||||
order = ifelse(!is.na(order.new), order.new, order),
|
||||
family = ifelse(!is.na(family.new), family.new, family)) %>%
|
||||
mutate(
|
||||
phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
|
||||
class = ifelse(!is.na(class.new), class.new, class),
|
||||
order = ifelse(!is.na(order.new), order.new, order),
|
||||
family = ifelse(!is.na(family.new), family.new, family)
|
||||
) %>%
|
||||
select(-ends_with(".new")) %>%
|
||||
# update prevalence based on taxonomy (Berends et al., 2021)
|
||||
mutate(prevalence = case_when(
|
||||
class == "Gammaproteobacteria"
|
||||
| genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
|
||||
class == "Gammaproteobacteria" |
|
||||
genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
|
||||
~ 1,
|
||||
kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi")
|
||||
& (phylum %in% c("Proteobacteria",
|
||||
"Firmicutes",
|
||||
"Actinobacteria",
|
||||
"Sarcomastigophora")
|
||||
| genus %in% MO_PREVALENT_GENERA
|
||||
| rank %in% c("kingdom", "phylum", "class", "order", "family"))
|
||||
kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") &
|
||||
(phylum %in% c(
|
||||
"Proteobacteria",
|
||||
"Firmicutes",
|
||||
"Actinobacteria",
|
||||
"Sarcomastigophora"
|
||||
) |
|
||||
genus %in% MO_PREVALENT_GENERA |
|
||||
rank %in% c("kingdom", "phylum", "class", "order", "family"))
|
||||
~ 2,
|
||||
TRUE ~ 3
|
||||
))
|
||||
|
||||
# add all mssing genera, families and orders
|
||||
MOs <- MOs %>%
|
||||
MOs <- MOs %>%
|
||||
bind_rows(
|
||||
MOs %>%
|
||||
MOs %>%
|
||||
arrange(genus, species) %>%
|
||||
distinct(genus, .keep_all = TRUE) %>%
|
||||
filter(rank == "species", source != "manually added") %>%
|
||||
mutate(mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
|
||||
fullname = genus,
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "genus",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_),
|
||||
MOs %>%
|
||||
mutate(
|
||||
mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
|
||||
fullname = genus,
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "genus",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_
|
||||
),
|
||||
MOs %>%
|
||||
group_by(family) %>%
|
||||
filter(!any(rank == "family") & n() > 1) %>%
|
||||
ungroup() %>%
|
||||
ungroup() %>%
|
||||
arrange(family) %>%
|
||||
distinct(family, .keep_all = TRUE) %>%
|
||||
distinct(family, .keep_all = TRUE) %>%
|
||||
filter(!family %in% c("", NA), source != "manually added") %>%
|
||||
mutate(mo = paste0(substr(kingdom, 1, 1), "_[FAM]_",
|
||||
abbreviate(family,
|
||||
minlength = 8,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides",
|
||||
strict = FALSE)),
|
||||
mo = toupper(mo),
|
||||
fullname = family,
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "family",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_),
|
||||
MOs %>%
|
||||
mutate(
|
||||
mo = paste0(
|
||||
substr(kingdom, 1, 1), "_[FAM]_",
|
||||
abbreviate(family,
|
||||
minlength = 8,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides",
|
||||
strict = FALSE
|
||||
)
|
||||
),
|
||||
mo = toupper(mo),
|
||||
fullname = family,
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "family",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_
|
||||
),
|
||||
MOs %>%
|
||||
group_by(order) %>%
|
||||
filter(!any(rank == "order") & n() > 1) %>%
|
||||
ungroup() %>%
|
||||
ungroup() %>%
|
||||
arrange(order) %>%
|
||||
distinct(order, .keep_all = TRUE) %>%
|
||||
distinct(order, .keep_all = TRUE) %>%
|
||||
filter(!order %in% c("", NA), source != "manually added") %>%
|
||||
mutate(mo = paste0(substr(kingdom, 1, 1), "_[ORD]_",
|
||||
abbreviate(order,
|
||||
minlength = 8,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides",
|
||||
strict = FALSE)),
|
||||
mo = toupper(mo),
|
||||
fullname = order,
|
||||
family = "",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "order",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_)
|
||||
mutate(
|
||||
mo = paste0(
|
||||
substr(kingdom, 1, 1), "_[ORD]_",
|
||||
abbreviate(order,
|
||||
minlength = 8,
|
||||
use.classes = TRUE,
|
||||
method = "both.sides",
|
||||
strict = FALSE
|
||||
)
|
||||
),
|
||||
mo = toupper(mo),
|
||||
fullname = order,
|
||||
family = "",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "order",
|
||||
species_id = "",
|
||||
snomed = NA,
|
||||
ref = NA_character_
|
||||
)
|
||||
) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# clean up
|
||||
MOs <- MOs %>%
|
||||
MOs <- MOs %>%
|
||||
df_remove_nonASCII()
|
||||
|
||||
# Add LPSN record IDs -----------------------------------------------------
|
||||
|
||||
records_ids <- taxonomy %>%
|
||||
mutate(across(1:3, function(x) { x[is.na(x)] <- ""; x}),
|
||||
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))) %>%
|
||||
transmute(fullname, species_id = as.numeric(record_no)) %>%
|
||||
arrange(fullname, species_id) %>%
|
||||
mutate(across(1:3, function(x) {
|
||||
x[is.na(x)] <- ""
|
||||
x
|
||||
}),
|
||||
fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))
|
||||
) %>%
|
||||
transmute(fullname, species_id = as.numeric(record_no)) %>%
|
||||
arrange(fullname, species_id) %>%
|
||||
distinct(fullname, .keep_all = TRUE)
|
||||
message("Adding ", sum(records_ids$fullname %in% microorganisms$fullname), " LPSN record IDs")
|
||||
MOs <- MOs %>%
|
||||
select(-species_id) %>%
|
||||
select(-species_id) %>%
|
||||
left_join(records_ids, by = "fullname") %>%
|
||||
relocate(species_id, .after = ref) %>%
|
||||
mutate(source = case_when(!is.na(species_id) ~ "LPSN",
|
||||
source %unlike% "manual" ~ "CoL",
|
||||
TRUE ~ source))
|
||||
relocate(species_id, .after = ref) %>%
|
||||
mutate(source = case_when(
|
||||
!is.na(species_id) ~ "LPSN",
|
||||
source %unlike% "manual" ~ "CoL",
|
||||
TRUE ~ source
|
||||
))
|
||||
|
||||
# Merge synonyms ----------------------------------------------------------
|
||||
|
||||
# remove synonyms that are now valid names
|
||||
MOs.old <- microorganisms.old %>%
|
||||
MOs.old <- microorganisms.old %>%
|
||||
# add new synonyms
|
||||
bind_rows(new_synonyms) %>%
|
||||
filter(!fullname %in% MOs$fullname) %>%
|
||||
arrange(fullname) %>%
|
||||
distinct(fullname, fullname_new, .keep_all = TRUE) %>%
|
||||
bind_rows(new_synonyms) %>%
|
||||
filter(!fullname %in% MOs$fullname) %>%
|
||||
arrange(fullname) %>%
|
||||
distinct(fullname, fullname_new, .keep_all = TRUE) %>%
|
||||
# add prevalence to old taxonomic names
|
||||
select(-prevalence) %>%
|
||||
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>%
|
||||
select(-prevalence) %>%
|
||||
left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>%
|
||||
# clean up
|
||||
df_remove_nonASCII()
|
||||
|
||||
@ -397,16 +443,18 @@ microorganisms.old <- MOs.old
|
||||
# we keep them both
|
||||
microorganisms <- microorganisms %>%
|
||||
bind_rows(microorganisms %>%
|
||||
filter(fullname == "Branhamella catarrhalis") %>%
|
||||
mutate(mo = "B_MRXLL_CTRR",
|
||||
fullname = "Moraxella catarrhalis",
|
||||
genus = "Moraxella",
|
||||
ref = "Henriksen et al., 1968",
|
||||
species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
|
||||
snomed = as.list(24226003))) %>%
|
||||
arrange(fullname) %>%
|
||||
filter(fullname == "Branhamella catarrhalis") %>%
|
||||
mutate(
|
||||
mo = "B_MRXLL_CTRR",
|
||||
fullname = "Moraxella catarrhalis",
|
||||
genus = "Moraxella",
|
||||
ref = "Henriksen et al., 1968",
|
||||
species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
|
||||
snomed = as.list(24226003)
|
||||
)) %>%
|
||||
arrange(fullname) %>%
|
||||
df_remove_nonASCII()
|
||||
microorganisms.old <- microorganisms.old %>%
|
||||
microorganisms.old <- microorganisms.old %>%
|
||||
filter(fullname != "Moraxella catarrhalis")
|
||||
# ---
|
||||
|
||||
@ -444,7 +492,7 @@ rm(intrinsic_resistant)
|
||||
|
||||
# load new data sets again
|
||||
devtools::load_all(".")
|
||||
source("data-raw/pre-commit-hook.R")
|
||||
source("data-raw/_pre_commit_hook.R")
|
||||
devtools::load_all(".")
|
||||
|
||||
|
||||
|
@ -25,26 +25,28 @@ sapply(files, function(file) {
|
||||
contents <<- c(contents, readLines(file))
|
||||
invisible()
|
||||
})
|
||||
contents <- c(intro,
|
||||
copyright,
|
||||
"",
|
||||
contents)
|
||||
contents <- c(
|
||||
intro,
|
||||
copyright,
|
||||
"",
|
||||
contents
|
||||
)
|
||||
|
||||
# remove lines starting with "#'" and NULL and write to file
|
||||
contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)]
|
||||
|
||||
# now make it independent on UseMethod, since we will not export these functions
|
||||
|
||||
contents <- gsub('UseMethod[(]"(.*?)"[)]',
|
||||
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
|
||||
paste(contents, collapse = "|||"),
|
||||
perl = TRUE) %>%
|
||||
'if ("grouped_data" %in% class(.data)) {||| \\1.grouped_data(.data, ...)||| } else {||| \\1.default(.data, ...)||| }',
|
||||
paste(contents, collapse = "|||"),
|
||||
perl = TRUE
|
||||
) %>%
|
||||
# add commit to intro part
|
||||
gsub("{commit}", commit, ., fixed = TRUE) %>%
|
||||
# add date to intro part
|
||||
gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>%
|
||||
strsplit(split = "|||", fixed = TRUE) %>%
|
||||
unlist() %>%
|
||||
unlist() %>%
|
||||
# add "pm_" as prefix to all functions
|
||||
gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .)
|
||||
|
||||
@ -56,7 +58,7 @@ for (i in seq_len(length(new_pm_names))) {
|
||||
contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents)
|
||||
}
|
||||
|
||||
# replace %>% with %pm>%
|
||||
# replace %>% with %pm>%
|
||||
contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE)
|
||||
# fix for new lines, since n() also existed
|
||||
contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE)
|
||||
@ -70,6 +72,8 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE)
|
||||
contents <- gsub("(pm_)+", "pm_", contents)
|
||||
# special case for pm_distinct(), we need '.keep_all' to work
|
||||
contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
|
||||
# pm_pull does not correct for tibbles, misses the drop argument
|
||||
contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]"
|
||||
|
||||
# who needs US spelling?
|
||||
contents <- contents[!grepl("summarize", contents)]
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -40,88 +40,104 @@ ORGLIST <- read_tsv("data-raw/WHONET/Codes/ORGLIST.txt", na = c("", "NA", "-"),
|
||||
# create data set for generic rules (i.e., AB-specific but not MO-specific)
|
||||
rsi_generic <- DRGLST %>%
|
||||
filter(CLSI == "X" | EUCST == "X") %>%
|
||||
select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
|
||||
mutate(ab = as.ab(ab),
|
||||
across(matches("(CLSI|EUCST)"), as.double)) %>%
|
||||
pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
|
||||
separate(method, into = c("guideline", "method"), sep = "_") %>%
|
||||
select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
|
||||
mutate(
|
||||
ab = as.ab(ab),
|
||||
across(matches("(CLSI|EUCST)"), as.double)
|
||||
) %>%
|
||||
pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
|
||||
separate(method, into = c("guideline", "method"), sep = "_") %>%
|
||||
mutate(method = ifelse(method %like% "D",
|
||||
gsub("D", "DISK_", method, fixed = TRUE),
|
||||
gsub("M", "MIC_", method, fixed = TRUE))) %>%
|
||||
separate(method, into = c("method", "rsi"), sep = "_") %>%
|
||||
gsub("D", "DISK_", method, fixed = TRUE),
|
||||
gsub("M", "MIC_", method, fixed = TRUE)
|
||||
)) %>%
|
||||
separate(method, into = c("method", "rsi"), sep = "_") %>%
|
||||
# I is in the middle, so we only need R and S (saves data)
|
||||
filter(rsi %in% c("R", "S")) %>%
|
||||
filter(rsi %in% c("R", "S")) %>%
|
||||
pivot_wider(names_from = rsi, values_from = value) %>%
|
||||
transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
|
||||
method,
|
||||
site = NA_character_,
|
||||
mo = as.mo("UNKNOWN"),
|
||||
ab,
|
||||
ref_tbl = "Generic rules",
|
||||
disk_dose,
|
||||
breakpoint_S = S,
|
||||
breakpoint_R = R,
|
||||
uti = FALSE) %>%
|
||||
transmute(
|
||||
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
|
||||
method,
|
||||
site = NA_character_,
|
||||
mo = as.mo("UNKNOWN"),
|
||||
ab,
|
||||
ref_tbl = "Generic rules",
|
||||
disk_dose,
|
||||
breakpoint_S = S,
|
||||
breakpoint_R = R,
|
||||
uti = FALSE
|
||||
) %>%
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
|
||||
rsi_generic
|
||||
|
||||
# create data set for AB-specific and MO-specific rules
|
||||
rsi_specific <- DRGLST1 %>%
|
||||
rsi_specific <- DRGLST1 %>%
|
||||
# only support guidelines for humans (for now)
|
||||
filter(HOST == "Human" & SITE_INF %unlike% "canine|feline",
|
||||
# only CLSI and EUCAST
|
||||
GUIDELINES %like% "(CLSI|EUCST)") %>%
|
||||
filter(
|
||||
HOST == "Human" & SITE_INF %unlike% "canine|feline",
|
||||
# only CLSI and EUCAST
|
||||
GUIDELINES %like% "(CLSI|EUCST)"
|
||||
) %>%
|
||||
# get microorganism names from another WHONET table
|
||||
mutate(ORG_CODE = tolower(ORG_CODE)) %>%
|
||||
mutate(ORG_CODE = tolower(ORG_CODE)) %>%
|
||||
left_join(ORGLIST %>%
|
||||
transmute(ORG_CODE = tolower(ORG),
|
||||
SCT_TEXT = case_when(is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
|
||||
is.na(SCT_TEXT) ~ ORGANISM,
|
||||
TRUE ~ SCT_TEXT)) %>%
|
||||
# WHO for 'Generic'
|
||||
bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
|
||||
# WHO for 'Enterobacterales'
|
||||
bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))
|
||||
) %>%
|
||||
transmute(
|
||||
ORG_CODE = tolower(ORG),
|
||||
SCT_TEXT = case_when(
|
||||
is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
|
||||
is.na(SCT_TEXT) ~ ORGANISM,
|
||||
TRUE ~ SCT_TEXT
|
||||
)
|
||||
) %>%
|
||||
# WHO for 'Generic'
|
||||
bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
|
||||
# WHO for 'Enterobacterales'
|
||||
bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))) %>%
|
||||
# still some manual cleaning required
|
||||
filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
|
||||
transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
|
||||
method = toupper(TESTMETHOD),
|
||||
site = SITE_INF,
|
||||
mo = as.mo(SCT_TEXT),
|
||||
ab = as.ab(WHON5_CODE),
|
||||
ref_tbl = REF_TABLE,
|
||||
disk_dose = POTENCY,
|
||||
breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
|
||||
breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
|
||||
uti = site %like% "(UTI|urinary|urine)") %>%
|
||||
filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
|
||||
transmute(
|
||||
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
|
||||
method = toupper(TESTMETHOD),
|
||||
site = SITE_INF,
|
||||
mo = as.mo(SCT_TEXT),
|
||||
ab = as.ab(WHON5_CODE),
|
||||
ref_tbl = REF_TABLE,
|
||||
disk_dose = POTENCY,
|
||||
breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
|
||||
breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
|
||||
uti = site %like% "(UTI|urinary|urine)"
|
||||
) %>%
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
|
||||
rsi_specific
|
||||
|
||||
rsi_translation <- rsi_generic %>%
|
||||
bind_rows(rsi_specific) %>%
|
||||
rsi_translation <- rsi_generic %>%
|
||||
bind_rows(rsi_specific) %>%
|
||||
# add the taxonomic rank index, used for sorting (so subspecies match first, order matches last)
|
||||
mutate(rank_index = case_when(mo_rank(mo) %like% "(infra|sub)" ~ 1,
|
||||
mo_rank(mo) == "species" ~ 2,
|
||||
mo_rank(mo) == "genus" ~ 3,
|
||||
mo_rank(mo) == "family" ~ 4,
|
||||
mo_rank(mo) == "order" ~ 5,
|
||||
TRUE ~ 6),
|
||||
.after = mo) %>%
|
||||
arrange(desc(guideline), ab, mo, method) %>%
|
||||
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
|
||||
mutate(
|
||||
rank_index = case_when(
|
||||
mo_rank(mo) %like% "(infra|sub)" ~ 1,
|
||||
mo_rank(mo) == "species" ~ 2,
|
||||
mo_rank(mo) == "genus" ~ 3,
|
||||
mo_rank(mo) == "family" ~ 4,
|
||||
mo_rank(mo) == "order" ~ 5,
|
||||
TRUE ~ 6
|
||||
),
|
||||
.after = mo
|
||||
) %>%
|
||||
arrange(desc(guideline), ab, mo, method) %>%
|
||||
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
|
||||
as.data.frame(stringsAsFactors = FALSE)
|
||||
|
||||
# disks MUST be 6-50 mm, so correct where that is wrong:
|
||||
rsi_translation[which(rsi_translation$method == "DISK" &
|
||||
(is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
|
||||
(is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
|
||||
rsi_translation[which(rsi_translation$method == "DISK" &
|
||||
(is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
|
||||
(is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
|
||||
m <- unique(as.double(as.mic(levels(as.mic(1)))))
|
||||
rsi_translation[which(rsi_translation$method == "MIC" &
|
||||
is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
|
||||
is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
|
||||
rsi_translation[which(rsi_translation$method == "MIC" &
|
||||
is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
|
||||
is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
|
||||
|
||||
# WHONET has no >1024 but instead uses 1025, 513, etc, so raise these one higher valid MIC factor level:
|
||||
rsi_translation[which(rsi_translation$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
|
||||
@ -134,18 +150,18 @@ rsi_translation[which(rsi_translation$breakpoint_R == 1025), "breakpoint_R"] <-
|
||||
# WHONET file: S <= 8 and R >= 16
|
||||
# this will make an MIC of 12 I, which should be R, so:
|
||||
eucast_mics <- which(rsi_translation$guideline %like% "EUCAST" &
|
||||
rsi_translation$method == "MIC" &
|
||||
log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
|
||||
!is.na(rsi_translation$breakpoint_R))
|
||||
rsi_translation$method == "MIC" &
|
||||
log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
|
||||
!is.na(rsi_translation$breakpoint_R))
|
||||
old_R <- rsi_translation[eucast_mics, "breakpoint_R", drop = TRUE]
|
||||
old_S <- rsi_translation[eucast_mics, "breakpoint_S", drop = TRUE]
|
||||
new_R <- 2 ^ (log2(old_R) - 1)
|
||||
new_R <- 2^(log2(old_R) - 1)
|
||||
new_R[new_R < old_S | is.na(as.mic(new_R))] <- old_S[new_R < old_S | is.na(as.mic(new_R))]
|
||||
rsi_translation[eucast_mics, "breakpoint_R"] <- new_R
|
||||
eucast_disks <- which(rsi_translation$guideline %like% "EUCAST" &
|
||||
rsi_translation$method == "DISK" &
|
||||
rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
|
||||
!is.na(rsi_translation$breakpoint_R))
|
||||
rsi_translation$method == "DISK" &
|
||||
rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
|
||||
!is.na(rsi_translation$breakpoint_R))
|
||||
rsi_translation[eucast_disks, "breakpoint_R"] <- rsi_translation[eucast_disks, "breakpoint_R", drop = TRUE] + 1
|
||||
|
||||
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
|
||||
|
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -31,8 +31,8 @@ library(tidyverse)
|
||||
# - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
|
||||
# - check that current online version is higher than SNOMED_VERSION$current_version
|
||||
# - if so, click on 'Download Value Set', choose 'TXT'
|
||||
snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
|
||||
select(1:2) %>%
|
||||
snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
|
||||
select(1:2) %>%
|
||||
set_names(c("snomed", "mo"))
|
||||
|
||||
# save all valid genera, species and subspecies
|
||||
@ -41,17 +41,21 @@ vctr <- tolower(vctr[vctr %like% "^[a-z]+$"])
|
||||
|
||||
# remove all parts of the name that are no valid values in genera, species or subspecies
|
||||
# this takes ~20 seconds
|
||||
snomed <- snomed %>%
|
||||
mutate(fullname = vapply(FUN.VALUE = character(1),
|
||||
# split on space and/or comma
|
||||
strsplit(tolower(mo), "[ ,]"),
|
||||
function(x) trimws(paste0(x[x %in% vctr], collapse = " "))),
|
||||
# remove " group"
|
||||
fullname = gsub(" group", "", fullname, fixed = TRUE))
|
||||
snomed <- snomed %>%
|
||||
mutate(
|
||||
fullname = vapply(
|
||||
FUN.VALUE = character(1),
|
||||
# split on space and/or comma
|
||||
strsplit(tolower(mo), "[ ,]"),
|
||||
function(x) trimws(paste0(x[x %in% vctr], collapse = " "))
|
||||
),
|
||||
# remove " group"
|
||||
fullname = gsub(" group", "", fullname, fixed = TRUE)
|
||||
)
|
||||
|
||||
snomed_keep <- snomed %>%
|
||||
filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
|
||||
group_by(fullname_lower = fullname) %>%
|
||||
snomed_keep <- snomed %>%
|
||||
filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
|
||||
group_by(fullname_lower = fullname) %>%
|
||||
summarise(snomed = list(snomed))
|
||||
|
||||
message(nrow(snomed_keep), " MO's will get a SNOMED code.")
|
||||
@ -65,10 +69,9 @@ microorganisms <- microorganisms %>%
|
||||
# join new snomed
|
||||
left_join(snomed_keep) %>%
|
||||
# remove dummy var
|
||||
select(-fullname_lower) %>%
|
||||
select(-fullname_lower) %>%
|
||||
AMR:::dataset_UTF8_to_ASCII()
|
||||
|
||||
# don't forget to update the version number in SNOMED_VERSION in ./R/globals.R!
|
||||
|
||||
# usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")
|
||||
|
||||
|
@ -1,68 +1,77 @@
|
||||
microorganisms <- microorganisms |> bind_rows(
|
||||
# Toxoplasma
|
||||
data.frame(mo = "P_TXPL_GOND", # species
|
||||
fullname = "Toxoplasma gondii",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "Toxoplasma",
|
||||
species = "gondii",
|
||||
subspecies = "",
|
||||
rank = "species",
|
||||
ref = "Nicolle et al., 1908",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE),
|
||||
data.frame(mo = "P_TXPL", # genus
|
||||
fullname = "Toxoplasma",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "Toxoplasma",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "genus",
|
||||
ref = "Nicolle et al., 1909",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE),
|
||||
data.frame(mo = "[FAM]_SRCCYSTD", # family
|
||||
fullname = "Sarcocystidae",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "family",
|
||||
ref = "Poche, 1913",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE),
|
||||
data.frame(mo = "[ORD]_EUCCCDRD", # order
|
||||
fullname = "Eucoccidiorida",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "order",
|
||||
ref = "Leger et al., 1910",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE),
|
||||
) |>
|
||||
microorganisms <- microorganisms |>
|
||||
bind_rows(
|
||||
# Toxoplasma
|
||||
data.frame(
|
||||
mo = "P_TXPL_GOND", # species
|
||||
fullname = "Toxoplasma gondii",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "Toxoplasma",
|
||||
species = "gondii",
|
||||
subspecies = "",
|
||||
rank = "species",
|
||||
ref = "Nicolle et al., 1908",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE
|
||||
),
|
||||
data.frame(
|
||||
mo = "P_TXPL", # genus
|
||||
fullname = "Toxoplasma",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "Toxoplasma",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "genus",
|
||||
ref = "Nicolle et al., 1909",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE
|
||||
),
|
||||
data.frame(
|
||||
mo = "[FAM]_SRCCYSTD", # family
|
||||
fullname = "Sarcocystidae",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "Sarcocystidae",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "family",
|
||||
ref = "Poche, 1913",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE
|
||||
),
|
||||
data.frame(
|
||||
mo = "[ORD]_EUCCCDRD", # order
|
||||
fullname = "Eucoccidiorida",
|
||||
kingdom = "(unknown kingdom)",
|
||||
phylum = "Apicomplexa",
|
||||
class = "Conoidasida",
|
||||
order = "Eucoccidiorida",
|
||||
family = "",
|
||||
genus = "",
|
||||
species = "",
|
||||
subspecies = "",
|
||||
rank = "order",
|
||||
ref = "Leger et al., 1910",
|
||||
species_id = NA_real_,
|
||||
source = "manually added",
|
||||
prevalence = 2,
|
||||
stringsAsFactors = FALSE
|
||||
),
|
||||
) |>
|
||||
arrange(fullname)
|
||||
|
Reference in New Issue
Block a user