mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 07:51:57 +02:00
(v1.7.0.9000) package size
This commit is contained in:
Binary file not shown.
@ -31,109 +31,6 @@ devtools::load_all(quiet = TRUE)
|
||||
|
||||
old_globalenv <- ls(envir = globalenv())
|
||||
|
||||
# Helper functions --------------------------------------------------------
|
||||
|
||||
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
# Determination of which staphylococcal species are CoNS/CoPS according to:
|
||||
# - Becker et al. 2014, PMID 25278577
|
||||
# - Becker et al. 2019, PMID 30872103
|
||||
# - Becker et al. 2020, PMID 32056452
|
||||
# this function returns class <mo>
|
||||
MO_staph <- AMR::microorganisms
|
||||
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
|
||||
if (type == "CoNS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
|
||||
"croceilyticus",
|
||||
"debuckii", "devriesei", "edaphicus", "epidermidis",
|
||||
"equorum", "felis", "fleurettii", "gallinarum",
|
||||
"haemolyticus", "hominis", "jettensis", "kloosii",
|
||||
"lentus", "lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
|
||||
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
|
||||
"sciuri", "simulans", "stepanovicii", "succinus",
|
||||
"ureilyticus",
|
||||
"vitulinus", "vitulus", "warneri", "xylosus")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
|
||||
"mo", drop = TRUE]
|
||||
} else if (type == "CoPS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
|
||||
"agnetis", "argenteus",
|
||||
"cornubiensis",
|
||||
"delphini", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schweitzeri", "simiae")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
|
||||
"mo", drop = TRUE]
|
||||
}
|
||||
}
|
||||
|
||||
create_AB_lookup <- function() {
|
||||
AB_lookup <- AMR::antibiotics
|
||||
AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
|
||||
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
|
||||
c("ab", "atc", "cid", "name",
|
||||
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
|
||||
drop = FALSE]),
|
||||
stringsAsFactors = FALSE)),
|
||||
function(x) {
|
||||
x <- generalise_antibiotic_name(unname(unlist(x)))
|
||||
x[x != ""]
|
||||
}))
|
||||
AB_lookup
|
||||
}
|
||||
|
||||
create_MO_lookup <- function() {
|
||||
MO_lookup <- AMR::microorganisms
|
||||
|
||||
MO_lookup$kingdom_index <- NA_real_
|
||||
MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
|
||||
MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
|
||||
MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
|
||||
MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
|
||||
# all the rest
|
||||
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
|
||||
|
||||
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
|
||||
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
|
||||
MO_lookup$species,
|
||||
MO_lookup$subspecies)))
|
||||
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
|
||||
MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
|
||||
MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
|
||||
|
||||
# add a column with only "e coli" like combinations
|
||||
MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
|
||||
|
||||
# so arrange data on prevalence first, then kingdom, then full name
|
||||
MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]
|
||||
}
|
||||
|
||||
create_MO.old_lookup <- function() {
|
||||
MO.old_lookup <- AMR::microorganisms.old
|
||||
MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))
|
||||
|
||||
# add a column with only "e coli"-like combinations
|
||||
MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))
|
||||
|
||||
# so arrange data on prevalence first, then full name
|
||||
MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]
|
||||
}
|
||||
|
||||
create_intr_resistance <- function() {
|
||||
# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
|
||||
paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],
|
||||
AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Save internal data to R/sysdata.rda -------------------------------------
|
||||
|
||||
# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
|
||||
@ -170,24 +67,50 @@ translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = "")
|
||||
|
||||
# Old microorganism codes
|
||||
microorganisms.translation <- readRDS("data-raw/microorganisms.translation.rds")
|
||||
|
||||
# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
|
||||
INTRINSIC_R <- create_intr_resistance()
|
||||
|
||||
# for checking input in `language` argument in e.g. mo_*() and ab_*() functions
|
||||
LANGUAGES_SUPPORTED <- sort(c("en", colnames(translations_file)[nchar(colnames(translations_file)) == 2]))
|
||||
|
||||
# vectors of CoNS and CoPS, improves speed in as.mo()
|
||||
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
# Determination of which staphylococcal species are CoNS/CoPS according to:
|
||||
# - Becker et al. 2014, PMID 25278577
|
||||
# - Becker et al. 2019, PMID 30872103
|
||||
# - Becker et al. 2020, PMID 32056452
|
||||
# this function returns class <mo>
|
||||
MO_staph <- AMR::microorganisms
|
||||
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
|
||||
if (type == "CoNS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
|
||||
"croceilyticus",
|
||||
"debuckii", "devriesei", "edaphicus", "epidermidis",
|
||||
"equorum", "felis", "fleurettii", "gallinarum",
|
||||
"haemolyticus", "hominis", "jettensis", "kloosii",
|
||||
"lentus", "lugdunensis", "massiliensis", "microti",
|
||||
"muscae", "nepalensis", "pasteuri", "petrasii",
|
||||
"pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
|
||||
"pulvereri", "rostri", "saccharolyticus", "saprophyticus",
|
||||
"sciuri", "simulans", "stepanovicii", "succinus",
|
||||
"ureilyticus",
|
||||
"vitulinus", "vitulus", "warneri", "xylosus")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
|
||||
"mo", drop = TRUE]
|
||||
} else if (type == "CoPS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
|
||||
"agnetis", "argenteus",
|
||||
"cornubiensis",
|
||||
"delphini", "lutrae",
|
||||
"hyicus", "intermedius",
|
||||
"pseudintermedius", "pseudointermedius",
|
||||
"schweitzeri", "simiae")
|
||||
| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
|
||||
"mo", drop = TRUE]
|
||||
}
|
||||
}
|
||||
MO_CONS <- create_species_cons_cops("CoNS")
|
||||
MO_COPS <- create_species_cons_cops("CoPS")
|
||||
|
||||
# reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed
|
||||
AB_lookup <- create_AB_lookup()
|
||||
MO_lookup <- create_MO_lookup()
|
||||
MO.old_lookup <- create_MO.old_lookup()
|
||||
|
||||
# antibiotic groups
|
||||
# (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv)
|
||||
globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab")
|
||||
@ -220,14 +143,10 @@ DEFINED_AB_GROUPS <- DEFINED_AB_GROUPS[!DEFINED_AB_GROUPS %in% globalenv_before_
|
||||
# Export to package as internal data ----
|
||||
usethis::use_data(eucast_rules_file,
|
||||
translations_file,
|
||||
microorganisms.translation,
|
||||
INTRINSIC_R,
|
||||
LANGUAGES_SUPPORTED,
|
||||
MO_CONS,
|
||||
MO_COPS,
|
||||
AB_lookup,
|
||||
MO_lookup,
|
||||
MO.old_lookup,
|
||||
AMINOGLYCOSIDES,
|
||||
AMINOPENICILLINS,
|
||||
CARBAPENEMS,
|
||||
|
@ -872,12 +872,12 @@ View(old_new)
|
||||
# set new MO codes as names to existing data sets
|
||||
rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL)
|
||||
microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL)
|
||||
microorganisms.translation <- AMR:::microorganisms.translation %>%
|
||||
bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
|
||||
filter(!mo_old %in% MOs$mo) %>%
|
||||
mutate(mo_new = mo_name(mo_new, language = NULL)) %>%
|
||||
bind_rows(old_new %>% select(mo_old, mo_new)) %>%
|
||||
distinct(mo_old, .keep_all = TRUE)
|
||||
# microorganisms.translation <- AMR:::microorganisms.translation %>%
|
||||
# bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
|
||||
# filter(!mo_old %in% MOs$mo) %>%
|
||||
# mutate(mo_new = mo_name(mo_new, language = NULL)) %>%
|
||||
# bind_rows(old_new %>% select(mo_old, mo_new)) %>%
|
||||
# distinct(mo_old, .keep_all = TRUE)
|
||||
|
||||
# arrange the data sets to save
|
||||
MOs <- MOs %>% arrange(fullname)
|
||||
@ -911,23 +911,23 @@ devtools::load_all(".")
|
||||
rsi_translation$mo <- as.mo(rsi_translation$mo)
|
||||
microorganisms.codes$mo <- as.mo(microorganisms.codes$mo)
|
||||
class(microorganisms.codes$mo) <- c("mo", "character")
|
||||
microorganisms.translation <- microorganisms.translation %>%
|
||||
# (to do: add last package version to column pkg_version)
|
||||
left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
|
||||
by = c("mo_new" = "fullname")) %>%
|
||||
mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>%
|
||||
left_join(microorganisms[, c("fullname", "mo")], # as is microorganisms
|
||||
by = c("name" = "fullname")) %>%
|
||||
select(mo_old, mo_new = mo) %>%
|
||||
filter(!is.na(mo_old), !is.na(mo_new))
|
||||
class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
|
||||
class(microorganisms.translation$mo_new) <- c("mo", "character")
|
||||
# microorganisms.translation <- microorganisms.translation %>%
|
||||
# # (to do: add last package version to column pkg_version)
|
||||
# left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
|
||||
# by = c("mo_new" = "fullname")) %>%
|
||||
# mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>%
|
||||
# left_join(microorganisms[, c("fullname", "mo")], # as is microorganisms
|
||||
# by = c("name" = "fullname")) %>%
|
||||
# select(mo_old, mo_new = mo) %>%
|
||||
# filter(!is.na(mo_old), !is.na(mo_new))
|
||||
# class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
|
||||
# class(microorganisms.translation$mo_new) <- c("mo", "character")
|
||||
# save those to the package
|
||||
usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
|
||||
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
|
||||
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
|
||||
# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
|
||||
# to save microorganisms.translation internally to the package
|
||||
source("data-raw/_internals.R")
|
||||
# source("data-raw/_internals.R")
|
||||
|
||||
# load new data sets again
|
||||
devtools::load_all(".")
|
||||
@ -935,7 +935,7 @@ devtools::load_all(".")
|
||||
# and check: these codes should not be missing (will otherwise throw a unit test error):
|
||||
AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
|
||||
AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
|
||||
AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
|
||||
# AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
|
||||
|
||||
# update the example_isolates data set
|
||||
example_isolates$mo <- as.mo(example_isolates$mo)
|
||||
|
@ -380,37 +380,37 @@ MOs.old <- microorganisms.old %>%
|
||||
# Keep old codes for translation ------------------------------------------
|
||||
|
||||
# add removed microbial IDs to the internal translation table so old package versions keep working
|
||||
MOs.translation <- microorganisms %>%
|
||||
filter(!mo %in% MOs$mo) %>%
|
||||
select(mo, fullname) %>%
|
||||
left_join(new_synonyms) %>%
|
||||
left_join(MOs %>% transmute(fullname_new = fullname, mo2 = as.character(mo))) %>%
|
||||
select(mo_old = mo, mo_new = mo2) %>%
|
||||
distinct()
|
||||
MOs.translation <- AMR:::microorganisms.translation %>%
|
||||
left_join(MOs.translation %>% select(mo_new_update = mo_new, mo_new = mo_old)) %>%
|
||||
mutate(mo_new = as.character(ifelse(!is.na(mo_new_update), mo_new_update, mo_new))) %>%
|
||||
select(-mo_new_update) %>%
|
||||
bind_rows(
|
||||
# old IDs used in microorganisms.codes must put in here as well
|
||||
microorganisms.codes %>%
|
||||
filter(!mo %in% MOs$mo) %>%
|
||||
transmute(mo_old = mo, fullname = mo_name(mo)) %>%
|
||||
left_join(MOs.old %>%
|
||||
select(fullname, fullname_new)) %>%
|
||||
left_join(MOs %>%
|
||||
select(mo_new = mo, fullname_new = fullname)) %>%
|
||||
transmute(mo_old = as.character(mo_old), mo_new)) %>%
|
||||
arrange(mo_old) %>%
|
||||
filter(mo_old != mo_new,
|
||||
!mo_old %in% MOs$mo) %>%
|
||||
left_join(., .,
|
||||
by = c("mo_new" = "mo_old"),
|
||||
suffix = c("", ".2")) %>%
|
||||
mutate(mo_new = ifelse(!is.na(mo_new.2), mo_new.2, mo_new)) %>%
|
||||
distinct(mo_old, mo_new) %>%
|
||||
# clean up
|
||||
df_remove_nonASCII()
|
||||
# MOs.translation <- microorganisms %>%
|
||||
# filter(!mo %in% MOs$mo) %>%
|
||||
# select(mo, fullname) %>%
|
||||
# left_join(new_synonyms) %>%
|
||||
# left_join(MOs %>% transmute(fullname_new = fullname, mo2 = as.character(mo))) %>%
|
||||
# select(mo_old = mo, mo_new = mo2) %>%
|
||||
# distinct()
|
||||
# MOs.translation <- AMR:::microorganisms.translation %>%
|
||||
# left_join(MOs.translation %>% select(mo_new_update = mo_new, mo_new = mo_old)) %>%
|
||||
# mutate(mo_new = as.character(ifelse(!is.na(mo_new_update), mo_new_update, mo_new))) %>%
|
||||
# select(-mo_new_update) %>%
|
||||
# bind_rows(
|
||||
# # old IDs used in microorganisms.codes must put in here as well
|
||||
# microorganisms.codes %>%
|
||||
# filter(!mo %in% MOs$mo) %>%
|
||||
# transmute(mo_old = mo, fullname = mo_name(mo)) %>%
|
||||
# left_join(MOs.old %>%
|
||||
# select(fullname, fullname_new)) %>%
|
||||
# left_join(MOs %>%
|
||||
# select(mo_new = mo, fullname_new = fullname)) %>%
|
||||
# transmute(mo_old = as.character(mo_old), mo_new)) %>%
|
||||
# arrange(mo_old) %>%
|
||||
# filter(mo_old != mo_new,
|
||||
# !mo_old %in% MOs$mo) %>%
|
||||
# left_join(., .,
|
||||
# by = c("mo_new" = "mo_old"),
|
||||
# suffix = c("", ".2")) %>%
|
||||
# mutate(mo_new = ifelse(!is.na(mo_new.2), mo_new.2, mo_new)) %>%
|
||||
# distinct(mo_old, mo_new) %>%
|
||||
# # clean up
|
||||
# df_remove_nonASCII()
|
||||
|
||||
message("microorganisms new: ", sum(!MOs$fullname %in% c(microorganisms$fullname, MOs.old$fullname)))
|
||||
message("microorganisms renamed: ", sum(!MOs.old$fullname %in% microorganisms.old$fullname))
|
||||
@ -424,12 +424,12 @@ class(MOs.translation$mo_new) <- c("mo", "character")
|
||||
|
||||
microorganisms <- MOs
|
||||
microorganisms.old <- MOs.old
|
||||
microorganisms.translation <- MOs.translation
|
||||
# microorganisms.translation <- MOs.translation
|
||||
|
||||
# on the server, do:
|
||||
usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")
|
||||
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
|
||||
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
|
||||
# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
|
||||
rm(microorganisms)
|
||||
rm(microorganisms.old)
|
||||
rm(microorganisms.translation)
|
||||
|
Reference in New Issue
Block a user