AMR/data-raw/_internals.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Data Analysis for R                   #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2018-2021 Berends MS, Luz CF et al.                              #
# Developed at the University of Groningen, the Netherlands, in        #
# collaboration with non-profit organisations Certe Medical            #
# Diagnostics & Advice, and University Medical Center Groningen.       # 
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

# Run this file to update the package using:
# source("data-raw/_internals.R")

library(dplyr, warn.conflicts = FALSE)
devtools::load_all(quiet = TRUE)

old_globalenv <- ls(envir = globalenv())

# Helper functions --------------------------------------------------------

create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
  # Determination of which staphylococcal species are CoNS/CoPS according to:
  # - Becker et al. 2014, PMID 25278577
  # - Becker et al. 2019, PMID 30872103
  # - Becker et al. 2020, PMID 32056452
  # this function returns class <mo>
  MO_staph <- AMR::microorganisms
  MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
  if (type == "CoNS") {
    MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
                                           "auricularis", "caeli", "capitis", "caprae", 
                                           "carnosus", "chromogenes", "cohnii", "condimenti",
                                           "debuckii", "devriesei", "edaphicus", "epidermidis",
                                           "equorum", "felis", "fleurettii", "gallinarum",
                                           "haemolyticus", "hominis", "jettensis", "kloosii",
                                           "lentus", "lugdunensis", "massiliensis", "microti",
                                           "muscae", "nepalensis", "pasteuri", "petrasii",
                                           "pettenkoferi", "piscifermentans", "pseudoxylosus",
                                           "rostri", "saccharolyticus", "saprophyticus",
                                           "sciuri", "simulans", "stepanovicii", "succinus",
                                           "vitulinus", "warneri", "xylosus")
                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
             "mo", drop = TRUE]
  } else if (type == "CoPS") {
    MO_staph[which(MO_staph$species %in% c("coagulase-positive",
                                           "simiae", "agnetis",
                                           "delphini", "lutrae",
                                           "hyicus", "intermedius",
                                           "pseudintermedius", "pseudointermedius",
                                           "schweitzeri", "argenteus")
                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
             "mo", drop = TRUE]
  }
}

create_AB_lookup <- function() {
  AB_lookup <- AMR::antibiotics
  AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
  AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
  AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
  AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
  AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, 
                                                                               c("ab", "atc", "cid", "name",
                                                                                 colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
                                                                               drop = FALSE]),
                                                                   stringsAsFactors = FALSE)),
                                             function(x) {
                                               x <- generalise_antibiotic_name(unname(unlist(x)))
                                               x[x != ""]
                                             }))
  AB_lookup
}

create_MO_lookup <- function() {
  MO_lookup <- AMR::microorganisms
  
  MO_lookup$kingdom_index <- NA_real_
  MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
  MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
  MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
  MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
  # all the rest
  MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
  
  # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
  MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, 
                                                   MO_lookup$species,
                                                   MO_lookup$subspecies)))
  ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
  MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
  
  # add a column with only "e coli" like combinations
  MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
  
  # so arrange data on prevalence first, then kingdom, then full name
  MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]
}

create_MO.old_lookup <- function() {
  MO.old_lookup <- AMR::microorganisms.old
  MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))
  
  # add a column with only "e coli"-like combinations
  MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))
  
  # so arrange data on prevalence first, then full name
  MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]
}

create_intr_resistance <- function() {
  # for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
  paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],
        AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])
}


# Save internal data sets to R/sysdata.rda --------------------------------

# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
eucast_rules_file <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
                                       skip = 10,
                                       sep = "\t",
                                       stringsAsFactors = FALSE,
                                       header = TRUE,
                                       strip.white = TRUE,
                                       na = c(NA, "", NULL)) %>% 
  # take the order of the reference.rule_group column in the original data file
  mutate(reference.rule_group = factor(reference.rule_group,
                                       levels = unique(reference.rule_group),
                                       ordered = TRUE),
         sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% 
  arrange(reference.rule_group,
          reference.version,
          sorting_rule,
          reference.rule) %>% 
  mutate(reference.rule_group = as.character(reference.rule_group)) %>% 
  select(-sorting_rule)

# Translations
translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
                                       sep = "\t",
                                       stringsAsFactors = FALSE,
                                       header = TRUE,
                                       blank.lines.skip = TRUE,
                                       fill = TRUE,
                                       strip.white = TRUE,
                                       encoding = "UTF-8",
                                       fileEncoding = "UTF-8",
                                       na.strings = c(NA, "", NULL),
                                       allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
                                       quote = "")

# Old microorganism codes
microorganisms.translation <- readRDS("data-raw/microorganisms.translation.rds")

# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
INTRINSIC_R <- create_intr_resistance()

# for checking input in `language` argument in e.g. mo_*() and ab_*() functions
LANGUAGES_SUPPORTED <- sort(c("en", unique(translations_file$lang)))

# vectors of CoNS and CoPS, improves speed in as.mo()
MO_CONS <- create_species_cons_cops("CoNS")
MO_COPS <- create_species_cons_cops("CoPS")

# reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed
AB_lookup <- create_AB_lookup()
MO_lookup <- create_MO_lookup()
MO.old_lookup <- create_MO.old_lookup()

# Export to package as internal data ----
usethis::use_data(eucast_rules_file, 
                  translations_file,
                  microorganisms.translation,
                  INTRINSIC_R,
                  LANGUAGES_SUPPORTED,
                  MO_CONS,
                  MO_COPS,
                  AB_lookup,
                  MO_lookup,
                  MO.old_lookup,
                  internal = TRUE,
                  overwrite = TRUE,
                  version = 2,
                  compress = "xz")

# Export data sets to the repository in different formats -----------------

write_md5 <- function(object) {
  conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
  writeLines(digest::digest(object, "md5"), conn)
  close(conn)
}
changed_md5 <- function(object) {
  tryCatch({
    conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
    compared <- digest::digest(object, "md5") != readLines(con = conn)
    close(conn)
    compared
  }, error = function(e) TRUE)
}
usethis::ui_done(paste0("Saving raw data to {usethis::ui_value('/data-raw/')}"))

# give official names to ABs and MOs
rsi <- dplyr::mutate(rsi_translation, ab = ab_name(ab), mo = mo_name(mo))
if (changed_md5(rsi)) {
  write_md5(rsi)
  try(saveRDS(rsi, "data-raw/rsi_translation.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(rsi, "data-raw/rsi_translation.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(rsi, "data-raw/rsi_translation.sas"), silent = TRUE)
  try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE)
  try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE)
}

mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
if (changed_md5(mo)) {
  write_md5(mo)
  try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
  try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
  try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)
}

if (changed_md5(microorganisms.old)) {
  write_md5(microorganisms.old)
  try(saveRDS(microorganisms.old, "data-raw/microorganisms.old.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(microorganisms.old, "data-raw/microorganisms.old.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(microorganisms.old, "data-raw/microorganisms.old.sas"), silent = TRUE)
  try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE)
  try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE)
}

ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
if (changed_md5(ab)) {
  write_md5(ab)
  try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)
  try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)
  try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE)
}

av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
if (changed_md5(av)) {
  write_md5(av)
  try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)
  try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)
  try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE)
}

if (changed_md5(intrinsic_resistant)) {
  write_md5(intrinsic_resistant)
  try(saveRDS(intrinsic_resistant, "data-raw/intrinsic_resistant.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(intrinsic_resistant, "data-raw/intrinsic_resistant.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(intrinsic_resistant, "data-raw/intrinsic_resistant.sas"), silent = TRUE)
  try(haven::write_sav(intrinsic_resistant, "data-raw/intrinsic_resistant.sav"), silent = TRUE)
  try(haven::write_dta(intrinsic_resistant, "data-raw/intrinsic_resistant.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(intrinsic_resistant, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)
}

if (changed_md5(dosage)) {
  write_md5(dosage)
  try(saveRDS(dosage, "data-raw/dosage.rds", version = 2, compress = "xz"), silent = TRUE)
  try(write.table(dosage, "data-raw/dosage.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
  try(haven::write_sas(dosage, "data-raw/dosage.sas"), silent = TRUE)
  try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)
  try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)
  try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)
}

# remove leftovers from global env
current_globalenv <- ls(envir = globalenv())
rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
rm(current_globalenv)
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# ==================================================================== #`
			`# TITLE #`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# Antimicrobial Resistance (AMR) Data Analysis for R #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# #`
			`# SOURCE #`
(v1.2.0.9032) Permanently move to GitHub 2020-07-09 20:07:39 +02:00			`# https://github.com/msberends/AMR #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# #`
			`# LICENCE #`
(v1.4.0.9047) unit tests 2020-12-27 00:30:28 +01:00			`# (c) 2018-2021 Berends MS, Luz CF et al. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# Developed at the University of Groningen, the Netherlands, in #`
			`# collaboration with non-profit organisations Certe Medical #`
			`# Diagnostics & Advice, and University Medical Center Groningen. #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# #`
			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# #`
			`# Visit our website for the full manual and a complete tutorial about #`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# ==================================================================== #`

(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# Run this file to update the package using:`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# source("data-raw/_internals.R")`
(v0.8.0.9002) eucast_rules() fix for S. maltophilia 2019-10-23 14:48:25 +02:00
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`library(dplyr, warn.conflicts = FALSE)`
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`devtools::load_all(quiet = TRUE)`

			`old_globalenv <- ls(envir = globalenv())`

			`# Helper functions --------------------------------------------------------`

			`create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {`
			`# Determination of which staphylococcal species are CoNS/CoPS according to:`
			`# - Becker et al. 2014, PMID 25278577`
			`# - Becker et al. 2019, PMID 30872103`
			`# - Becker et al. 2020, PMID 32056452`
			`# this function returns class <mo>`
			`MO_staph <- AMR::microorganisms`
			`MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]`
			`if (type == "CoNS") {`
			`MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",`
			`"auricularis", "caeli", "capitis", "caprae",`
			`"carnosus", "chromogenes", "cohnii", "condimenti",`
			`"debuckii", "devriesei", "edaphicus", "epidermidis",`
			`"equorum", "felis", "fleurettii", "gallinarum",`
			`"haemolyticus", "hominis", "jettensis", "kloosii",`
			`"lentus", "lugdunensis", "massiliensis", "microti",`
			`"muscae", "nepalensis", "pasteuri", "petrasii",`
			`"pettenkoferi", "piscifermentans", "pseudoxylosus",`
			`"rostri", "saccharolyticus", "saprophyticus",`
			`"sciuri", "simulans", "stepanovicii", "succinus",`
			`"vitulinus", "warneri", "xylosus")`
			`\| (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),`
			`"mo", drop = TRUE]`
			`} else if (type == "CoPS") {`
			`MO_staph[which(MO_staph$species %in% c("coagulase-positive",`
			`"simiae", "agnetis",`
			`"delphini", "lutrae",`
			`"hyicus", "intermedius",`
			`"pseudintermedius", "pseudointermedius",`
			`"schweitzeri", "argenteus")`
			`\| (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),`
			`"mo", drop = TRUE]`
			`}`
			`}`

			`create_AB_lookup <- function() {`
			`AB_lookup <- AMR::antibiotics`
			`AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)`
			`AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)`
			`AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)`
			`AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,`
			`c("ab", "atc", "cid", "name",`
			`colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),`
			`drop = FALSE]),`
			`stringsAsFactors = FALSE)),`
			`function(x) {`
			`x <- generalise_antibiotic_name(unname(unlist(x)))`
			`x[x != ""]`
			`}))`
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`AB_lookup`
			`}`

			`create_MO_lookup <- function() {`
			`MO_lookup <- AMR::microorganisms`

			`MO_lookup$kingdom_index <- NA_real_`
			`MO_lookup[which(MO_lookup$kingdom == "Bacteria" \| MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1`
			`MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2`
			`MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3`
			`MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4`
			`# all the rest`
			`MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5`

			# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
			`MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,`
			`MO_lookup$species,`
			`MO_lookup$subspecies)))`
			`ind <- MO_lookup$genus == "" \| grepl("^[(]unknown ", MO_lookup$fullname)`
			`MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])`
			`MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))`

			`# add a column with only "e coli" like combinations`
			`MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)`

			`# so arrange data on prevalence first, then kingdom, then full name`
			`MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]`
			`}`

			`create_MO.old_lookup <- function() {`
			`MO.old_lookup <- AMR::microorganisms.old`
			`MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))`

			`# add a column with only "e coli"-like combinations`
			`MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))`

			`# so arrange data on prevalence first, then full name`
			`MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]`
			`}`

			`create_intr_resistance <- function() {`
			`# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector`
			`paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],`
			`AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])`
			`}`



			`# Save internal data sets to R/sysdata.rda --------------------------------`

			`# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file`
(v0.8.0.9031) as.mo() improvements 2019-11-15 15:25:03 +01:00			`eucast_rules_file <- utils::read.delim(file = "data-raw/eucast_rules.tsv",`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`skip = 10,`
			`sep = "\t",`
			`stringsAsFactors = FALSE,`
			`header = TRUE,`
			`strip.white = TRUE,`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`na = c(NA, "", NULL)) %>%`
			`# take the order of the reference.rule_group column in the original data file`
			`mutate(reference.rule_group = factor(reference.rule_group,`
			`levels = unique(reference.rule_group),`
			`ordered = TRUE),`
			`sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%`
			`arrange(reference.rule_group,`
			`reference.version,`
			`sorting_rule,`
			`reference.rule) %>%`
			`mutate(reference.rule_group = as.character(reference.rule_group)) %>%`
			`select(-sorting_rule)`
(v0.8.0.9002) eucast_rules() fix for S. maltophilia 2019-10-23 14:48:25 +02:00
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# Translations`
(v0.7.0.9001) new pathovars, small fixes 2019-06-07 22:47:37 +02:00			`translations_file <- utils::read.delim(file = "data-raw/translations.tsv",`
(v0.6.1.9050) support staged install 2019-06-01 20:40:49 +02:00			`sep = "\t",`
			`stringsAsFactors = FALSE,`
			`header = TRUE,`
			`blank.lines.skip = TRUE,`
			`fill = TRUE,`
			`strip.white = TRUE,`
			`encoding = "UTF-8",`
			`fileEncoding = "UTF-8",`
(v0.7.0.9001) new pathovars, small fixes 2019-06-07 22:47:37 +02:00			`na.strings = c(NA, "", NULL),`
			`allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"`
			`quote = "")`
(v0.6.1.9050) support staged install 2019-06-01 20:40:49 +02:00
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# Old microorganism codes`
(v0.7.1.9075) new microorganism codes 2019-09-18 15:46:09 +02:00			`microorganisms.translation <- readRDS("data-raw/microorganisms.translation.rds")`

(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector`
			`INTRINSIC_R <- create_intr_resistance()`

			# for checking input in `language` argument in e.g. mo_() and ab_() functions
			`LANGUAGES_SUPPORTED <- sort(c("en", unique(translations_file$lang)))`

			`# vectors of CoNS and CoPS, improves speed in as.mo()`
			`MO_CONS <- create_species_cons_cops("CoNS")`
			`MO_COPS <- create_species_cons_cops("CoPS")`

			# reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed
			`AB_lookup <- create_AB_lookup()`
			`MO_lookup <- create_MO_lookup()`
			`MO.old_lookup <- create_MO.old_lookup()`

(v0.6.1.9050) support staged install 2019-06-01 20:40:49 +02:00			`# Export to package as internal data ----`
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`usethis::use_data(eucast_rules_file,`
			`translations_file,`
			`microorganisms.translation,`
			`INTRINSIC_R,`
			`LANGUAGES_SUPPORTED,`
			`MO_CONS,`
			`MO_COPS,`
			`AB_lookup,`
			`MO_lookup,`
			`MO.old_lookup,`
(v0.6.1.9050) support staged install 2019-06-01 20:40:49 +02:00			`internal = TRUE,`
			`overwrite = TRUE,`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`version = 2,`
			`compress = "xz")`
(v0.6.1.9050) support staged install 2019-06-01 20:40:49 +02:00
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# Export data sets to the repository in different formats -----------------`
(v0.7.1.9073) as.mo() self-learning algorithm 2019-09-15 22:57:30 +02:00
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`write_md5 <- function(object) {`
(v1.3.0.9026) eucast expert rules 3.2 2020-09-24 00:30:11 +02:00			`conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))`
			`writeLines(digest::digest(object, "md5"), conn)`
			`close(conn)`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`}`
			`changed_md5 <- function(object) {`
			`tryCatch({`
			`conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))`
			`compared <- digest::digest(object, "md5") != readLines(con = conn)`
			`close(conn)`
			`compared`
			`}, error = function(e) TRUE)`
			`}`
(v0.9.0.9019) website update 2020-02-01 15:09:36 +01:00			`usethis::ui_done(paste0("Saving raw data to {usethis::ui_value('/data-raw/')}"))`
(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00
(v0.9.0.9023) EUCAST 2020 guidelines 2020-02-14 19:54:13 +01:00			`# give official names to ABs and MOs`
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00			`rsi <- dplyr::mutate(rsi_translation, ab = ab_name(ab), mo = mo_name(mo))`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`if (changed_md5(rsi)) {`
			`write_md5(rsi)`
			`try(saveRDS(rsi, "data-raw/rsi_translation.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(rsi, "data-raw/rsi_translation.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(rsi, "data-raw/rsi_translation.sas"), silent = TRUE)`
			`try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE)`
			`try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE)`
			`}`
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00
			`mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`if (changed_md5(mo)) {`
			`write_md5(mo)`
			`try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)`
			`try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)`
			`try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)`
			`}`
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`if (changed_md5(microorganisms.old)) {`
			`write_md5(microorganisms.old)`
			`try(saveRDS(microorganisms.old, "data-raw/microorganisms.old.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(microorganisms.old, "data-raw/microorganisms.old.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(microorganisms.old, "data-raw/microorganisms.old.sas"), silent = TRUE)`
			`try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE)`
			`try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE)`
			`}`
(v1.3.0.9005) website update 2020-08-17 21:49:58 +02:00
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00			`ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`if (changed_md5(ab)) {`
			`write_md5(ab)`
			`try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)`
			`try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)`
			`try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE)`
			`}`
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00
			`av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)`
(v1.3.0.9018) language corrections 2020-09-14 12:21:23 +02:00			`if (changed_md5(av)) {`
			`write_md5(av)`
			`try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)`
			`try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)`
			`try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE)`
			`}`

			`if (changed_md5(intrinsic_resistant)) {`
			`write_md5(intrinsic_resistant)`
			`try(saveRDS(intrinsic_resistant, "data-raw/intrinsic_resistant.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(intrinsic_resistant, "data-raw/intrinsic_resistant.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(intrinsic_resistant, "data-raw/intrinsic_resistant.sas"), silent = TRUE)`
			`try(haven::write_sav(intrinsic_resistant, "data-raw/intrinsic_resistant.sav"), silent = TRUE)`
			`try(haven::write_dta(intrinsic_resistant, "data-raw/intrinsic_resistant.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(intrinsic_resistant, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)`
			`}`
(v1.3.0.9004) data sets, as.disk() improvement 2020-08-16 21:38:42 +02:00
(v1.5.0.9006) major documentation update 2021-01-18 16:57:56 +01:00			`if (changed_md5(dosage)) {`
			`write_md5(dosage)`
			`try(saveRDS(dosage, "data-raw/dosage.rds", version = 2, compress = "xz"), silent = TRUE)`
			`try(write.table(dosage, "data-raw/dosage.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)`
			`try(haven::write_sas(dosage, "data-raw/dosage.sas"), silent = TRUE)`
			`try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)`
			`try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)`
			`try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)`
			`}`

(v1.5.0.9008) Internal data sets to pkg, speed for auto col determination 2021-01-22 10:20:41 +01:00			`# remove leftovers from global env`
			`current_globalenv <- ls(envir = globalenv())`
			`rm(list = current_globalenv[!current_globalenv %in% old_globalenv])`
			`rm(current_globalenv)`