AMR/data-raw/internals.R

164 lines
8.7 KiB
R
Raw Normal View History

# ==================================================================== #
# TITLE #
2020-10-08 11:16:03 +02:00
# Antimicrobial Resistance (AMR) Analysis for R #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2020 Berends MS, Luz CF et al. #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
# Run this file to update the package using: -------------------------------
# source("data-raw/internals.R")
# --------------------------------------------------------------------------
# See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
2020-09-24 00:30:11 +02:00
library(dplyr, warn.conflicts = FALSE)
2019-11-15 15:25:03 +01:00
# Read the EUCAST reference file as tab-separated text. The first 10 lines of
# the file are skipped, and both NA and empty strings are imported as NA.
eucast_rules_file <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
                                       skip = 10,
                                       sep = "\t",
                                       stringsAsFactors = FALSE,
                                       header = TRUE,
                                       strip.white = TRUE,
                                       # spelled out in full; the abbreviated
                                       # `na` relied on partial argument matching
                                       na.strings = c(NA, "")) %>%
  # take the order of the reference.rule_group column in the original data file
  mutate(reference.rule_group = factor(reference.rule_group,
                                       levels = unique(reference.rule_group),
                                       ordered = TRUE),
         # rules whose name starts with "Table" get sort key 1, all others 2
         sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
  arrange(reference.rule_group,
          reference.version,
          sorting_rule,
          reference.rule) %>%
  # the factor was only needed for sorting; store the group as plain text again
  mutate(reference.rule_group = as.character(reference.rule_group)) %>%
  # drop the temporary sort key
  select(-sorting_rule)
# Translations ----
# Tab-separated translation table, read as UTF-8 with quoting disabled.
# NA and empty strings are both imported as NA.
translations_file <- utils::read.delim(
  file = "data-raw/translations.tsv",
  header = TRUE,
  sep = "\t",
  quote = "",
  fill = TRUE,
  strip.white = TRUE,
  blank.lines.skip = TRUE,
  # escapes have to be processed, else "\\1" will be imported as "\\\\1"
  allowEscapes = TRUE,
  na.strings = c(NA, ""),
  stringsAsFactors = FALSE,
  encoding = "UTF-8",
  fileEncoding = "UTF-8"
)
2019-06-01 20:40:49 +02:00
2019-09-18 15:46:09 +02:00
# Old microorganism codes -------------------------------------------------
microorganisms.translation <- readRDS(file = "data-raw/microorganisms.translation.rds")

# Export to package as internal data ----
# The three objects are saved in one call (internal = TRUE), replacing any
# previously saved version.
usethis::use_data(
  eucast_rules_file,
  translations_file,
  microorganisms.translation,
  internal = TRUE,
  overwrite = TRUE,
  version = 2,
  compress = "xz"
)

# Remove from global environment ----
rm(eucast_rules_file, translations_file, microorganisms.translation)
2020-02-01 15:09:36 +01:00
# Save raw data to repository ----
2020-09-14 12:21:23 +02:00
# Store the MD5 hash of `object` in 'data-raw/<name>.md5', where <name> is the
# expression that was passed as `object` (write_md5(rsi) writes 'rsi.md5').
#
# object: the R object to hash; pass it as a plain variable name, since that
#         name becomes part of the file name.
# Returns NULL invisibly.
write_md5 <- function(object) {
  md5_path <- paste0("data-raw/", deparse(substitute(object)), ".md5")
  # hash first: if digest() fails, no connection has been created yet
  md5_hash <- digest::digest(object, "md5")
  # given a path, writeLines() opens the file and closes it again on exit,
  # so nothing is left open even when writing fails
  writeLines(md5_hash, md5_path)
}
# Tell whether `object` differs from the MD5 hash stored in
# 'data-raw/<name>.md5', where <name> is the expression that was passed as
# `object` (changed_md5(rsi) reads 'rsi.md5').
#
# object: the R object to check; pass it as a plain variable name.
# Returns a single TRUE or FALSE. TRUE is returned when the hash file is
# missing, when its content is not exactly the current hash (including an
# empty or multi-line file), and when anything goes wrong along the way.
changed_md5 <- function(object) {
  tryCatch({
    md5_path <- paste0("data-raw/", deparse(substitute(object)), ".md5")
    if (file.exists(md5_path)) {
      # given a path, readLines() opens and closes the file itself, so no
      # connection is left behind when reading fails
      stored_hash <- readLines(md5_path, warn = FALSE)
      # identical() always yields one TRUE/FALSE, whatever the file contains
      !identical(digest::digest(object, "md5"), stored_hash)
    } else {
      # no stored hash yet: treat the object as changed
      TRUE
    }
  }, error = function(e) TRUE)
}
2020-02-01 15:09:36 +01:00
# announce the export step and load the package from source
usethis::ui_done("Saving raw data to {usethis::ui_value('/data-raw/')}")
devtools::load_all(quiet = TRUE)

# give official names to ABs and MOs
rsi <- dplyr::mutate(rsi_translation, ab = ab_name(ab), mo = mo_name(mo))
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(rsi)) {
  write_md5(rsi)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(rsi, file = "data-raw/rsi_translation.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(rsi, file = "data-raw/rsi_translation.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(rsi, "data-raw/rsi_translation.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(rsi, "data-raw/rsi_translation.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(rsi, "data-raw/rsi_translation.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"),
    silent = TRUE
  )
}
# microorganisms: convert every non-numeric column to character
mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(mo)) {
  write_md5(mo)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(mo, file = "data-raw/microorganisms.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(mo, file = "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(mo, "data-raw/microorganisms.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(mo, "data-raw/microorganisms.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(mo, "data-raw/microorganisms.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"),
    silent = TRUE
  )
}
2020-09-14 12:21:23 +02:00
# microorganisms.old is exported as is;
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(microorganisms.old)) {
  write_md5(microorganisms.old)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(microorganisms.old, file = "data-raw/microorganisms.old.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(microorganisms.old, file = "data-raw/microorganisms.old.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(microorganisms.old, "data-raw/microorganisms.old.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"),
    silent = TRUE
  )
}
2020-08-17 21:49:58 +02:00
# antibiotics: convert every non-numeric column to character
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(ab)) {
  write_md5(ab)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(ab, file = "data-raw/antibiotics.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(ab, file = "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(ab, "data-raw/antibiotics.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(ab, "data-raw/antibiotics.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(ab, "data-raw/antibiotics.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"),
    silent = TRUE
  )
}
# antivirals: convert every non-numeric column to character
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(av)) {
  write_md5(av)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(av, file = "data-raw/antivirals.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(av, file = "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(av, "data-raw/antivirals.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(av, "data-raw/antivirals.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(av, "data-raw/antivirals.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"),
    silent = TRUE
  )
}
# intrinsic_resistant is exported as is;
# only rewrite the export files when the stored hash no longer matches
if (changed_md5(intrinsic_resistant)) {
  write_md5(intrinsic_resistant)
  # each format is attempted on its own; a failing writer is silently skipped
  try(
    saveRDS(intrinsic_resistant, file = "data-raw/intrinsic_resistant.rds", version = 2, compress = "xz"),
    silent = TRUE
  )
  try(
    write.table(intrinsic_resistant, file = "data-raw/intrinsic_resistant.txt", sep = "\t", na = "", row.names = FALSE),
    silent = TRUE
  )
  try(
    haven::write_sas(intrinsic_resistant, "data-raw/intrinsic_resistant.sas"),
    silent = TRUE
  )
  try(
    haven::write_sav(intrinsic_resistant, "data-raw/intrinsic_resistant.sav"),
    silent = TRUE
  )
  try(
    haven::write_dta(intrinsic_resistant, "data-raw/intrinsic_resistant.dta"),
    silent = TRUE
  )
  try(
    openxlsx::write.xlsx(intrinsic_resistant, "data-raw/intrinsic_resistant.xlsx"),
    silent = TRUE
  )
}
2020-09-14 12:21:23 +02:00
# the two hash helpers were only needed by this script; remove them again
rm(write_md5, changed_md5)