mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 01:22:25 +02:00
Feather and Parquet files
This commit is contained in:
BIN
data-raw/antibiotics.feather
Normal file
BIN
data-raw/antibiotics.feather
Normal file
Binary file not shown.
BIN
data-raw/antibiotics.parquet
Normal file
BIN
data-raw/antibiotics.parquet
Normal file
Binary file not shown.
Binary file not shown.
BIN
data-raw/antivirals.feather
Normal file
BIN
data-raw/antivirals.feather
Normal file
Binary file not shown.
BIN
data-raw/antivirals.parquet
Normal file
BIN
data-raw/antivirals.parquet
Normal file
Binary file not shown.
Binary file not shown.
BIN
data-raw/dosage.feather
Normal file
BIN
data-raw/dosage.feather
Normal file
Binary file not shown.
BIN
data-raw/dosage.parquet
Normal file
BIN
data-raw/dosage.parquet
Normal file
Binary file not shown.
BIN
data-raw/intrinsic_resistant.feather
Normal file
BIN
data-raw/intrinsic_resistant.feather
Normal file
Binary file not shown.
BIN
data-raw/intrinsic_resistant.parquet
Normal file
BIN
data-raw/intrinsic_resistant.parquet
Normal file
Binary file not shown.
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3b737ed331dd70a51aabf8203faadaa3f61e67c2f2cdbfce9c1b4aca7b61df93
|
||||
size 28881867
|
||||
oid sha256:0d69888efa84f05de1de460039fbd137439f76fba0e1a98f605df77a0e3b0ea4
|
||||
size 65184439
|
||||
|
BIN
data-raw/microorganisms.feather
Normal file
BIN
data-raw/microorganisms.feather
Normal file
Binary file not shown.
1
data-raw/microorganisms.md5
Normal file
1
data-raw/microorganisms.md5
Normal file
@ -0,0 +1 @@
|
||||
ec28bed91f4b254e2b33f30b77198325
|
BIN
data-raw/microorganisms.old.feather
Normal file
BIN
data-raw/microorganisms.old.feather
Normal file
Binary file not shown.
BIN
data-raw/microorganisms.old.parquet
Normal file
BIN
data-raw/microorganisms.old.parquet
Normal file
Binary file not shown.
BIN
data-raw/microorganisms.parquet
Normal file
BIN
data-raw/microorganisms.parquet
Normal file
Binary file not shown.
Binary file not shown.
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c3c78b0121a7adc97218825b701ab157e2d0c01400d797fa5fd40b7abf27d79f
|
||||
size 32219136
|
||||
oid sha256:2253a2f9b918972e77af08eec81565219510c10dba4bd957bca1580e4392033e
|
||||
size 72474624
|
||||
|
@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4970b36edc301a65f2a2494da93419e2e116302d029ba5a49a4fac82cef8e068
|
||||
size 17100983
|
||||
oid sha256:cbe379d131f50308af69d73f5cf74a14b92d6cf892a9b11fd02eaa48bf5b5657
|
||||
size 21775629
|
||||
|
Binary file not shown.
@ -9,7 +9,7 @@
|
||||
# (c) 2018-2022 Berends MS, Luz CF et al. #
|
||||
# Developed at the University of Groningen, the Netherlands, in #
|
||||
# collaboration with non-profit organisations Certe Medical #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# Diagnostics & Advice, and University Medical Center Groningen. #
|
||||
# #
|
||||
# This R package is free software; you can freely use and distribute #
|
||||
# it for both personal and commercial purposes under the terms of the #
|
||||
@ -24,7 +24,7 @@
|
||||
# ==================================================================== #
|
||||
|
||||
# Run this file to update the package using:
|
||||
# source("data-raw/_internals.R")
|
||||
# source("data-raw/pre-commit-hook.R")
|
||||
|
||||
library(dplyr, warn.conflicts = FALSE)
|
||||
devtools::load_all(quiet = TRUE)
|
||||
@ -42,19 +42,38 @@ EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
strip.white = TRUE,
|
||||
na = c(NA, "", NULL)) %>%
|
||||
na = c(NA, "", NULL)) %>%
|
||||
# take the order of the reference.rule_group column in the original data file
|
||||
mutate(reference.rule_group = factor(reference.rule_group,
|
||||
levels = unique(reference.rule_group),
|
||||
ordered = TRUE),
|
||||
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
|
||||
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
|
||||
arrange(reference.rule_group,
|
||||
reference.version,
|
||||
sorting_rule,
|
||||
reference.rule) %>%
|
||||
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
|
||||
reference.rule) %>%
|
||||
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
|
||||
select(-sorting_rule)
|
||||
|
||||
TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
|
||||
sep = "\t",
|
||||
stringsAsFactors = FALSE,
|
||||
header = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
fill = TRUE,
|
||||
strip.white = TRUE,
|
||||
encoding = "UTF-8",
|
||||
fileEncoding = "UTF-8",
|
||||
na.strings = c(NA, "", NULL),
|
||||
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
|
||||
quote = "")
|
||||
|
||||
LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
|
||||
lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2)],
|
||||
function(x) list(exonym = x[1], endonym = x[2])))
|
||||
|
||||
LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)
|
||||
|
||||
# vectors of CoNS and CoPS, improves speed in as.mo()
|
||||
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
# Determination of which staphylococcal species are CoNS/CoPS according to:
|
||||
@ -66,7 +85,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
|
||||
if (type == "CoNS") {
|
||||
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"auricularis", "borealis", "caeli", "capitis", "caprae",
|
||||
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
|
||||
"croceilyticus",
|
||||
"debuckii", "devriesei", "edaphicus", "epidermidis",
|
||||
@ -99,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
|
||||
create_MO_fullname_lower <- function() {
|
||||
MO_lookup <- AMR::microorganisms
|
||||
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
|
||||
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
|
||||
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
|
||||
MO_lookup$species,
|
||||
MO_lookup$subspecies)))
|
||||
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
|
||||
@ -175,7 +194,7 @@ create_AB_lookup <- function() {
|
||||
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
|
||||
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
|
||||
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
|
||||
c("ab", "atc", "cid", "name",
|
||||
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
|
||||
drop = FALSE]),
|
||||
@ -189,7 +208,10 @@ create_AB_lookup <- function() {
|
||||
AB_LOOKUP <- create_AB_lookup()
|
||||
|
||||
# Export to package as internal data ----
|
||||
usethis::use_data(EUCAST_RULES_DF,
|
||||
usethis::use_data(EUCAST_RULES_DF,
|
||||
TRANSLATIONS,
|
||||
LANGUAGES_SUPPORTED_NAMES,
|
||||
LANGUAGES_SUPPORTED,
|
||||
MO_CONS,
|
||||
MO_COPS,
|
||||
MO_STREP_ABCG,
|
||||
@ -232,23 +254,35 @@ usethis::use_data(EUCAST_RULES_DF,
|
||||
|
||||
# Export data sets to the repository in different formats -----------------
|
||||
|
||||
for (pkg in c("haven", "openxlsx", "arrow")) {
|
||||
if (!pkg %in% rownames(utils::installed.packages())) {
|
||||
message("NOTE: package '", pkg, "' not installed! Ignoring export where this package is required.")
|
||||
}
|
||||
}
|
||||
if ("digest" %in% rownames(utils::installed.packages())) {
|
||||
md5 <- function(object) digest::digest(object, "md5")
|
||||
} else {
|
||||
# will write all files anyway, since MD5 hash cannot be determined
|
||||
md5 <- function(object) "unknown-md5-hash"
|
||||
}
|
||||
|
||||
write_md5 <- function(object) {
|
||||
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
|
||||
writeLines(digest::digest(object, "md5"), conn)
|
||||
writeLines(md5(object), conn)
|
||||
close(conn)
|
||||
}
|
||||
changed_md5 <- function(object) {
|
||||
tryCatch({
|
||||
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
|
||||
compared <- digest::digest(object, "md5") != readLines(con = conn)
|
||||
compared <- md5(object) != readLines(con = conn)
|
||||
close(conn)
|
||||
compared
|
||||
}, error = function(e) TRUE)
|
||||
}
|
||||
|
||||
# give official names to ABs and MOs
|
||||
rsi <- AMR::rsi_translation %>%
|
||||
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
|
||||
rsi <- rsi_translation %>%
|
||||
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
|
||||
mutate(ab_name = ab_name(ab, language = NULL), .after = ab)
|
||||
if (changed_md5(rsi)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}"))
|
||||
@ -259,18 +293,25 @@ if (changed_md5(rsi)) {
|
||||
try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE)
|
||||
try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(rsi, "data-raw/rsi_translation.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(rsi, "data-raw/rsi_translation.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
|
||||
if (changed_md5(mo)) {
|
||||
if (changed_md5(microorganisms)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}"))
|
||||
write_md5(mo)
|
||||
try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
write_md5(microorganisms)
|
||||
try(saveRDS(microorganisms, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
|
||||
try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE)
|
||||
try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE)
|
||||
try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(dplyr::select(mo, -snomed), "data-raw/microorganisms.xlsx"), silent = TRUE)
|
||||
max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
|
||||
mo <- microorganisms
|
||||
mo$snomed <- max_50_snomed
|
||||
mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
|
||||
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
|
||||
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
|
||||
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(microorganisms, "data-raw/microorganisms.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(microorganisms, "data-raw/microorganisms.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
if (changed_md5(microorganisms.old)) {
|
||||
@ -282,30 +323,36 @@ if (changed_md5(microorganisms.old)) {
|
||||
try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE)
|
||||
try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(microorganisms.old, "data-raw/microorganisms.old.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
|
||||
if (changed_md5(ab)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}"))
|
||||
write_md5(ab)
|
||||
try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
|
||||
try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
try(write.table(antibiotics, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
|
||||
try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)
|
||||
try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)
|
||||
try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(antibiotics, "data-raw/antibiotics.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
|
||||
if (changed_md5(av)) {
|
||||
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}"))
|
||||
write_md5(av)
|
||||
try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
|
||||
try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
|
||||
try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)
|
||||
try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)
|
||||
try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(antivirals, "data-raw/antivirals.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(antivirals, "data-raw/antivirals.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
# give official names to ABs and MOs
|
||||
@ -321,6 +368,8 @@ if (changed_md5(intrinsicR)) {
|
||||
try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE)
|
||||
try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(intrinsicR, "data-raw/intrinsic_resistant.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(intrinsicR, "data-raw/intrinsic_resistant.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
if (changed_md5(dosage)) {
|
||||
@ -332,6 +381,8 @@ if (changed_md5(dosage)) {
|
||||
try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)
|
||||
try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)
|
||||
try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)
|
||||
try(arrow::write_feather(dosage, "data-raw/dosage.feather"), silent = TRUE)
|
||||
try(arrow::write_parquet(dosage, "data-raw/dosage.parquet"), silent = TRUE)
|
||||
}
|
||||
|
||||
reset_AMR_locale()
|
||||
@ -340,3 +391,6 @@ reset_AMR_locale()
|
||||
current_globalenv <- ls(envir = globalenv())
|
||||
rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
|
||||
rm(current_globalenv)
|
||||
|
||||
devtools::load_all(quiet = TRUE)
|
||||
devtools::document()
|
@ -901,7 +901,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
|
||||
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
|
||||
# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
|
||||
# to save microorganisms.translation internally to the package
|
||||
# source("data-raw/_internals.R")
|
||||
# source("data-raw/pre-commit-hook.R")
|
||||
|
||||
# load new data sets again
|
||||
devtools::load_all(".")
|
||||
|
@ -444,7 +444,7 @@ rm(intrinsic_resistant)
|
||||
|
||||
# load new data sets again
|
||||
devtools::load_all(".")
|
||||
source("data-raw/_internals.R")
|
||||
source("data-raw/pre-commit-hook.R")
|
||||
devtools::load_all(".")
|
||||
|
||||
|
||||
|
BIN
data-raw/rsi_translation.feather
Normal file
BIN
data-raw/rsi_translation.feather
Normal file
Binary file not shown.
BIN
data-raw/rsi_translation.parquet
Normal file
BIN
data-raw/rsi_translation.parquet
Normal file
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user