1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 01:22:25 +02:00

Feather and Parquet files

This commit is contained in:
2022-08-26 22:25:15 +02:00
parent 4da32e3d40
commit 3864ab2fb8
48 changed files with 188 additions and 175 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
data-raw/antivirals.feather Normal file

Binary file not shown.

BIN
data-raw/antivirals.parquet Normal file

Binary file not shown.

Binary file not shown.

BIN
data-raw/dosage.feather Normal file

Binary file not shown.

BIN
data-raw/dosage.parquet Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b737ed331dd70a51aabf8203faadaa3f61e67c2f2cdbfce9c1b4aca7b61df93
size 28881867
oid sha256:0d69888efa84f05de1de460039fbd137439f76fba0e1a98f605df77a0e3b0ea4
size 65184439

Binary file not shown.

View File

@ -0,0 +1 @@
ec28bed91f4b254e2b33f30b77198325

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c3c78b0121a7adc97218825b701ab157e2d0c01400d797fa5fd40b7abf27d79f
size 32219136
oid sha256:2253a2f9b918972e77af08eec81565219510c10dba4bd957bca1580e4392033e
size 72474624

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4970b36edc301a65f2a2494da93419e2e116302d029ba5a49a4fac82cef8e068
size 17100983
oid sha256:cbe379d131f50308af69d73f5cf74a14b92d6cf892a9b11fd02eaa48bf5b5657
size 21775629

Binary file not shown.

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -24,7 +24,7 @@
# ==================================================================== #
# Run this file to update the package using:
# source("data-raw/_internals.R")
# source("data-raw/pre-commit-hook.R")
library(dplyr, warn.conflicts = FALSE)
devtools::load_all(quiet = TRUE)
@ -42,19 +42,38 @@ EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
stringsAsFactors = FALSE,
header = TRUE,
strip.white = TRUE,
na = c(NA, "", NULL)) %>%
na = c(NA, "", NULL)) %>%
# take the order of the reference.rule_group column in the original data file
mutate(reference.rule_group = factor(reference.rule_group,
levels = unique(reference.rule_group),
ordered = TRUE),
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
arrange(reference.rule_group,
reference.version,
sorting_rule,
reference.rule) %>%
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
reference.rule) %>%
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
select(-sorting_rule)
TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = "")
LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2)],
function(x) list(exonym = x[1], endonym = x[2])))
LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)
# vectors of CoNS and CoPS, improves speed in as.mo()
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
# Determination of which staphylococcal species are CoNS/CoPS according to:
@ -66,7 +85,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
if (type == "CoNS") {
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
"croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis",
@ -99,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
create_MO_fullname_lower <- function() {
MO_lookup <- AMR::microorganisms
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$species,
MO_lookup$subspecies)))
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
@ -175,7 +194,7 @@ create_AB_lookup <- function() {
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
c("ab", "atc", "cid", "name",
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
drop = FALSE]),
@ -189,7 +208,10 @@ create_AB_lookup <- function() {
AB_LOOKUP <- create_AB_lookup()
# Export to package as internal data ----
usethis::use_data(EUCAST_RULES_DF,
usethis::use_data(EUCAST_RULES_DF,
TRANSLATIONS,
LANGUAGES_SUPPORTED_NAMES,
LANGUAGES_SUPPORTED,
MO_CONS,
MO_COPS,
MO_STREP_ABCG,
@ -232,23 +254,35 @@ usethis::use_data(EUCAST_RULES_DF,
# Export data sets to the repository in different formats -----------------
for (pkg in c("haven", "openxlsx", "arrow")) {
if (!pkg %in% rownames(utils::installed.packages())) {
message("NOTE: package '", pkg, "' not installed! Ignoring export where this package is required.")
}
}
if ("digest" %in% rownames(utils::installed.packages())) {
md5 <- function(object) digest::digest(object, "md5")
} else {
# will write all files anyway, since MD5 hash cannot be determined
md5 <- function(object) "unknown-md5-hash"
}
write_md5 <- function(object) {
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
writeLines(digest::digest(object, "md5"), conn)
writeLines(md5(object), conn)
close(conn)
}
changed_md5 <- function(object) {
tryCatch({
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
compared <- digest::digest(object, "md5") != readLines(con = conn)
compared <- md5(object) != readLines(con = conn)
close(conn)
compared
}, error = function(e) TRUE)
}
# give official names to ABs and MOs
rsi <- AMR::rsi_translation %>%
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
rsi <- rsi_translation %>%
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
mutate(ab_name = ab_name(ab, language = NULL), .after = ab)
if (changed_md5(rsi)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}"))
@ -259,18 +293,25 @@ if (changed_md5(rsi)) {
try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE)
try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE)
try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE)
try(arrow::write_feather(rsi, "data-raw/rsi_translation.feather"), silent = TRUE)
try(arrow::write_parquet(rsi, "data-raw/rsi_translation.parquet"), silent = TRUE)
}
mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
if (changed_md5(mo)) {
if (changed_md5(microorganisms)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}"))
write_md5(mo)
try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
write_md5(microorganisms)
try(saveRDS(microorganisms, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE)
try(openxlsx::write.xlsx(dplyr::select(mo, -snomed), "data-raw/microorganisms.xlsx"), silent = TRUE)
max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
mo <- microorganisms
mo$snomed <- max_50_snomed
mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)
try(arrow::write_feather(microorganisms, "data-raw/microorganisms.feather"), silent = TRUE)
try(arrow::write_parquet(microorganisms, "data-raw/microorganisms.parquet"), silent = TRUE)
}
if (changed_md5(microorganisms.old)) {
@ -282,30 +323,36 @@ if (changed_md5(microorganisms.old)) {
try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE)
try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), silent = TRUE)
try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE)
try(arrow::write_feather(microorganisms.old, "data-raw/microorganisms.old.feather"), silent = TRUE)
try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
}
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
if (changed_md5(ab)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}"))
write_md5(ab)
try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(antibiotics, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)
try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)
try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)
try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE)
try(arrow::write_feather(antibiotics, "data-raw/antibiotics.feather"), silent = TRUE)
try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
}
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
if (changed_md5(av)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}"))
write_md5(av)
try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)
try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)
try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)
try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE)
try(arrow::write_feather(antivirals, "data-raw/antivirals.feather"), silent = TRUE)
try(arrow::write_parquet(antivirals, "data-raw/antivirals.parquet"), silent = TRUE)
}
# give official names to ABs and MOs
@ -321,6 +368,8 @@ if (changed_md5(intrinsicR)) {
try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE)
try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE)
try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)
try(arrow::write_feather(intrinsicR, "data-raw/intrinsic_resistant.feather"), silent = TRUE)
try(arrow::write_parquet(intrinsicR, "data-raw/intrinsic_resistant.parquet"), silent = TRUE)
}
if (changed_md5(dosage)) {
@ -332,6 +381,8 @@ if (changed_md5(dosage)) {
try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)
try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)
try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)
try(arrow::write_feather(dosage, "data-raw/dosage.feather"), silent = TRUE)
try(arrow::write_parquet(dosage, "data-raw/dosage.parquet"), silent = TRUE)
}
reset_AMR_locale()
@ -340,3 +391,6 @@ reset_AMR_locale()
current_globalenv <- ls(envir = globalenv())
rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
rm(current_globalenv)
devtools::load_all(quiet = TRUE)
devtools::document()

View File

@ -901,7 +901,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
# to save microorganisms.translation internally to the package
# source("data-raw/_internals.R")
# source("data-raw/pre-commit-hook.R")
# load new data sets again
devtools::load_all(".")

View File

@ -444,7 +444,7 @@ rm(intrinsic_resistant)
# load new data sets again
devtools::load_all(".")
source("data-raw/_internals.R")
source("data-raw/pre-commit-hook.R")
devtools::load_all(".")

Binary file not shown.

Binary file not shown.

Binary file not shown.