Feather and Parquet files

This commit is contained in:
dr. M.S. (Matthijs) Berends 2022-08-26 22:25:15 +02:00
parent 4da32e3d40
commit 3864ab2fb8
48 changed files with 188 additions and 175 deletions

6
.gitattributes vendored
View File

@ -1,6 +0,0 @@
*.dta filter=lfs diff=lfs merge=lfs -text
*.sas filter=lfs diff=lfs merge=lfs -text
*.sav filter=lfs diff=lfs merge=lfs -text
data-raw/*.dta filter=lfs diff=lfs merge=lfs -text
data-raw/*.sas filter=lfs diff=lfs merge=lfs -text
data-raw/*.sav filter=lfs diff=lfs merge=lfs -text

View File

@ -1,3 +0,0 @@
#!/bin/sh
# Git LFS post-checkout hook: hands over to `git lfs post-checkout`, forwarding
# the arguments this hook was invoked with.
# Exits with status 2 when the `git-lfs` executable cannot be found on PATH.
if ! command -v git-lfs >/dev/null 2>&1; then
  # printf instead of echo: whether echo expands "\n" differs between the
  # shells that can be installed as /bin/sh.
  printf >&2 '\n%s\n\n' "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-checkout."
  exit 2
fi
git lfs post-checkout "$@"

View File

@ -1,3 +0,0 @@
#!/bin/sh
# Git LFS post-commit hook: hands over to `git lfs post-commit`, forwarding
# the arguments this hook was invoked with.
# Exits with status 2 when the `git-lfs` executable cannot be found on PATH.
if ! command -v git-lfs >/dev/null 2>&1; then
  # printf instead of echo: whether echo expands "\n" differs between the
  # shells that can be installed as /bin/sh.
  printf >&2 '\n%s\n\n' "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-commit."
  exit 2
fi
git lfs post-commit "$@"

View File

@ -1,3 +0,0 @@
#!/bin/sh
# Git LFS post-merge hook: hands over to `git lfs post-merge`, forwarding
# the arguments this hook was invoked with.
# Exits with status 2 when the `git-lfs` executable cannot be found on PATH.
if ! command -v git-lfs >/dev/null 2>&1; then
  # printf instead of echo: whether echo expands "\n" differs between the
  # shells that can be installed as /bin/sh.
  printf >&2 '\n%s\n\n' "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-merge."
  exit 2
fi
git lfs post-merge "$@"

View File

@ -5,13 +5,13 @@ echo "Running pre-commit hook..."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo ">> Updating R documentation..."
if command -v Rscript > /dev/null; then
if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then
Rscript -e "suppressMessages(roxygen2::roxygenise())"
if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then
Rscript -e "source('data-raw/pre-commit-hook.R')"
currentpkg=`Rscript -e "cat(pkgload::pkg_name())"`
git add man/*
echo ">> done."
git add R/sysdata.rda
else
echo ">> R packages 'roxygen2' and 'pkgload' are not installed!"
echo ">> R package 'pkgload', 'devtools', or 'dplyr' not installed!"
currentpkg="your"
fi
else
@ -30,7 +30,7 @@ currenttagfull=`git describe --tags --abbrev=0`
currenttag=`git describe --tags --abbrev=0 | sed 's/v//'`
if [ "$currenttag" = "" ]; then
# there is no tag, so set tag to 0.0.1 and commit index to current count
echo ">> - no git tags found, create some using v(x).(y).(z)"
echo ">> - no git tags found, create one in this format: 'v(x).(y).(z)'!"
currenttag="0.0.1"
currentcommit=`git rev-list --count HEAD`
else

View File

@ -1,71 +0,0 @@
#!/bin/sh
# Pre-commit hook for an R package repository.
#
# 1. Regenerates the R documentation with roxygen2 and stages man/*.
# 2. Derives the package version from the latest git tag and writes it,
#    together with today's date, to DESCRIPTION and (when present) to the
#    first line of NEWS.md; both files are staged afterwards.
#
# Kept to POSIX sh constructs, since the shebang is /bin/sh and that is not
# bash on every system.

echo "Running pre-commit hook..."

# Start empty so a value inherited from the environment cannot be mistaken
# for a version determined further below.
currentversion=""

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo ">> Updating R documentation..."
if command -v Rscript > /dev/null; then
  # Rscript prints TRUE only when both required packages are installed
  if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then
    Rscript -e "suppressMessages(roxygen2::roxygenise())"
    currentpkg=$(Rscript -e "cat(pkgload::pkg_name())")
    git add man/*
    echo ">> done."
  else
    echo ">> R packages 'roxygen2' and 'pkgload' are not installed!"
    # placeholder name, so the messages below read "your pkg version ..."
    currentpkg="your"
  fi
else
  echo ">> R is not available on your system!"
  currentpkg="your"
fi
echo ">> "

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
echo ">> Updating semantic versioning and date..."
# get tags from remote, and remove tags not on remote:
git fetch origin --prune --prune-tags --quiet
currenttagfull=$(git describe --tags --abbrev=0)
# strip only a leading "v" (v1.2.3 -> 1.2.3); a "v" elsewhere in the tag stays
currenttag=${currenttagfull#v}
if [ "$currenttag" = "" ]; then
  # there is no tag, so set tag to 0.0.1 and commit index to current count
  echo ">> - no git tags found, create some using v(x).(y).(z)"
  currenttag="0.0.1"
  currentcommit=$(git rev-list --count HEAD)
else
  # there is a tag, so base version number on that
  currentcommit=$(git rev-list --count "${currenttagfull}..HEAD")
  # POSIX integer test; the bash-only (( ... )) form fails under a plain sh
  if [ "$currentcommit" -eq 0 ]; then
    # tag is new, so this must become the version number
    currentversion="$currenttag"
  fi
  echo ">> - latest tag is '${currenttagfull}', with ${currentcommit} previous commits"
fi
if [ "$currentversion" = "" ]; then
  # combine the tag (e.g. 1.2.3) with the number of previous commits plus 9001
  # to indicate a beta version: 4 previous commits result in 1.2.3.9005
  # (presumably 9001 rather than 9000 because the commit being created is not
  # counted yet - confirm)
  currentversion="$currenttag.$((currentcommit + 9001))"
fi
echo ">> - ${currentpkg} pkg version set to ${currentversion}"

# set version number and date to DESCRIPTION file
# ('sed -i --' is accepted by GNU sed as well as by the sed on macOS; the
# latter takes '--' as a backup suffix, hence the leftover removed below)
sed -i -- "s/^Version: .*/Version: ${currentversion}/" DESCRIPTION
sed -i -- "s/^Date: .*/Date: $(date '+%Y-%m-%d')/" DESCRIPTION
echo ">> - updated DESCRIPTION"
# remove leftover on macOS
rm -f DESCRIPTION--
# add to commit
git add DESCRIPTION

# set version number to NEWS file
if [ -e "NEWS.md" ]; then
  # the first line of NEWS.md is overwritten with "# <package> <version>"
  sed -i -- "1s/.*/# ${currentpkg} ${currentversion}/" NEWS.md
  echo ">> - updated NEWS.md"
  # remove leftover on macOS
  rm -f NEWS.md--
  # add to commit
  git add NEWS.md
else
  echo ">> - no NEWS.md found!"
fi
echo ">> "

View File

@ -1,3 +0,0 @@
#!/bin/sh
# Git LFS pre-push hook: hands over to `git lfs pre-push`, forwarding
# the arguments this hook was invoked with.
# Exits with status 2 when the `git-lfs` executable cannot be found on PATH.
if ! command -v git-lfs >/dev/null 2>&1; then
  # printf instead of echo: whether echo expands "\n" differs between the
  # shells that can be installed as /bin/sh.
  printf >&2 '\n%s\n\n' "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/pre-push."
  exit 2
fi
git lfs pre-push "$@"

1
.gitignore vendored
View File

@ -5,7 +5,6 @@ doc
.Rhistory
.RData
.Ruserdata
AMR.Rproj
tests/testthat/Rplots.pdf
inst/doc
/src/*.o

22
AMR.Rproj Normal file
View File

@ -0,0 +1,22 @@
Version: 1.0
RestoreWorkspace: No
SaveWorkspace: Ask
AlwaysSaveHistory: Yes
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageBuildArgs: --no-build-vignettes
PackageCheckArgs: --no-build-vignettes --as-cran
PackageRoxygenize: rd,collate,namespace

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.8.1.9027
Version: 1.8.1.9028
Date: 2022-08-26
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,11 +1,12 @@
# AMR 1.8.1.9027
# AMR 1.8.1.9028
### New
* EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disk diffusion interpretations.
* Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. The `AMR` package is now available in 16 languages.
### Changed
* Fix for `as.rsi()` on certain EUCAST breakpoints for MIC values
* Fix for using `as.rsi()` on certain EUCAST breakpoints for MIC values
* Fix for using `as.rsi()` on `NA` values (e.g. `as.rsi(as.disk(NA), ...)`)
* Removed `as.integer()` for MIC values, since MIC are not integer values and running `table()` on MIC values consequently failed for not being able to retrieve the level position (as that's how `as.integer()` on `factor`s normally works)
* `droplevels()` on MIC will now return a common `factor` by default and will lose the `<mic>` class. Use `droplevels(..., as.mic = TRUE)` to keep the `<mic>` class.
* Small fix for using `ab_from_text()`
@ -19,6 +20,9 @@
### Other
* New website to make use of the new Bootstrap 5 and pkgdown v2.0. The website now contains results for all examples and will be automatically regenerated with every change to our repository, using GitHub Actions
* Added Peter Dutey-Magni and Anton Mymrikov as contributors, to thank them for their valuable input
* Our data sets are now also continually exported to Apache Feather and Apache Parquet formats
* Set up Git Large File Storage (Git LFS) for the large SAS and SPSS file formats
# `AMR` 1.8.1

View File

@ -24,7 +24,7 @@
# ==================================================================== #
# add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and rsi_translation
# (sourcing "data-raw/_internals.R" will process the TSV file)
# (sourcing "data-raw/pre-commit-hook.R" will process the TSV file)
EUCAST_VERSION_BREAKPOINTS <- list("11.0" = list(version_txt = "v11.0",
year = 2021,
title = "'EUCAST Clinical Breakpoint Tables'",

View File

@ -596,7 +596,7 @@ create_eucast_ab_documentation <- function() {
ab <- character()
for (val in x) {
if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) {
# antibiotic group names, as defined in data-raw/_internals.R, such as `CARBAPENEMS`
# antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `CARBAPENEMS`
val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR"))
} else if (val %in% AB_lookup$ab) {
# separate drugs, such as `AMX`

View File

@ -502,7 +502,7 @@ ab_select_exec <- function(function_name,
}
if (is.null(ab_class_args)) {
# their upper case equivalent are vectors with class <ab>, created in data-raw/_internals.R
# their upper case equivalent are vectors with class <ab>, created in data-raw/pre-commit-hook.R
# carbapenems() gets its codes from AMR:::AB_CARBAPENEMS
abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR"))
ab_group <- function_name

View File

@ -543,7 +543,7 @@ eucast_rules <- function(x,
# this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF %>% filter(is.na(have_these_values)))
eucast_rules_df <- list(...)$eucast_rules_df
} else {
# otherwise internal data file, created in data-raw/_internals.R
# otherwise internal data file, created in data-raw/pre-commit-hook.R
eucast_rules_df <- EUCAST_RULES_DF
}

View File

@ -311,7 +311,7 @@ get_ab_from_namespace <- function(x, cols_ab) {
x_new <- character()
for (val in x) {
if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) {
# antibiotic group names, as defined in data-raw/_internals.R, such as `AB_CARBAPENEMS`
# antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `AB_CARBAPENEMS`
val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR"))
} else if (val %in% AB_lookup$ab) {
# separate drugs, such as `AMX`

22
R/rsi.R
View File

@ -597,18 +597,18 @@ get_guideline <- function(guideline, reference_data) {
guideline_param
}
as_rsi_method <- function(method_short = "mic",
method_long = "MIC values",
x = x,
mo = NULL,
ab = deparse(substitute(x)),
guideline = "EUCAST",
uti = FALSE,
conserve_capped_values = FALSE,
add_intrinsic_resistance = FALSE,
reference_data = AMR::rsi_translation,
as_rsi_method <- function(method_short,
method_long,
x,
mo,
ab,
guideline,
uti,
conserve_capped_values,
add_intrinsic_resistance,
reference_data,
...) {
meet_criteria(x)
meet_criteria(x, allow_NA = TRUE)
meet_criteria(mo, allow_class = c("mo", "character"), allow_NULL = TRUE)
meet_criteria(ab, allow_class = c("ab", "character"))
meet_criteria(guideline, allow_class = "character", has_length = 1)

Binary file not shown.

View File

@ -133,7 +133,7 @@ create_MO_lookup <- function() {
MO_lookup$fullname_lower <- MO_FULLNAME_LOWER
} else {
MO_lookup$fullname_lower <- ""
warning("MO table updated - Run: source(\"data-raw/_internals.R\")", call. = FALSE)
warning("MO table updated - Run: source(\"data-raw/pre-commit-hook.R\")", call. = FALSE)
}
# add a column with only "e coli" like combinations

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
data-raw/antivirals.feather Normal file

Binary file not shown.

BIN
data-raw/antivirals.parquet Normal file

Binary file not shown.

Binary file not shown.

BIN
data-raw/dosage.feather Normal file

Binary file not shown.

BIN
data-raw/dosage.parquet Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b737ed331dd70a51aabf8203faadaa3f61e67c2f2cdbfce9c1b4aca7b61df93
size 28881867
oid sha256:0d69888efa84f05de1de460039fbd137439f76fba0e1a98f605df77a0e3b0ea4
size 65184439

Binary file not shown.

View File

@ -0,0 +1 @@
ec28bed91f4b254e2b33f30b77198325

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c3c78b0121a7adc97218825b701ab157e2d0c01400d797fa5fd40b7abf27d79f
size 32219136
oid sha256:2253a2f9b918972e77af08eec81565219510c10dba4bd957bca1580e4392033e
size 72474624

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4970b36edc301a65f2a2494da93419e2e116302d029ba5a49a4fac82cef8e068
size 17100983
oid sha256:cbe379d131f50308af69d73f5cf74a14b92d6cf892a9b11fd02eaa48bf5b5657
size 21775629

Binary file not shown.

View File

@ -9,7 +9,7 @@
# (c) 2018-2022 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
@ -24,7 +24,7 @@
# ==================================================================== #
# Run this file to update the package using:
# source("data-raw/_internals.R")
# source("data-raw/pre-commit-hook.R")
library(dplyr, warn.conflicts = FALSE)
devtools::load_all(quiet = TRUE)
@ -42,19 +42,38 @@ EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
stringsAsFactors = FALSE,
header = TRUE,
strip.white = TRUE,
na = c(NA, "", NULL)) %>%
na = c(NA, "", NULL)) %>%
# take the order of the reference.rule_group column in the original data file
mutate(reference.rule_group = factor(reference.rule_group,
levels = unique(reference.rule_group),
ordered = TRUE),
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
arrange(reference.rule_group,
reference.version,
sorting_rule,
reference.rule) %>%
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
reference.rule) %>%
mutate(reference.rule_group = as.character(reference.rule_group)) %>%
select(-sorting_rule)
TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,
blank.lines.skip = TRUE,
fill = TRUE,
strip.white = TRUE,
encoding = "UTF-8",
fileEncoding = "UTF-8",
na.strings = c(NA, "", NULL),
allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
quote = "")
LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2)],
function(x) list(exonym = x[1], endonym = x[2])))
LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)
# vectors of CoNS and CoPS, improves speed in as.mo()
create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
# Determination of which staphylococcal species are CoNS/CoPS according to:
@ -66,7 +85,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
if (type == "CoNS") {
MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
"croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis",
@ -99,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
create_MO_fullname_lower <- function() {
MO_lookup <- AMR::microorganisms
# use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
MO_lookup$species,
MO_lookup$subspecies)))
ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
@ -175,7 +194,7 @@ create_AB_lookup <- function() {
AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
c("ab", "atc", "cid", "name",
colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
drop = FALSE]),
@ -189,7 +208,10 @@ create_AB_lookup <- function() {
AB_LOOKUP <- create_AB_lookup()
# Export to package as internal data ----
usethis::use_data(EUCAST_RULES_DF,
usethis::use_data(EUCAST_RULES_DF,
TRANSLATIONS,
LANGUAGES_SUPPORTED_NAMES,
LANGUAGES_SUPPORTED,
MO_CONS,
MO_COPS,
MO_STREP_ABCG,
@ -232,23 +254,35 @@ usethis::use_data(EUCAST_RULES_DF,
# Export data sets to the repository in different formats -----------------
for (pkg in c("haven", "openxlsx", "arrow")) {
if (!pkg %in% rownames(utils::installed.packages())) {
message("NOTE: package '", pkg, "' not installed! Ignoring export where this package is required.")
}
}
if ("digest" %in% rownames(utils::installed.packages())) {
md5 <- function(object) digest::digest(object, "md5")
} else {
# will write all files anyway, since MD5 hash cannot be determined
md5 <- function(object) "unknown-md5-hash"
}
write_md5 <- function(object) {
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
writeLines(digest::digest(object, "md5"), conn)
writeLines(md5(object), conn)
close(conn)
}
changed_md5 <- function(object) {
tryCatch({
conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
compared <- digest::digest(object, "md5") != readLines(con = conn)
compared <- md5(object) != readLines(con = conn)
close(conn)
compared
}, error = function(e) TRUE)
}
# give official names to ABs and MOs
rsi <- AMR::rsi_translation %>%
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
rsi <- rsi_translation %>%
mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>%
mutate(ab_name = ab_name(ab, language = NULL), .after = ab)
if (changed_md5(rsi)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}"))
@ -259,18 +293,25 @@ if (changed_md5(rsi)) {
try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE)
try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE)
try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE)
try(arrow::write_feather(rsi, "data-raw/rsi_translation.feather"), silent = TRUE)
try(arrow::write_parquet(rsi, "data-raw/rsi_translation.parquet"), silent = TRUE)
}
mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
if (changed_md5(mo)) {
if (changed_md5(microorganisms)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}"))
write_md5(mo)
try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
write_md5(microorganisms)
try(saveRDS(microorganisms, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE)
try(openxlsx::write.xlsx(dplyr::select(mo, -snomed), "data-raw/microorganisms.xlsx"), silent = TRUE)
max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
mo <- microorganisms
mo$snomed <- max_50_snomed
mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)
try(arrow::write_feather(microorganisms, "data-raw/microorganisms.feather"), silent = TRUE)
try(arrow::write_parquet(microorganisms, "data-raw/microorganisms.parquet"), silent = TRUE)
}
if (changed_md5(microorganisms.old)) {
@ -282,30 +323,36 @@ if (changed_md5(microorganisms.old)) {
try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE)
try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), silent = TRUE)
try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE)
try(arrow::write_feather(microorganisms.old, "data-raw/microorganisms.old.feather"), silent = TRUE)
try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
}
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
if (changed_md5(ab)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}"))
write_md5(ab)
try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(antibiotics, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE)
try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE)
try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE)
try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE)
try(arrow::write_feather(antibiotics, "data-raw/antibiotics.feather"), silent = TRUE)
try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
}
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
if (changed_md5(av)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}"))
write_md5(av)
try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE)
try(haven::write_sav(av, "data-raw/antivirals.sav"), silent = TRUE)
try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE)
try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE)
try(arrow::write_feather(antivirals, "data-raw/antivirals.feather"), silent = TRUE)
try(arrow::write_parquet(antivirals, "data-raw/antivirals.parquet"), silent = TRUE)
}
# give official names to ABs and MOs
@ -321,6 +368,8 @@ if (changed_md5(intrinsicR)) {
try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE)
try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE)
try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE)
try(arrow::write_feather(intrinsicR, "data-raw/intrinsic_resistant.feather"), silent = TRUE)
try(arrow::write_parquet(intrinsicR, "data-raw/intrinsic_resistant.parquet"), silent = TRUE)
}
if (changed_md5(dosage)) {
@ -332,6 +381,8 @@ if (changed_md5(dosage)) {
try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE)
try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE)
try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE)
try(arrow::write_feather(dosage, "data-raw/dosage.feather"), silent = TRUE)
try(arrow::write_parquet(dosage, "data-raw/dosage.parquet"), silent = TRUE)
}
reset_AMR_locale()
@ -340,3 +391,6 @@ reset_AMR_locale()
current_globalenv <- ls(envir = globalenv())
rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
rm(current_globalenv)
devtools::load_all(quiet = TRUE)
devtools::document()

View File

@ -901,7 +901,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
# to save microorganisms.translation internally to the package
# source("data-raw/_internals.R")
# source("data-raw/pre-commit-hook.R")
# load new data sets again
devtools::load_all(".")

View File

@ -444,7 +444,7 @@ rm(intrinsic_resistant)
# load new data sets again
devtools::load_all(".")
source("data-raw/_internals.R")
source("data-raw/pre-commit-hook.R")
devtools::load_all(".")

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
vignettes/AMR_intro.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

View File

@ -13,7 +13,7 @@ editor_options:
chunk_output_type: console
---
```{r setup, include = FALSE, results = 'markup'}
```{r setup, include = FALSE, results = "markup"}
knitr::opts_chunk$set(
warning = FALSE,
collapse = TRUE,
@ -40,30 +40,41 @@ download_txt <- function(filename) {
". Find more info about the structure of this data set [here](https://msberends.github.io/AMR/reference/", ifelse(filename == "antivirals", "antibiotics", filename), ".html).\n")
github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/"
filename <- paste0("../data-raw/", filename)
txt <- paste0(filename, ".txt")
rds <- paste0(filename, ".rds")
txt <- paste0(filename, ".txt")
excel <- paste0(filename, ".xlsx")
feather <- paste0(filename, ".feather")
parquet <- paste0(filename, ".parquet")
sas <- paste0(filename, ".sas")
spss <- paste0(filename, ".sav")
stata <- paste0(filename, ".dta")
sas <- paste0(filename, ".sas")
excel <- paste0(filename, ".xlsx")
create_txt <- function(filename, type, software) {
paste0("* Download as [", software, " file](", github_base, filename, ") (", AMR:::formatted_filesize(filename), ") \n")
create_txt <- function(filename, type, software, exists) {
if (isTRUE(exists)) {
paste0("* Download as [", software, "](", github_base, filename, ") (",
AMR:::formatted_filesize(filename), ") \n")
} else {
paste0("* *(unavailable as ", software, ")*\n")
}
}
if (any(file.exists(rds),
file.exists(excel),
file.exists(txt),
file.exists(excel),
file.exists(feather),
file.exists(parquet),
file.exists(sas),
file.exists(spss),
file.exists(stata))) {
msg <- c(msg, "\n**Direct download links:**\n\n")
msg <- c(msg, "\n**Direct download links:**\n\n",
create_txt(rds, "rds", "original R Data Structure (RDS) file", file.exists(rds)),
create_txt(txt, "txt", "tab-separated text file", file.exists(txt)),
create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)),
create_txt(feather, "feather", "Apache Feather file", file.exists(feather)),
create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)),
create_txt(sas, "sas", "SAS data file", file.exists(sas)),
create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)),
create_txt(stata, "dta", "Stata DTA file", file.exists(stata)))
}
if (file.exists(rds)) msg <- c(msg, create_txt(rds, "rds", "R"))
if (file.exists(excel)) msg <- c(msg, create_txt(excel, "xlsx", "Excel"))
if (file.exists(txt)) msg <- c(msg, create_txt(txt, "txt", "plain text"))
if (file.exists(sas)) msg <- c(msg, create_txt(sas, "sas", "SAS"))
if (file.exists(spss)) msg <- c(msg, create_txt(spss, "sav", "SPSS"))
if (file.exists(stata)) msg <- c(msg, create_txt(stata, "dta", "Stata"))
paste0(msg, collapse = "")
}
@ -87,14 +98,13 @@ print_df <- function(x, rows = 6) {
}) %>%
knitr::kable(align = "c")
}
```
All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply tab separated files that are machine-readable and suitable for input in any software program, such as laboratory information systems.
All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated text files that are machine-readable and suitable for input in any software program, such as laboratory information systems.
On this page, we explain how to download them and what the structure of the data sets looks like.
## Microorganisms (currently accepted names)
## `microorganisms`: Microbial Taxonomy (currently accepted names)
`r structure_txt(microorganisms)`
@ -102,6 +112,8 @@ This data set is in R available as `microorganisms`, after you load the `AMR` pa
`r download_txt("microorganisms")`
**NOTE: The exported files for Excel, SAS, SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB; the file size limit of GitHub.** Advice? Use R instead.
### Source
Our full taxonomy of microorganisms is based on the authoritative and comprehensive:
@ -130,7 +142,7 @@ microorganisms %>%
print_df()
```
## Microorganisms (previously accepted names)
## `microorganisms.old`: Microbial Taxonomy (previously accepted names)
`r structure_txt(microorganisms.old)`
@ -158,7 +170,7 @@ microorganisms.old %>%
```
## Antibiotic agents
## `antibiotics`: Antibiotic Agents
`r structure_txt(antibiotics)`
@ -183,7 +195,7 @@ antibiotics %>%
```
## Antiviral agents
## `antivirals`: Antiviral Agents
`r structure_txt(antivirals)`
@ -205,7 +217,7 @@ antivirals %>%
print_df()
```
## Interpretation from MIC values / disk diameters to R/SI
## `rsi_translation`: Interpretation from MIC values / disk diameters to R/SI
`r structure_txt(rsi_translation)`
@ -227,7 +239,7 @@ rsi_translation %>%
```
## Intrinsic bacterial resistance
## `intrinsic_resistant`: Intrinsic Bacterial Resistance
`r structure_txt(intrinsic_resistant)`
@ -253,7 +265,7 @@ intrinsic_resistant %>%
```
## Dosage guidelines from EUCAST
## `dosage`: Dosage Guidelines from EUCAST
`r structure_txt(dosage)`

View File

@ -22,15 +22,19 @@ knitr::opts_chunk$set(
)
```
Note: to keep the package size as small as possible, we only included this vignette on CRAN. You can read more vignettes on our website about how to conduct AMR data analysis, determine MDRO's, find explanation of EUCAST rules, and much more: <https://msberends.github.io/AMR/articles/>.
Note: to keep the package size as small as possible, we only included this vignette on CRAN. You can read more vignettes on our website about how to conduct AMR data analysis, determine MDROs, find explanation of EUCAST rules, and much more: <https://msberends.github.io/AMR/articles/>.
----
`AMR` is a free, open-source and independent R package (see [Copyright](https://msberends.github.io/AMR/#copyright)) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting.
The `AMR` package is a [free and open-source](https://msberends.github.io/AMR/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting.
```{r, echo = FALSE, out.width = "555px"}
knitr::include_graphics("AMR_intro.png")
```
After installing this package, R knows `r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species and all `r AMR:::format_included_data_number(rbind(AMR::antibiotics[, "atc", drop = FALSE], AMR::antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data.
The `AMR` package is available in Danish, Dutch, English, French, German, Italian, Portuguese, Russian, Spanish and Swedish. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
The `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
This package is fully independent of any other R package and works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. Since its first public release in early 2018, this package has been downloaded from more than 175 countries.
@ -56,3 +60,9 @@ This package can be used for:
All reference data sets (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are publicly and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply flat files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please find [all download links on our website](https://msberends.github.io/AMR/articles/datasets.html), which is automatically updated with every code change.
This R package was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl). This R package formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)) but is actively and durably maintained (see [changelog](https://msberends.github.io/AMR/news/index.html)) by two public healthcare organisations in the Netherlands.
----
<small>
This AMR package for R is free, open-source software and licensed under the [GNU General Public License v2.0 (GPL-2)](https://msberends.github.io/AMR/LICENSE-text.html). These requirements are consequently legally binding: modifications must be released under the same license when distributing the package, changes made to the code must be documented, source code must be made available when the package is distributed, and a copy of the license and copyright notice must be included with the package.
</small>