diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 88cf75fa6..000000000 --- a/.gitattributes +++ /dev/null @@ -1,6 +0,0 @@ -*.dta filter=lfs diff=lfs merge=lfs -text -*.sas filter=lfs diff=lfs merge=lfs -text -*.sav filter=lfs diff=lfs merge=lfs -text -data-raw/*.dta filter=lfs diff=lfs merge=lfs -text -data-raw/*.sas filter=lfs diff=lfs merge=lfs -text -data-raw/*.sav filter=lfs diff=lfs merge=lfs -text diff --git a/.github/prehooks/post-checkout b/.github/prehooks/post-checkout deleted file mode 100755 index cab40f264..000000000 --- a/.github/prehooks/post-checkout +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-checkout.\n"; exit 2; } -git lfs post-checkout "$@" diff --git a/.github/prehooks/post-commit b/.github/prehooks/post-commit deleted file mode 100755 index 9443f4161..000000000 --- a/.github/prehooks/post-commit +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-commit.\n"; exit 2; } -git lfs post-commit "$@" diff --git a/.github/prehooks/post-merge b/.github/prehooks/post-merge deleted file mode 100755 index 828b70891..000000000 --- a/.github/prehooks/post-merge +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. 
If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-merge.\n"; exit 2; } -git lfs post-merge "$@" diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit index dc8344db5..3d05a7e7a 100755 --- a/.github/prehooks/pre-commit +++ b/.github/prehooks/pre-commit @@ -5,13 +5,13 @@ echo "Running pre-commit hook..." # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ echo ">> Updating R documentation..." if command -v Rscript > /dev/null; then - if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then - Rscript -e "suppressMessages(roxygen2::roxygenise())" + if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then + Rscript -e "source('data-raw/pre-commit-hook.R')" currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` git add man/* - echo ">> done." + git add R/sysdata.rda else - echo ">> R packages 'roxygen2' and 'pkgload' are not installed!" + echo ">> R package 'pkgload', 'devtools', or 'dplyr' not installed!" currentpkg="your" fi else @@ -30,7 +30,7 @@ currenttagfull=`git describe --tags --abbrev=0` currenttag=`git describe --tags --abbrev=0 | sed 's/v//'` if [ "$currenttag" = "" ]; then # there is no tag, so set tag to 0.0.1 and commit index to current count - echo ">> - no git tags found, create some using v(x).(y).(z)" + echo ">> - no git tags found, create one in this format: 'v(x).(y).(z)'!" currenttag="0.0.1" currentcommit=`git rev-list --count HEAD` else diff --git a/.github/prehooks/pre-commit.save b/.github/prehooks/pre-commit.save deleted file mode 100755 index ddb330666..000000000 --- a/.github/prehooks/pre-commit.save +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/sh - -echo "Running pre-commit hook..." - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -echo ">> Updating R documentation..." 
-if command -v Rscript > /dev/null; then - if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then - Rscript -e "suppressMessages(roxygen2::roxygenise())" - currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` - git add man/* - echo ">> done." - else - echo ">> R packages 'roxygen2' and 'pkgload' are not installed!" - currentpkg="your" - fi -else - echo ">> R is not available on your system!" - currentpkg="your" -fi -echo ">> " - - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -echo ">> Updating semantic versioning and date..." - -# get tags from remote, and remove tags not on remote: -git fetch origin --prune --prune-tags --quiet -currenttagfull=`git describe --tags --abbrev=0` -currenttag=`git describe --tags --abbrev=0 | sed 's/v//'` -if [ "$currenttag" = "" ]; then - # there is no tag, so set tag to 0.0.1 and commit index to current count - echo ">> - no git tags found, create some using v(x).(y).(z)" - currenttag="0.0.1" - currentcommit=`git rev-list --count HEAD` -else - # there is a tag, so base version number on that - currentcommit=`git rev-list --count ${currenttagfull}..HEAD` - if (( "$currentcommit" == 0 )); then - # tag is new, so this must become the version number - currentversion="$currenttag" - fi - echo ">> - latest tag is '${currenttagfull}', with ${currentcommit} previous commits" -fi -if [ "$currentversion" = "" ]; then - # combine tag (e.g. 1.2.3) and commit number (like 5) increased by 9000 to indicate beta version - currentversion="$currenttag.$((currentcommit + 9001))" # results in e.g. 
1.2.3.9005 -fi -echo ">> - ${currentpkg} pkg version set to ${currentversion}" - -# set version number and date to DESCRIPTION file -sed -i -- "s/^Version: .*/Version: ${currentversion}/" DESCRIPTION -sed -i -- "s/^Date: .*/Date: $(date '+%Y-%m-%d')/" DESCRIPTION -echo ">> - updated DESCRIPTION" -# remove leftover on macOS -rm -f DESCRIPTION-- -# add to commit -git add DESCRIPTION - -# set version number to NEWS file -if [ -e "NEWS.md" ]; then - sed -i -- "1s/.*/# ${currentpkg} ${currentversion}/" NEWS.md - echo ">> - updated NEWS.md" - # remove leftover on macOS - rm -f NEWS.md-- - # add to commit - git add NEWS.md -else - echo ">> - no NEWS.md found!" -fi -echo ">> " diff --git a/.github/prehooks/pre-push b/.github/prehooks/pre-push deleted file mode 100755 index 81a9cc639..000000000 --- a/.github/prehooks/pre-push +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. 
If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/pre-push.\n"; exit 2; } -git lfs pre-push "$@" diff --git a/.gitignore b/.gitignore index 80ef971ad..07d87ccab 100755 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ doc .Rhistory .RData .Ruserdata -AMR.Rproj tests/testthat/Rplots.pdf inst/doc /src/*.o diff --git a/AMR.Rproj b/AMR.Rproj new file mode 100644 index 000000000..1604a99fc --- /dev/null +++ b/AMR.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: Ask +AlwaysSaveHistory: Yes + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --no-build-vignettes +PackageCheckArgs: --no-build-vignettes --as-cran +PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION index 4577074b0..ce68c8bd1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.8.1.9027 +Version: 1.8.1.9028 Date: 2022-08-26 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 0b26273f5..3c1f1cd30 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,12 @@ -# AMR 1.8.1.9027 +# AMR 1.8.1.9028 ### New * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations. * Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. The `AMR` package is now available in 16 languages. ### Changed -* Fix for `as.rsi()` on certain EUCAST breakpoints for MIC values +* Fix for using `as.rsi()` on certain EUCAST breakpoints for MIC values +* Fix for using `as.rsi()` on `NA` values (e.g. 
`as.rsi(as.disk(NA), ...)`) * Removed `as.integer()` for MIC values, since MIC are not integer values and running `table()` on MIC values consequently failed for not being able to retrieve the level position (as that's how normally `as.integer()` on `factor`s work) * `droplevels()` on MIC will now return a common `factor` at default and will lose the `<mic>` class. Use `droplevels(..., as.mic = TRUE)` to keep the `<mic>` class. * Small fix for using `ab_from_text()` @@ -19,6 +20,9 @@ ### Other * New website to make use of the new Bootstrap 5 and pkgdown v2.0. The website now contains results for all examples and will be automatically regenerated with every change to our repository, using GitHub Actions * Added Peter Dutey-Magni and Anton Mymrikov as contributors, to thank them for their valuable input +* Our data sets are now also continually exported to Apache Feather and Apache Parquet formats +* Set up Git Large File Storage (Git LFS) for the large SAS and SPSS file formats + # `AMR` 1.8.1 diff --git a/R/aa_globals.R b/R/aa_globals.R index 0e6c71efd..e14910196 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -24,7 +24,7 @@ # ==================================================================== # # add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and rsi_translation -# (sourcing "data-raw/_internals.R" will process the TSV file) +# (sourcing "data-raw/pre-commit-hook.R" will process the TSV file) EUCAST_VERSION_BREAKPOINTS <- list("11.0" = list(version_txt = "v11.0", year = 2021, title = "'EUCAST Clinical Breakpoint Tables'", diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index d0c2b9996..86ecc08ee 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -596,7 +596,7 @@ create_eucast_ab_documentation <- function() { ab <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/_internals.R, such as `CARBAPENEMS` + # 
antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` diff --git a/R/ab_selectors.R b/R/ab_selectors.R index fd159a87f..98de50448 100644 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -502,7 +502,7 @@ ab_select_exec <- function(function_name, } if (is.null(ab_class_args)) { - # their upper case equivalent are vectors with class <ab>, created in data-raw/_internals.R + # their upper case equivalent are vectors with class <ab>, created in data-raw/pre-commit-hook.R # carbapenems() gets its codes from AMR:::AB_CARBAPENEMS abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR")) ab_group <- function_name diff --git a/R/eucast_rules.R b/R/eucast_rules.R index ea993a988..e2475b2c7 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -543,7 +543,7 @@ eucast_rules <- function(x, # this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF %>% filter(is.na(have_these_values))) eucast_rules_df <- list(...)$eucast_rules_df } else { - # otherwise internal data file, created in data-raw/_internals.R + # otherwise internal data file, created in data-raw/pre-commit-hook.R eucast_rules_df <- EUCAST_RULES_DF } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index efffb4bb7..da005f813 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -311,7 +311,7 @@ get_ab_from_namespace <- function(x, cols_ab) { x_new <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/_internals.R, such as `AB_CARBAPENEMS` + # antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `AB_CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` diff --git a/R/rsi.R b/R/rsi.R index bdeddf867..7f8112319 
100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -597,18 +597,18 @@ get_guideline <- function(guideline, reference_data) { guideline_param } -as_rsi_method <- function(method_short = "mic", - method_long = "MIC values", - x = x, - mo = NULL, - ab = deparse(substitute(x)), - guideline = "EUCAST", - uti = FALSE, - conserve_capped_values = FALSE, - add_intrinsic_resistance = FALSE, - reference_data = AMR::rsi_translation, +as_rsi_method <- function(method_short, + method_long, + x, + mo, + ab, + guideline, + uti, + conserve_capped_values, + add_intrinsic_resistance, + reference_data, ...) { - meet_criteria(x) + meet_criteria(x, allow_NA = TRUE) meet_criteria(mo, allow_class = c("mo", "character"), allow_NULL = TRUE) meet_criteria(ab, allow_class = c("ab", "character")) meet_criteria(guideline, allow_class = "character", has_length = 1) diff --git a/R/sysdata.rda b/R/sysdata.rda index 89799e4db..b698b5786 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/zzz.R b/R/zzz.R index 396f753fc..5d47e1bc9 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -133,7 +133,7 @@ create_MO_lookup <- function() { MO_lookup$fullname_lower <- MO_FULLNAME_LOWER } else { MO_lookup$fullname_lower <- "" - warning("MO table updated - Run: source(\"data-raw/_internals.R\")", call. = FALSE) + warning("MO table updated - Run: source(\"data-raw/pre-commit-hook.R\")", call. 
= FALSE) } # add a column with only "e coli" like combinations diff --git a/data-raw/antibiotics.feather b/data-raw/antibiotics.feather new file mode 100644 index 000000000..2b9d96d99 Binary files /dev/null and b/data-raw/antibiotics.feather differ diff --git a/data-raw/antibiotics.parquet b/data-raw/antibiotics.parquet new file mode 100644 index 000000000..66423b407 Binary files /dev/null and b/data-raw/antibiotics.parquet differ diff --git a/data-raw/antibiotics.rds b/data-raw/antibiotics.rds index 032c544ed..40342dba0 100644 Binary files a/data-raw/antibiotics.rds and b/data-raw/antibiotics.rds differ diff --git a/data-raw/antivirals.feather b/data-raw/antivirals.feather new file mode 100644 index 000000000..8232a72c6 Binary files /dev/null and b/data-raw/antivirals.feather differ diff --git a/data-raw/antivirals.parquet b/data-raw/antivirals.parquet new file mode 100644 index 000000000..0b5226bc1 Binary files /dev/null and b/data-raw/antivirals.parquet differ diff --git a/data-raw/antivirals.rds b/data-raw/antivirals.rds index 2997918f5..704e90c4f 100644 Binary files a/data-raw/antivirals.rds and b/data-raw/antivirals.rds differ diff --git a/data-raw/dosage.feather b/data-raw/dosage.feather new file mode 100644 index 000000000..cfb753dbb Binary files /dev/null and b/data-raw/dosage.feather differ diff --git a/data-raw/dosage.parquet b/data-raw/dosage.parquet new file mode 100644 index 000000000..30165311b Binary files /dev/null and b/data-raw/dosage.parquet differ diff --git a/data-raw/intrinsic_resistant.feather b/data-raw/intrinsic_resistant.feather new file mode 100644 index 000000000..aa32ae352 Binary files /dev/null and b/data-raw/intrinsic_resistant.feather differ diff --git a/data-raw/intrinsic_resistant.parquet b/data-raw/intrinsic_resistant.parquet new file mode 100644 index 000000000..08c33434f Binary files /dev/null and b/data-raw/intrinsic_resistant.parquet differ diff --git a/data-raw/microorganisms.dta b/data-raw/microorganisms.dta index 
d1cbc96af..c6adb5e29 100644 --- a/data-raw/microorganisms.dta +++ b/data-raw/microorganisms.dta @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b737ed331dd70a51aabf8203faadaa3f61e67c2f2cdbfce9c1b4aca7b61df93 -size 28881867 +oid sha256:0d69888efa84f05de1de460039fbd137439f76fba0e1a98f605df77a0e3b0ea4 +size 65184439 diff --git a/data-raw/microorganisms.feather b/data-raw/microorganisms.feather new file mode 100644 index 000000000..847a37fdf Binary files /dev/null and b/data-raw/microorganisms.feather differ diff --git a/data-raw/microorganisms.md5 b/data-raw/microorganisms.md5 new file mode 100644 index 000000000..12ec97a43 --- /dev/null +++ b/data-raw/microorganisms.md5 @@ -0,0 +1 @@ +ec28bed91f4b254e2b33f30b77198325 diff --git a/data-raw/microorganisms.old.feather b/data-raw/microorganisms.old.feather new file mode 100644 index 000000000..6dd4d740a Binary files /dev/null and b/data-raw/microorganisms.old.feather differ diff --git a/data-raw/microorganisms.old.parquet b/data-raw/microorganisms.old.parquet new file mode 100644 index 000000000..2abfbd2c5 Binary files /dev/null and b/data-raw/microorganisms.old.parquet differ diff --git a/data-raw/microorganisms.parquet b/data-raw/microorganisms.parquet new file mode 100644 index 000000000..c3106dc22 Binary files /dev/null and b/data-raw/microorganisms.parquet differ diff --git a/data-raw/microorganisms.rds b/data-raw/microorganisms.rds index 03de78771..5321a47a2 100644 Binary files a/data-raw/microorganisms.rds and b/data-raw/microorganisms.rds differ diff --git a/data-raw/microorganisms.sas b/data-raw/microorganisms.sas index 0dedb8e32..fe9808416 100644 --- a/data-raw/microorganisms.sas +++ b/data-raw/microorganisms.sas @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3c78b0121a7adc97218825b701ab157e2d0c01400d797fa5fd40b7abf27d79f -size 32219136 +oid sha256:2253a2f9b918972e77af08eec81565219510c10dba4bd957bca1580e4392033e +size 72474624 diff --git 
a/data-raw/microorganisms.sav b/data-raw/microorganisms.sav index 95efeff1b..c67d21e46 100644 --- a/data-raw/microorganisms.sav +++ b/data-raw/microorganisms.sav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4970b36edc301a65f2a2494da93419e2e116302d029ba5a49a4fac82cef8e068 -size 17100983 +oid sha256:cbe379d131f50308af69d73f5cf74a14b92d6cf892a9b11fd02eaa48bf5b5657 +size 21775629 diff --git a/data-raw/microorganisms.xlsx b/data-raw/microorganisms.xlsx index 4c8713b89..0932db244 100644 Binary files a/data-raw/microorganisms.xlsx and b/data-raw/microorganisms.xlsx differ diff --git a/data-raw/_internals.R b/data-raw/pre-commit-hook.R similarity index 82% rename from data-raw/_internals.R rename to data-raw/pre-commit-hook.R index 354f11294..e7d176868 100644 --- a/data-raw/_internals.R +++ b/data-raw/pre-commit-hook.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. 
# # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # # Run this file to update the package using: -# source("data-raw/_internals.R") +# source("data-raw/pre-commit-hook.R") library(dplyr, warn.conflicts = FALSE) devtools::load_all(quiet = TRUE) @@ -42,19 +42,38 @@ EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv", stringsAsFactors = FALSE, header = TRUE, strip.white = TRUE, - na = c(NA, "", NULL)) %>% + na = c(NA, "", NULL)) %>% # take the order of the reference.rule_group column in the original data file mutate(reference.rule_group = factor(reference.rule_group, levels = unique(reference.rule_group), ordered = TRUE), - sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% + sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% arrange(reference.rule_group, reference.version, sorting_rule, - reference.rule) %>% - mutate(reference.rule_group = as.character(reference.rule_group)) %>% + reference.rule) %>% + mutate(reference.rule_group = as.character(reference.rule_group)) %>% select(-sorting_rule) +TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv", + sep = "\t", + stringsAsFactors = FALSE, + header = TRUE, + blank.lines.skip = TRUE, + fill = TRUE, + strip.white = TRUE, + encoding = "UTF-8", + fileEncoding = "UTF-8", + na.strings = c(NA, "", NULL), + allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" + quote = "") + +LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")), + lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2)], + function(x) list(exonym = x[1], endonym = x[2]))) + +LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES) + # vectors of CoNS and CoPS, improves speed in as.mo() create_species_cons_cops <- 
function(type = c("CoNS", "CoPS")) { # Determination of which staphylococcal species are CoNS/CoPS according to: @@ -66,7 +85,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE] if (type == "CoNS") { MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae", - "auricularis", "borealis", "caeli", "capitis", "caprae", + "auricularis", "borealis", "caeli", "capitis", "caprae", "carnosus", "casei", "chromogenes", "cohnii", "condimenti", "croceilyticus", "debuckii", "devriesei", "edaphicus", "epidermidis", @@ -99,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { create_MO_fullname_lower <- function() { MO_lookup <- AMR::microorganisms # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. - MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, + MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, MO_lookup$species, MO_lookup$subspecies))) ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE) @@ -175,7 +194,7 @@ create_AB_lookup <- function() { AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name) AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name) AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name) - AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, + AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, c("ab", "atc", "cid", "name", colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]), drop = FALSE]), @@ -189,7 +208,10 @@ create_AB_lookup <- function() { AB_LOOKUP <- create_AB_lookup() # Export to package as internal data ---- -usethis::use_data(EUCAST_RULES_DF, +usethis::use_data(EUCAST_RULES_DF, + TRANSLATIONS, + LANGUAGES_SUPPORTED_NAMES, + 
LANGUAGES_SUPPORTED, MO_CONS, MO_COPS, MO_STREP_ABCG, @@ -232,23 +254,35 @@ usethis::use_data(EUCAST_RULES_DF, # Export data sets to the repository in different formats ----------------- +for (pkg in c("haven", "openxlsx", "arrow")) { + if (!pkg %in% rownames(utils::installed.packages())) { + message("NOTE: package '", pkg, "' not installed! Ignoring export where this package is required.") + } +} +if ("digest" %in% rownames(utils::installed.packages())) { + md5 <- function(object) digest::digest(object, "md5") +} else { + # will write all files anyway, since MD5 hash cannot be determined + md5 <- function(object) "unknown-md5-hash" +} + write_md5 <- function(object) { conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) - writeLines(digest::digest(object, "md5"), conn) + writeLines(md5(object), conn) close(conn) } changed_md5 <- function(object) { tryCatch({ conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) - compared <- digest::digest(object, "md5") != readLines(con = conn) + compared <- md5(object) != readLines(con = conn) close(conn) compared }, error = function(e) TRUE) } # give official names to ABs and MOs -rsi <- AMR::rsi_translation %>% - mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% +rsi <- rsi_translation %>% + mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% mutate(ab_name = ab_name(ab, language = NULL), .after = ab) if (changed_md5(rsi)) { usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}")) @@ -259,18 +293,25 @@ if (changed_md5(rsi)) { try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE) try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE) try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE) + try(arrow::write_feather(rsi, "data-raw/rsi_translation.feather"), silent = TRUE) + try(arrow::write_parquet(rsi, "data-raw/rsi_translation.parquet"), silent = TRUE) 
} -mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character) -if (changed_md5(mo)) { +if (changed_md5(microorganisms)) { usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}")) - write_md5(mo) - try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE) + write_md5(microorganisms) + try(saveRDS(microorganisms, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE) try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) - try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE) - try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE) - try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE) - try(openxlsx::write.xlsx(dplyr::select(mo, -snomed), "data-raw/microorganisms.xlsx"), silent = TRUE) + max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " ")) + mo <- microorganisms + mo$snomed <- max_50_snomed + mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character) + try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE) + try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE) + try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE) + try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE) + try(arrow::write_feather(microorganisms, "data-raw/microorganisms.feather"), silent = TRUE) + try(arrow::write_parquet(microorganisms, "data-raw/microorganisms.parquet"), silent = TRUE) } if (changed_md5(microorganisms.old)) { @@ -282,30 +323,36 @@ if (changed_md5(microorganisms.old)) { try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE) try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), 
silent = TRUE) try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE) + try(arrow::write_feather(microorganisms.old, "data-raw/microorganisms.old.feather"), silent = TRUE) + try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE) } ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character) if (changed_md5(ab)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}")) write_md5(ab) - try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE) - try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) + try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE) + try(write.table(antibiotics, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE) try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE) try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE) try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE) + try(arrow::write_feather(antibiotics, "data-raw/antibiotics.feather"), silent = TRUE) + try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE) } av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character) if (changed_md5(av)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}")) write_md5(av) - try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE) + try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE) try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE) try(haven::write_sav(av, 
"data-raw/antivirals.sav"), silent = TRUE) try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE) try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE) + try(arrow::write_feather(antivirals, "data-raw/antivirals.feather"), silent = TRUE) + try(arrow::write_parquet(antivirals, "data-raw/antivirals.parquet"), silent = TRUE) } # give official names to ABs and MOs @@ -321,6 +368,8 @@ if (changed_md5(intrinsicR)) { try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE) try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE) try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE) + try(arrow::write_feather(intrinsicR, "data-raw/intrinsic_resistant.feather"), silent = TRUE) + try(arrow::write_parquet(intrinsicR, "data-raw/intrinsic_resistant.parquet"), silent = TRUE) } if (changed_md5(dosage)) { @@ -332,6 +381,8 @@ if (changed_md5(dosage)) { try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE) try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE) try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE) + try(arrow::write_feather(dosage, "data-raw/dosage.feather"), silent = TRUE) + try(arrow::write_parquet(dosage, "data-raw/dosage.parquet"), silent = TRUE) } reset_AMR_locale() @@ -340,3 +391,6 @@ reset_AMR_locale() current_globalenv <- ls(envir = globalenv()) rm(list = current_globalenv[!current_globalenv %in% old_globalenv]) rm(current_globalenv) + +devtools::load_all(quiet = TRUE) +devtools::document() diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index c2114668e..6b21f2648 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -901,7 +901,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2) usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2) # 
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # to save microorganisms.translation internally to the package -# source("data-raw/_internals.R") +# source("data-raw/pre-commit-hook.R") # load new data sets again devtools::load_all(".") diff --git a/data-raw/reproduction_of_microorganisms_update.R b/data-raw/reproduction_of_microorganisms_update.R index 46cc353bb..0d7c69696 100644 --- a/data-raw/reproduction_of_microorganisms_update.R +++ b/data-raw/reproduction_of_microorganisms_update.R @@ -444,7 +444,7 @@ rm(intrinsic_resistant) # load new data sets again devtools::load_all(".") -source("data-raw/_internals.R") +source("data-raw/pre-commit-hook.R") devtools::load_all(".") diff --git a/data-raw/rsi_translation.feather b/data-raw/rsi_translation.feather new file mode 100644 index 000000000..8bb4377ba Binary files /dev/null and b/data-raw/rsi_translation.feather differ diff --git a/data-raw/rsi_translation.parquet b/data-raw/rsi_translation.parquet new file mode 100644 index 000000000..e4d4a5dea Binary files /dev/null and b/data-raw/rsi_translation.parquet differ diff --git a/data-raw/rsi_translation.rds b/data-raw/rsi_translation.rds index ce7f8bc35..9456c896c 100644 Binary files a/data-raw/rsi_translation.rds and b/data-raw/rsi_translation.rds differ diff --git a/vignettes/AMR_intro.png b/vignettes/AMR_intro.png new file mode 100644 index 000000000..d3b1e1c03 Binary files /dev/null and b/vignettes/AMR_intro.png differ diff --git a/vignettes/datasets.Rmd b/vignettes/datasets.Rmd index 3c86447f8..72ff12c4f 100644 --- a/vignettes/datasets.Rmd +++ b/vignettes/datasets.Rmd @@ -13,7 +13,7 @@ editor_options: chunk_output_type: console --- -```{r setup, include = FALSE, results = 'markup'} +```{r setup, include = FALSE, results = "markup"} knitr::opts_chunk$set( warning = FALSE, collapse = TRUE, @@ -40,30 +40,41 @@ download_txt <- function(filename) { ". 
Find more info about the structure of this data set [here](https://msberends.github.io/AMR/reference/", ifelse(filename == "antivirals", "antibiotics", filename), ".html).\n") github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/" filename <- paste0("../data-raw/", filename) - txt <- paste0(filename, ".txt") rds <- paste0(filename, ".rds") + txt <- paste0(filename, ".txt") + excel <- paste0(filename, ".xlsx") + feather <- paste0(filename, ".feather") + parquet <- paste0(filename, ".parquet") + sas <- paste0(filename, ".sas") spss <- paste0(filename, ".sav") stata <- paste0(filename, ".dta") - sas <- paste0(filename, ".sas") - excel <- paste0(filename, ".xlsx") - create_txt <- function(filename, type, software) { - paste0("* Download as [", software, " file](", github_base, filename, ") (", AMR:::formatted_filesize(filename), ") \n") + create_txt <- function(filename, type, software, exists) { + if (isTRUE(exists)) { + paste0("* Download as [", software, "](", github_base, filename, ") (", + AMR:::formatted_filesize(filename), ") \n") + } else { + paste0("* *(unavailable as ", software, ")*\n") + } } - + if (any(file.exists(rds), - file.exists(excel), file.exists(txt), + file.exists(excel), + file.exists(feather), + file.exists(parquet), file.exists(sas), file.exists(spss), file.exists(stata))) { - msg <- c(msg, "\n**Direct download links:**\n\n") + msg <- c(msg, "\n**Direct download links:**\n\n", + create_txt(rds, "rds", "original R Data Structure (RDS) file", file.exists(rds)), + create_txt(txt, "txt", "tab-separated text file", file.exists(txt)), + create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)), + create_txt(feather, "feather", "Apache Feather file", file.exists(feather)), + create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)), + create_txt(sas, "sas", "SAS data file", file.exists(sas)), + create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)), + create_txt(stata, "dta", "Stata 
DTA file", file.exists(stata))) } - if (file.exists(rds)) msg <- c(msg, create_txt(rds, "rds", "R")) - if (file.exists(excel)) msg <- c(msg, create_txt(excel, "xlsx", "Excel")) - if (file.exists(txt)) msg <- c(msg, create_txt(txt, "txt", "plain text")) - if (file.exists(sas)) msg <- c(msg, create_txt(sas, "sas", "SAS")) - if (file.exists(spss)) msg <- c(msg, create_txt(spss, "sav", "SPSS")) - if (file.exists(stata)) msg <- c(msg, create_txt(stata, "dta", "Stata")) paste0(msg, collapse = "") } @@ -87,14 +98,13 @@ print_df <- function(x, rows = 6) { }) %>% knitr::kable(align = "c") } - ``` -All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply tab separated files that are machine-readable and suitable for input in any software program, such as laboratory information systems. +All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. On this page, we explain how to download them and how the structure of the data sets look like. 
-## Microorganisms (currently accepted names) +## `microorganisms`: Microbial Taxonomy (currently accepted names) `r structure_txt(microorganisms)` @@ -102,6 +112,8 @@ This data set is in R available as `microorganisms`, after you load the `AMR` pa `r download_txt("microorganisms")` +**NOTE: The exported files for Excel, SAS, SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB; the file size limit of GitHub.** Advice? Use R instead. + ### Source Our full taxonomy of microorganisms is based on the authoritative and comprehensive: @@ -130,7 +142,7 @@ microorganisms %>% print_df() ``` -## Microorganisms (previously accepted names) +## `microorganisms.old`: Microbial Taxonomy (previously accepted names) `r structure_txt(microorganisms.old)` @@ -158,7 +170,7 @@ microorganisms.old %>% ``` -## Antibiotic agents +## `antibiotics`: Antibiotic Agents `r structure_txt(antibiotics)` @@ -183,7 +195,7 @@ antibiotics %>% ``` -## Antiviral agents +## `antivirals`: Antiviral Agents `r structure_txt(antivirals)` @@ -205,7 +217,7 @@ antivirals %>% print_df() ``` -## Interpretation from MIC values / disk diameters to R/SI +## `rsi_translation`: Interpretation from MIC values / disk diameters to R/SI `r structure_txt(rsi_translation)` @@ -227,7 +239,7 @@ rsi_translation %>% ``` -## Intrinsic bacterial resistance +## `intrinsic_resistant`: Intrinsic Bacterial Resistance `r structure_txt(intrinsic_resistant)` @@ -253,7 +265,7 @@ intrinsic_resistant %>% ``` -## Dosage guidelines from EUCAST +## `dosage`: Dosage Guidelines from EUCAST `r structure_txt(dosage)` diff --git a/vignettes/welcome_to_AMR.Rmd b/vignettes/welcome_to_AMR.Rmd index 81b678e9d..725b23874 100644 --- a/vignettes/welcome_to_AMR.Rmd +++ b/vignettes/welcome_to_AMR.Rmd @@ -22,15 +22,19 @@ knitr::opts_chunk$set( ) ``` -Note: to keep the package size as small as possible, we only included this vignette on CRAN. 
You can read more vignettes on our website about how to conduct AMR data analysis, determine MDRO's, find explanation of EUCAST rules, and much more: . +Note: to keep the package size as small as possible, we only included this vignette on CRAN. You can read more vignettes on our website about how to conduct AMR data analysis, determine MDROs, find explanation of EUCAST rules, and much more: . ---- -`AMR` is a free, open-source and independent R package (see [Copyright](https://msberends.github.io/AMR/#copyright)) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. +The `AMR` package is a [free and open-source](https://msberends.github.io/AMR/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. + +```{r, echo = FALSE, out.width = "555px"} +knitr::include_graphics("AMR_intro.png") +``` After installing this package, R knows `r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species and all `r AMR:::format_included_data_number(rbind(AMR::antibiotics[, "atc", drop = FALSE], AMR::antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. 
It supports any data format, including WHONET/EARS-Net data. -The `AMR` package is available in Danish, Dutch, English, French, German, Italian, Portuguese, Russian, Spanish and Swedish. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. +The `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. This package is fully independent of any other R package and works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. Since its first public release in early 2018, this package has been downloaded from more than 175 countries. @@ -56,3 +60,9 @@ This package can be used for: All reference data sets (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are publicly and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply flat files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please find [all download links on our website](https://msberends.github.io/AMR/articles/datasets.html), which is automatically updated with every code change. This R package was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl). 
This R package formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)) but is actively and durably maintained (see [changelog](https://msberends.github.io/AMR/news/index.html)) by two public healthcare organisations in the Netherlands. + +---- + + +This AMR package for R is free, open-source software and licensed under the [GNU General Public License v2.0 (GPL-2)](https://msberends.github.io/AMR/LICENSE-text.html). These requirements are consequently legally binding: modifications must be released under the same license when distributing the package, changes made to the code must be documented, source code must be made available when the package is distributed, and a copy of the license and copyright notice must be included with the package. +