diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index 88cf75fa..00000000 --- a/.gitattributes +++ /dev/null @@ -1,6 +0,0 @@ -*.dta filter=lfs diff=lfs merge=lfs -text -*.sas filter=lfs diff=lfs merge=lfs -text -*.sav filter=lfs diff=lfs merge=lfs -text -data-raw/*.dta filter=lfs diff=lfs merge=lfs -text -data-raw/*.sas filter=lfs diff=lfs merge=lfs -text -data-raw/*.sav filter=lfs diff=lfs merge=lfs -text diff --git a/.github/prehooks/post-checkout b/.github/prehooks/post-checkout deleted file mode 100755 index cab40f26..00000000 --- a/.github/prehooks/post-checkout +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-checkout.\n"; exit 2; } -git lfs post-checkout "$@" diff --git a/.github/prehooks/post-commit b/.github/prehooks/post-commit deleted file mode 100755 index 9443f416..00000000 --- a/.github/prehooks/post-commit +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-commit.\n"; exit 2; } -git lfs post-commit "$@" diff --git a/.github/prehooks/post-merge b/.github/prehooks/post-merge deleted file mode 100755 index 828b7089..00000000 --- a/.github/prehooks/post-merge +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. 
If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/post-merge.\n"; exit 2; } -git lfs post-merge "$@" diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit index dc8344db..3d05a7e7 100755 --- a/.github/prehooks/pre-commit +++ b/.github/prehooks/pre-commit @@ -5,13 +5,13 @@ echo "Running pre-commit hook..." # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ echo ">> Updating R documentation..." if command -v Rscript > /dev/null; then - if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then - Rscript -e "suppressMessages(roxygen2::roxygenise())" + if [ "$(Rscript -e 'cat(all(c('"'pkgload'"', '"'devtools'"', '"'dplyr'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then + Rscript -e "source('data-raw/pre-commit-hook.R')" currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` git add man/* - echo ">> done." + git add R/sysdata.rda else - echo ">> R packages 'roxygen2' and 'pkgload' are not installed!" + echo ">> R package 'pkgload', 'devtools', or 'dplyr' not installed!" currentpkg="your" fi else @@ -30,7 +30,7 @@ currenttagfull=`git describe --tags --abbrev=0` currenttag=`git describe --tags --abbrev=0 | sed 's/v//'` if [ "$currenttag" = "" ]; then # there is no tag, so set tag to 0.0.1 and commit index to current count - echo ">> - no git tags found, create some using v(x).(y).(z)" + echo ">> - no git tags found, create one in this format: 'v(x).(y).(z)'!" currenttag="0.0.1" currentcommit=`git rev-list --count HEAD` else diff --git a/.github/prehooks/pre-commit.save b/.github/prehooks/pre-commit.save deleted file mode 100755 index ddb33066..00000000 --- a/.github/prehooks/pre-commit.save +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/sh - -echo "Running pre-commit hook..." - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -echo ">> Updating R documentation..." 
-if command -v Rscript > /dev/null; then - if [ "$(Rscript -e 'cat(all(c('"'roxygen2'"', '"'pkgload'"') %in% rownames(installed.packages())))')" = "TRUE" ]; then - Rscript -e "suppressMessages(roxygen2::roxygenise())" - currentpkg=`Rscript -e "cat(pkgload::pkg_name())"` - git add man/* - echo ">> done." - else - echo ">> R packages 'roxygen2' and 'pkgload' are not installed!" - currentpkg="your" - fi -else - echo ">> R is not available on your system!" - currentpkg="your" -fi -echo ">> " - - -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -echo ">> Updating semantic versioning and date..." - -# get tags from remote, and remove tags not on remote: -git fetch origin --prune --prune-tags --quiet -currenttagfull=`git describe --tags --abbrev=0` -currenttag=`git describe --tags --abbrev=0 | sed 's/v//'` -if [ "$currenttag" = "" ]; then - # there is no tag, so set tag to 0.0.1 and commit index to current count - echo ">> - no git tags found, create some using v(x).(y).(z)" - currenttag="0.0.1" - currentcommit=`git rev-list --count HEAD` -else - # there is a tag, so base version number on that - currentcommit=`git rev-list --count ${currenttagfull}..HEAD` - if (( "$currentcommit" == 0 )); then - # tag is new, so this must become the version number - currentversion="$currenttag" - fi - echo ">> - latest tag is '${currenttagfull}', with ${currentcommit} previous commits" -fi -if [ "$currentversion" = "" ]; then - # combine tag (e.g. 1.2.3) and commit number (like 5) increased by 9000 to indicate beta version - currentversion="$currenttag.$((currentcommit + 9001))" # results in e.g. 
1.2.3.9005 -fi -echo ">> - ${currentpkg} pkg version set to ${currentversion}" - -# set version number and date to DESCRIPTION file -sed -i -- "s/^Version: .*/Version: ${currentversion}/" DESCRIPTION -sed -i -- "s/^Date: .*/Date: $(date '+%Y-%m-%d')/" DESCRIPTION -echo ">> - updated DESCRIPTION" -# remove leftover on macOS -rm -f DESCRIPTION-- -# add to commit -git add DESCRIPTION - -# set version number to NEWS file -if [ -e "NEWS.md" ]; then - sed -i -- "1s/.*/# ${currentpkg} ${currentversion}/" NEWS.md - echo ">> - updated NEWS.md" - # remove leftover on macOS - rm -f NEWS.md-- - # add to commit - git add NEWS.md -else - echo ">> - no NEWS.md found!" -fi -echo ">> " diff --git a/.github/prehooks/pre-push b/.github/prehooks/pre-push deleted file mode 100755 index 81a9cc63..00000000 --- a/.github/prehooks/pre-push +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path. 
If you no longer wish to use Git LFS, remove this hook by deleting .git/hooks/pre-push.\n"; exit 2; } -git lfs pre-push "$@" diff --git a/.gitignore b/.gitignore index 80ef971a..07d87cca 100755 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ doc .Rhistory .RData .Ruserdata -AMR.Rproj tests/testthat/Rplots.pdf inst/doc /src/*.o diff --git a/AMR.Rproj b/AMR.Rproj new file mode 100644 index 00000000..1604a99f --- /dev/null +++ b/AMR.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: Ask +AlwaysSaveHistory: Yes + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageBuildArgs: --no-build-vignettes +PackageCheckArgs: --no-build-vignettes --as-cran +PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION index 4577074b..ce68c8bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 1.8.1.9027 +Version: 1.8.1.9028 Date: 2022-08-26 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 0b26273f..3c1f1cd3 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,12 @@ -# AMR 1.8.1.9027 +# AMR 1.8.1.9028 ### New * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations. * Support for the following languages: Chinese, Greek, Japanese, Polish, Turkish and Ukrainian. The `AMR` package is now available in 16 languages. ### Changed -* Fix for `as.rsi()` on certain EUCAST breakpoints for MIC values +* Fix for using `as.rsi()` on certain EUCAST breakpoints for MIC values +* Fix for using `as.rsi()` on `NA` values (e.g. 
`as.rsi(as.disk(NA), ...)`) * Removed `as.integer()` for MIC values, since MIC are not integer values and running `table()` on MIC values consequently failed for not being able to retrieve the level position (as that's how normally `as.integer()` on `factor`s work) * `droplevels()` on MIC will now return a common `factor` at default and will lose the `` class. Use `droplevels(..., as.mic = TRUE)` to keep the `` class. * Small fix for using `ab_from_text()` @@ -19,6 +20,9 @@ ### Other * New website to make use of the new Bootstrap 5 and pkgdown v2.0. The website now contains results for all examples and will be automatically regenerated with every change to our repository, using GitHub Actions * Added Peter Dutey-Magni and Anton Mymrikov as contributors, to thank them for their valuable input +* Our data sets are now also continually exported to Apache Feather and Apache Parquet formats +* Set up Git Large File Storage (Git LFS) for the large SAS and SPSS file formats + # `AMR` 1.8.1 diff --git a/R/aa_globals.R b/R/aa_globals.R index 0e6c71ef..e1491019 100755 --- a/R/aa_globals.R +++ b/R/aa_globals.R @@ -24,7 +24,7 @@ # ==================================================================== # # add new version numbers here, and add the rules themselves to "data-raw/eucast_rules.tsv" and rsi_translation -# (sourcing "data-raw/_internals.R" will process the TSV file) +# (sourcing "data-raw/pre-commit-hook.R" will process the TSV file) EUCAST_VERSION_BREAKPOINTS <- list("11.0" = list(version_txt = "v11.0", year = 2021, title = "'EUCAST Clinical Breakpoint Tables'", diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index d0c2b999..86ecc08e 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -596,7 +596,7 @@ create_eucast_ab_documentation <- function() { ab <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/_internals.R, such as `CARBAPENEMS` + # 
antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` diff --git a/R/ab_selectors.R b/R/ab_selectors.R index fd159a87..98de5044 100644 --- a/R/ab_selectors.R +++ b/R/ab_selectors.R @@ -502,7 +502,7 @@ ab_select_exec <- function(function_name, } if (is.null(ab_class_args)) { - # their upper case equivalent are vectors with class , created in data-raw/_internals.R + # their upper case equivalent are vectors with class , created in data-raw/pre-commit-hook.R # carbapenems() gets its codes from AMR:::AB_CARBAPENEMS abx <- get(paste0("AB_", toupper(function_name)), envir = asNamespace("AMR")) ab_group <- function_name diff --git a/R/eucast_rules.R b/R/eucast_rules.R index ea993a98..e2475b2c 100755 --- a/R/eucast_rules.R +++ b/R/eucast_rules.R @@ -543,7 +543,7 @@ eucast_rules <- function(x, # this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF %>% filter(is.na(have_these_values))) eucast_rules_df <- list(...)$eucast_rules_df } else { - # otherwise internal data file, created in data-raw/_internals.R + # otherwise internal data file, created in data-raw/pre-commit-hook.R eucast_rules_df <- EUCAST_RULES_DF } diff --git a/R/guess_ab_col.R b/R/guess_ab_col.R index efffb4bb..da005f81 100755 --- a/R/guess_ab_col.R +++ b/R/guess_ab_col.R @@ -311,7 +311,7 @@ get_ab_from_namespace <- function(x, cols_ab) { x_new <- character() for (val in x) { if (paste0("AB_", val) %in% ls(envir = asNamespace("AMR"))) { - # antibiotic group names, as defined in data-raw/_internals.R, such as `AB_CARBAPENEMS` + # antibiotic group names, as defined in data-raw/pre-commit-hook.R, such as `AB_CARBAPENEMS` val <- eval(parse(text = paste0("AB_", val)), envir = asNamespace("AMR")) } else if (val %in% AB_lookup$ab) { # separate drugs, such as `AMX` diff --git a/R/rsi.R b/R/rsi.R index bdeddf86..7f811231 100755 --- 
a/R/rsi.R +++ b/R/rsi.R @@ -597,18 +597,18 @@ get_guideline <- function(guideline, reference_data) { guideline_param } -as_rsi_method <- function(method_short = "mic", - method_long = "MIC values", - x = x, - mo = NULL, - ab = deparse(substitute(x)), - guideline = "EUCAST", - uti = FALSE, - conserve_capped_values = FALSE, - add_intrinsic_resistance = FALSE, - reference_data = AMR::rsi_translation, +as_rsi_method <- function(method_short, + method_long, + x, + mo, + ab, + guideline, + uti, + conserve_capped_values, + add_intrinsic_resistance, + reference_data, ...) { - meet_criteria(x) + meet_criteria(x, allow_NA = TRUE) meet_criteria(mo, allow_class = c("mo", "character"), allow_NULL = TRUE) meet_criteria(ab, allow_class = c("ab", "character")) meet_criteria(guideline, allow_class = "character", has_length = 1) diff --git a/R/sysdata.rda b/R/sysdata.rda index 89799e4d..b698b578 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/zzz.R b/R/zzz.R index 396f753f..5d47e1bc 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -133,7 +133,7 @@ create_MO_lookup <- function() { MO_lookup$fullname_lower <- MO_FULLNAME_LOWER } else { MO_lookup$fullname_lower <- "" - warning("MO table updated - Run: source(\"data-raw/_internals.R\")", call. = FALSE) + warning("MO table updated - Run: source(\"data-raw/pre-commit-hook.R\")", call. 
= FALSE) } # add a column with only "e coli" like combinations diff --git a/data-raw/antibiotics.feather b/data-raw/antibiotics.feather new file mode 100644 index 00000000..2b9d96d9 Binary files /dev/null and b/data-raw/antibiotics.feather differ diff --git a/data-raw/antibiotics.parquet b/data-raw/antibiotics.parquet new file mode 100644 index 00000000..66423b40 Binary files /dev/null and b/data-raw/antibiotics.parquet differ diff --git a/data-raw/antibiotics.rds b/data-raw/antibiotics.rds index 032c544e..40342dba 100644 Binary files a/data-raw/antibiotics.rds and b/data-raw/antibiotics.rds differ diff --git a/data-raw/antivirals.feather b/data-raw/antivirals.feather new file mode 100644 index 00000000..8232a72c Binary files /dev/null and b/data-raw/antivirals.feather differ diff --git a/data-raw/antivirals.parquet b/data-raw/antivirals.parquet new file mode 100644 index 00000000..0b5226bc Binary files /dev/null and b/data-raw/antivirals.parquet differ diff --git a/data-raw/antivirals.rds b/data-raw/antivirals.rds index 2997918f..704e90c4 100644 Binary files a/data-raw/antivirals.rds and b/data-raw/antivirals.rds differ diff --git a/data-raw/dosage.feather b/data-raw/dosage.feather new file mode 100644 index 00000000..cfb753db Binary files /dev/null and b/data-raw/dosage.feather differ diff --git a/data-raw/dosage.parquet b/data-raw/dosage.parquet new file mode 100644 index 00000000..30165311 Binary files /dev/null and b/data-raw/dosage.parquet differ diff --git a/data-raw/intrinsic_resistant.feather b/data-raw/intrinsic_resistant.feather new file mode 100644 index 00000000..aa32ae35 Binary files /dev/null and b/data-raw/intrinsic_resistant.feather differ diff --git a/data-raw/intrinsic_resistant.parquet b/data-raw/intrinsic_resistant.parquet new file mode 100644 index 00000000..08c33434 Binary files /dev/null and b/data-raw/intrinsic_resistant.parquet differ diff --git a/data-raw/microorganisms.dta b/data-raw/microorganisms.dta index d1cbc96a..c6adb5e2 100644 --- 
a/data-raw/microorganisms.dta +++ b/data-raw/microorganisms.dta @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3b737ed331dd70a51aabf8203faadaa3f61e67c2f2cdbfce9c1b4aca7b61df93 -size 28881867 +oid sha256:0d69888efa84f05de1de460039fbd137439f76fba0e1a98f605df77a0e3b0ea4 +size 65184439 diff --git a/data-raw/microorganisms.feather b/data-raw/microorganisms.feather new file mode 100644 index 00000000..847a37fd Binary files /dev/null and b/data-raw/microorganisms.feather differ diff --git a/data-raw/microorganisms.md5 b/data-raw/microorganisms.md5 new file mode 100644 index 00000000..12ec97a4 --- /dev/null +++ b/data-raw/microorganisms.md5 @@ -0,0 +1 @@ +ec28bed91f4b254e2b33f30b77198325 diff --git a/data-raw/microorganisms.old.feather b/data-raw/microorganisms.old.feather new file mode 100644 index 00000000..6dd4d740 Binary files /dev/null and b/data-raw/microorganisms.old.feather differ diff --git a/data-raw/microorganisms.old.parquet b/data-raw/microorganisms.old.parquet new file mode 100644 index 00000000..2abfbd2c Binary files /dev/null and b/data-raw/microorganisms.old.parquet differ diff --git a/data-raw/microorganisms.parquet b/data-raw/microorganisms.parquet new file mode 100644 index 00000000..c3106dc2 Binary files /dev/null and b/data-raw/microorganisms.parquet differ diff --git a/data-raw/microorganisms.rds b/data-raw/microorganisms.rds index 03de7877..5321a47a 100644 Binary files a/data-raw/microorganisms.rds and b/data-raw/microorganisms.rds differ diff --git a/data-raw/microorganisms.sas b/data-raw/microorganisms.sas index 0dedb8e3..fe980841 100644 --- a/data-raw/microorganisms.sas +++ b/data-raw/microorganisms.sas @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3c78b0121a7adc97218825b701ab157e2d0c01400d797fa5fd40b7abf27d79f -size 32219136 +oid sha256:2253a2f9b918972e77af08eec81565219510c10dba4bd957bca1580e4392033e +size 72474624 diff --git a/data-raw/microorganisms.sav b/data-raw/microorganisms.sav index 
95efeff1..c67d21e4 100644 --- a/data-raw/microorganisms.sav +++ b/data-raw/microorganisms.sav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4970b36edc301a65f2a2494da93419e2e116302d029ba5a49a4fac82cef8e068 -size 17100983 +oid sha256:cbe379d131f50308af69d73f5cf74a14b92d6cf892a9b11fd02eaa48bf5b5657 +size 21775629 diff --git a/data-raw/microorganisms.xlsx b/data-raw/microorganisms.xlsx index 4c8713b8..0932db24 100644 Binary files a/data-raw/microorganisms.xlsx and b/data-raw/microorganisms.xlsx differ diff --git a/data-raw/_internals.R b/data-raw/pre-commit-hook.R similarity index 82% rename from data-raw/_internals.R rename to data-raw/pre-commit-hook.R index 354f1129..e7d17686 100644 --- a/data-raw/_internals.R +++ b/data-raw/pre-commit-hook.R @@ -9,7 +9,7 @@ # (c) 2018-2022 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. 
# # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -24,7 +24,7 @@ # ==================================================================== # # Run this file to update the package using: -# source("data-raw/_internals.R") +# source("data-raw/pre-commit-hook.R") library(dplyr, warn.conflicts = FALSE) devtools::load_all(quiet = TRUE) @@ -42,19 +42,38 @@ EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv", stringsAsFactors = FALSE, header = TRUE, strip.white = TRUE, - na = c(NA, "", NULL)) %>% + na = c(NA, "", NULL)) %>% # take the order of the reference.rule_group column in the original data file mutate(reference.rule_group = factor(reference.rule_group, levels = unique(reference.rule_group), ordered = TRUE), - sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% + sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>% arrange(reference.rule_group, reference.version, sorting_rule, - reference.rule) %>% - mutate(reference.rule_group = as.character(reference.rule_group)) %>% + reference.rule) %>% + mutate(reference.rule_group = as.character(reference.rule_group)) %>% select(-sorting_rule) +TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv", + sep = "\t", + stringsAsFactors = FALSE, + header = TRUE, + blank.lines.skip = TRUE, + fill = TRUE, + strip.white = TRUE, + encoding = "UTF-8", + fileEncoding = "UTF-8", + na.strings = c(NA, "", NULL), + allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1" + quote = "") + +LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")), + lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2)], + function(x) list(exonym = x[1], endonym = x[2]))) + +LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES) + # vectors of CoNS and CoPS, improves speed in as.mo() create_species_cons_cops <- 
function(type = c("CoNS", "CoPS")) { # Determination of which staphylococcal species are CoNS/CoPS according to: @@ -66,7 +85,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE] if (type == "CoNS") { MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae", - "auricularis", "borealis", "caeli", "capitis", "caprae", + "auricularis", "borealis", "caeli", "capitis", "caprae", "carnosus", "casei", "chromogenes", "cohnii", "condimenti", "croceilyticus", "debuckii", "devriesei", "edaphicus", "epidermidis", @@ -99,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) { create_MO_fullname_lower <- function() { MO_lookup <- AMR::microorganisms # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc. - MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, + MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, MO_lookup$species, MO_lookup$subspecies))) ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE) @@ -175,7 +194,7 @@ create_AB_lookup <- function() { AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name) AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name) AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name) - AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, + AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, c("ab", "atc", "cid", "name", colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]), drop = FALSE]), @@ -189,7 +208,10 @@ create_AB_lookup <- function() { AB_LOOKUP <- create_AB_lookup() # Export to package as internal data ---- -usethis::use_data(EUCAST_RULES_DF, +usethis::use_data(EUCAST_RULES_DF, + TRANSLATIONS, + LANGUAGES_SUPPORTED_NAMES, + 
LANGUAGES_SUPPORTED, MO_CONS, MO_COPS, MO_STREP_ABCG, @@ -232,23 +254,35 @@ usethis::use_data(EUCAST_RULES_DF, # Export data sets to the repository in different formats ----------------- +for (pkg in c("haven", "openxlsx", "arrow")) { + if (!pkg %in% rownames(utils::installed.packages())) { + message("NOTE: package '", pkg, "' not installed! Ignoring export where this package is required.") + } +} +if ("digest" %in% rownames(utils::installed.packages())) { + md5 <- function(object) digest::digest(object, "md5") +} else { + # will write all files anyway, since MD5 hash cannot be determined + md5 <- function(object) "unknown-md5-hash" +} + write_md5 <- function(object) { conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) - writeLines(digest::digest(object, "md5"), conn) + writeLines(md5(object), conn) close(conn) } changed_md5 <- function(object) { tryCatch({ conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5")) - compared <- digest::digest(object, "md5") != readLines(con = conn) + compared <- md5(object) != readLines(con = conn) close(conn) compared }, error = function(e) TRUE) } # give official names to ABs and MOs -rsi <- AMR::rsi_translation %>% - mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% +rsi <- rsi_translation %>% + mutate(mo_name = mo_name(mo, language = NULL), .after = mo) %>% mutate(ab_name = ab_name(ab, language = NULL), .after = ab) if (changed_md5(rsi)) { usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}")) @@ -259,18 +293,25 @@ if (changed_md5(rsi)) { try(haven::write_sav(rsi, "data-raw/rsi_translation.sav"), silent = TRUE) try(haven::write_dta(rsi, "data-raw/rsi_translation.dta"), silent = TRUE) try(openxlsx::write.xlsx(rsi, "data-raw/rsi_translation.xlsx"), silent = TRUE) + try(arrow::write_feather(rsi, "data-raw/rsi_translation.feather"), silent = TRUE) + try(arrow::write_parquet(rsi, "data-raw/rsi_translation.parquet"), silent = TRUE) 
} -mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character) -if (changed_md5(mo)) { +if (changed_md5(microorganisms)) { usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}")) - write_md5(mo) - try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE) + write_md5(microorganisms) + try(saveRDS(microorganisms, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE) try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) - try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE) - try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE) - try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE) - try(openxlsx::write.xlsx(dplyr::select(mo, -snomed), "data-raw/microorganisms.xlsx"), silent = TRUE) + max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " ")) + mo <- microorganisms + mo$snomed <- max_50_snomed + mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character) + try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE) + try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE) + try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE) + try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE) + try(arrow::write_feather(microorganisms, "data-raw/microorganisms.feather"), silent = TRUE) + try(arrow::write_parquet(microorganisms, "data-raw/microorganisms.parquet"), silent = TRUE) } if (changed_md5(microorganisms.old)) { @@ -282,30 +323,36 @@ if (changed_md5(microorganisms.old)) { try(haven::write_sav(microorganisms.old, "data-raw/microorganisms.old.sav"), silent = TRUE) try(haven::write_dta(microorganisms.old, "data-raw/microorganisms.old.dta"), 
silent = TRUE) try(openxlsx::write.xlsx(microorganisms.old, "data-raw/microorganisms.old.xlsx"), silent = TRUE) + try(arrow::write_feather(microorganisms.old, "data-raw/microorganisms.old.feather"), silent = TRUE) + try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE) } ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character) if (changed_md5(ab)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}")) write_md5(ab) - try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE) - try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) + try(saveRDS(antibiotics, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE) + try(write.table(antibiotics, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) try(haven::write_sas(ab, "data-raw/antibiotics.sas"), silent = TRUE) try(haven::write_sav(ab, "data-raw/antibiotics.sav"), silent = TRUE) try(haven::write_dta(ab, "data-raw/antibiotics.dta"), silent = TRUE) try(openxlsx::write.xlsx(ab, "data-raw/antibiotics.xlsx"), silent = TRUE) + try(arrow::write_feather(antibiotics, "data-raw/antibiotics.feather"), silent = TRUE) + try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE) } av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character) if (changed_md5(av)) { usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}")) write_md5(av) - try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE) + try(saveRDS(antivirals, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE) try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE) try(haven::write_sas(av, "data-raw/antivirals.sas"), silent = TRUE) try(haven::write_sav(av, 
"data-raw/antivirals.sav"), silent = TRUE) try(haven::write_dta(av, "data-raw/antivirals.dta"), silent = TRUE) try(openxlsx::write.xlsx(av, "data-raw/antivirals.xlsx"), silent = TRUE) + try(arrow::write_feather(antivirals, "data-raw/antivirals.feather"), silent = TRUE) + try(arrow::write_parquet(antivirals, "data-raw/antivirals.parquet"), silent = TRUE) } # give official names to ABs and MOs @@ -321,6 +368,8 @@ if (changed_md5(intrinsicR)) { try(haven::write_sav(intrinsicR, "data-raw/intrinsic_resistant.sav"), silent = TRUE) try(haven::write_dta(intrinsicR, "data-raw/intrinsic_resistant.dta"), silent = TRUE) try(openxlsx::write.xlsx(intrinsicR, "data-raw/intrinsic_resistant.xlsx"), silent = TRUE) + try(arrow::write_feather(intrinsicR, "data-raw/intrinsic_resistant.feather"), silent = TRUE) + try(arrow::write_parquet(intrinsicR, "data-raw/intrinsic_resistant.parquet"), silent = TRUE) } if (changed_md5(dosage)) { @@ -332,6 +381,8 @@ if (changed_md5(dosage)) { try(haven::write_sav(dosage, "data-raw/dosage.sav"), silent = TRUE) try(haven::write_dta(dosage, "data-raw/dosage.dta"), silent = TRUE) try(openxlsx::write.xlsx(dosage, "data-raw/dosage.xlsx"), silent = TRUE) + try(arrow::write_feather(dosage, "data-raw/dosage.feather"), silent = TRUE) + try(arrow::write_parquet(dosage, "data-raw/dosage.parquet"), silent = TRUE) } reset_AMR_locale() @@ -340,3 +391,6 @@ reset_AMR_locale() current_globalenv <- ls(envir = globalenv()) rm(list = current_globalenv[!current_globalenv %in% old_globalenv]) rm(current_globalenv) + +devtools::load_all(quiet = TRUE) +devtools::document() diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index c2114668..6b21f264 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -901,7 +901,7 @@ usethis::use_data(rsi_translation, overwrite = TRUE, version = 2) usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2) # 
saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # to save microorganisms.translation internally to the package -# source("data-raw/_internals.R") +# source("data-raw/pre-commit-hook.R") # load new data sets again devtools::load_all(".") diff --git a/data-raw/reproduction_of_microorganisms_update.R b/data-raw/reproduction_of_microorganisms_update.R index 46cc353b..0d7c6969 100644 --- a/data-raw/reproduction_of_microorganisms_update.R +++ b/data-raw/reproduction_of_microorganisms_update.R @@ -444,7 +444,7 @@ rm(intrinsic_resistant) # load new data sets again devtools::load_all(".") -source("data-raw/_internals.R") +source("data-raw/pre-commit-hook.R") devtools::load_all(".") diff --git a/data-raw/rsi_translation.feather b/data-raw/rsi_translation.feather new file mode 100644 index 00000000..8bb4377b Binary files /dev/null and b/data-raw/rsi_translation.feather differ diff --git a/data-raw/rsi_translation.parquet b/data-raw/rsi_translation.parquet new file mode 100644 index 00000000..e4d4a5de Binary files /dev/null and b/data-raw/rsi_translation.parquet differ diff --git a/data-raw/rsi_translation.rds b/data-raw/rsi_translation.rds index ce7f8bc3..9456c896 100644 Binary files a/data-raw/rsi_translation.rds and b/data-raw/rsi_translation.rds differ diff --git a/vignettes/AMR_intro.png b/vignettes/AMR_intro.png new file mode 100644 index 00000000..d3b1e1c0 Binary files /dev/null and b/vignettes/AMR_intro.png differ diff --git a/vignettes/datasets.Rmd b/vignettes/datasets.Rmd index 3c86447f..72ff12c4 100644 --- a/vignettes/datasets.Rmd +++ b/vignettes/datasets.Rmd @@ -13,7 +13,7 @@ editor_options: chunk_output_type: console --- -```{r setup, include = FALSE, results = 'markup'} +```{r setup, include = FALSE, results = "markup"} knitr::opts_chunk$set( warning = FALSE, collapse = TRUE, @@ -40,30 +40,41 @@ download_txt <- function(filename) { ". 
Find more info about the structure of this data set [here](https://msberends.github.io/AMR/reference/", ifelse(filename == "antivirals", "antibiotics", filename), ".html).\n") github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/" filename <- paste0("../data-raw/", filename) - txt <- paste0(filename, ".txt") rds <- paste0(filename, ".rds") + txt <- paste0(filename, ".txt") + excel <- paste0(filename, ".xlsx") + feather <- paste0(filename, ".feather") + parquet <- paste0(filename, ".parquet") + sas <- paste0(filename, ".sas") spss <- paste0(filename, ".sav") stata <- paste0(filename, ".dta") - sas <- paste0(filename, ".sas") - excel <- paste0(filename, ".xlsx") - create_txt <- function(filename, type, software) { - paste0("* Download as [", software, " file](", github_base, filename, ") (", AMR:::formatted_filesize(filename), ") \n") + create_txt <- function(filename, type, software, exists) { + if (isTRUE(exists)) { + paste0("* Download as [", software, "](", github_base, filename, ") (", + AMR:::formatted_filesize(filename), ") \n") + } else { + paste0("* *(unavailable as ", software, ")*\n") + } } - + if (any(file.exists(rds), - file.exists(excel), file.exists(txt), + file.exists(excel), + file.exists(feather), + file.exists(parquet), file.exists(sas), file.exists(spss), file.exists(stata))) { - msg <- c(msg, "\n**Direct download links:**\n\n") + msg <- c(msg, "\n**Direct download links:**\n\n", + create_txt(rds, "rds", "original R Data Structure (RDS) file", file.exists(rds)), + create_txt(txt, "txt", "tab-separated text file", file.exists(txt)), + create_txt(excel, "xlsx", "Microsoft Excel workbook", file.exists(excel)), + create_txt(feather, "feather", "Apache Feather file", file.exists(feather)), + create_txt(parquet, "parquet", "Apache Parquet file", file.exists(parquet)), + create_txt(sas, "sas", "SAS data file", file.exists(sas)), + create_txt(spss, "sav", "IBM SPSS Statistics data file", file.exists(spss)), + create_txt(stata, "dta", "Stata 
DTA file", file.exists(stata))) } - if (file.exists(rds)) msg <- c(msg, create_txt(rds, "rds", "R")) - if (file.exists(excel)) msg <- c(msg, create_txt(excel, "xlsx", "Excel")) - if (file.exists(txt)) msg <- c(msg, create_txt(txt, "txt", "plain text")) - if (file.exists(sas)) msg <- c(msg, create_txt(sas, "sas", "SAS")) - if (file.exists(spss)) msg <- c(msg, create_txt(spss, "sav", "SPSS")) - if (file.exists(stata)) msg <- c(msg, create_txt(stata, "dta", "Stata")) paste0(msg, collapse = "") } @@ -87,14 +98,13 @@ print_df <- function(x, rows = 6) { }) %>% knitr::kable(align = "c") } - ``` -All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply tab separated files that are machine-readable and suitable for input in any software program, such as laboratory information systems. +All reference data (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, MS Excel, Apache Feather, Apache Parquet, SPSS, SAS, and Stata. We also provide tab-separated text files that are machine-readable and suitable for input in any software program, such as laboratory information systems. On this page, we explain how to download them and what the structure of the data sets looks like. 
-## Microorganisms (currently accepted names) +## `microorganisms`: Microbial Taxonomy (currently accepted names) `r structure_txt(microorganisms)` @@ -102,6 +112,8 @@ This data set is in R available as `microorganisms`, after you load the `AMR` pa `r download_txt("microorganisms")` +**NOTE: The exported files for Excel, SAS, SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB, which is the file size limit of GitHub.** Advice? Use R instead. + ### Source Our full taxonomy of microorganisms is based on the authoritative and comprehensive: @@ -130,7 +142,7 @@ microorganisms %>% print_df() ``` -## Microorganisms (previously accepted names) +## `microorganisms.old`: Microbial Taxonomy (previously accepted names) `r structure_txt(microorganisms.old)` @@ -158,7 +170,7 @@ microorganisms.old %>% ``` -## Antibiotic agents +## `antibiotics`: Antibiotic Agents `r structure_txt(antibiotics)` @@ -183,7 +195,7 @@ antibiotics %>% ``` -## Antiviral agents +## `antivirals`: Antiviral Agents `r structure_txt(antivirals)` @@ -205,7 +217,7 @@ antivirals %>% print_df() ``` -## Interpretation from MIC values / disk diameters to R/SI +## `rsi_translation`: Interpretation from MIC values / disk diameters to R/SI `r structure_txt(rsi_translation)` @@ -227,7 +239,7 @@ rsi_translation %>% ``` -## Intrinsic bacterial resistance +## `intrinsic_resistant`: Intrinsic Bacterial Resistance `r structure_txt(intrinsic_resistant)` @@ -253,7 +265,7 @@ intrinsic_resistant %>% ``` -## Dosage guidelines from EUCAST +## `dosage`: Dosage Guidelines from EUCAST `r structure_txt(dosage)` diff --git a/vignettes/welcome_to_AMR.Rmd b/vignettes/welcome_to_AMR.Rmd index 81b678e9..725b2387 100644 --- a/vignettes/welcome_to_AMR.Rmd +++ b/vignettes/welcome_to_AMR.Rmd @@ -22,15 +22,19 @@ knitr::opts_chunk$set( ) ``` -Note: to keep the package size as small as possible, we only included this vignette on CRAN. 
You can read more vignettes on our website about how to conduct AMR data analysis, determine MDRO's, find explanation of EUCAST rules, and much more: <https://msberends.github.io/AMR/articles/>. +Note: to keep the package size as small as possible, we only included this vignette on CRAN. You can read more vignettes on our website about how to conduct AMR data analysis, determine MDROs, find explanation of EUCAST rules, and much more: <https://msberends.github.io/AMR/articles/>. ---- -`AMR` is a free, open-source and independent R package (see [Copyright](https://msberends.github.io/AMR/#copyright)) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. +The `AMR` package is a [free and open-source](https://msberends.github.io/AMR/#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. 
+ +```{r, echo = FALSE, out.width = "555px"} +knitr::include_graphics("AMR_intro.png") +``` After installing this package, R knows `r AMR:::format_included_data_number(AMR::microorganisms)` distinct microbial species and all `r AMR:::format_included_data_number(rbind(AMR::antibiotics[, "atc", drop = FALSE], AMR::antivirals[, "atc", drop = FALSE]))` antibiotic, antimycotic and antiviral drugs by name and code (including ATC, EARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. 
It supports any data format, including WHONET/EARS-Net data. -The `AMR` package is available in Danish, Dutch, English, French, German, Italian, Portuguese, Russian, Spanish and Swedish. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. +The `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. This package is fully independent of any other R package and works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. Since its first public release in early 2018, this package has been downloaded from more than 175 countries. @@ -56,3 +60,9 @@ This package can be used for: All reference data sets (about microorganisms, antibiotics, R/SI interpretation, EUCAST rules, etc.) in this `AMR` package are publicly and freely available. We continually export our data sets to formats for use in R, SPSS, SAS, Stata and Excel. We also supply flat files that are machine-readable and suitable for input in any software program, such as laboratory information systems. Please find [all download links on our website](https://msberends.github.io/AMR/articles/datasets.html), which is automatically updated with every code change. This R package was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl). 
This R package formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)) but is actively and durably maintained (see [changelog](https://msberends.github.io/AMR/news/index.html)) by two public healthcare organisations in the Netherlands. + +---- + + +This AMR package for R is free, open-source software and licensed under the [GNU General Public License v2.0 (GPL-2)](https://msberends.github.io/AMR/LICENSE-text.html). These requirements are consequently legally binding: modifications must be released under the same license when distributing the package, changes made to the code must be documented, source code must be made available when the package is distributed, and a copy of the license and copyright notice must be included with the package. +