diff --git a/DESCRIPTION b/DESCRIPTION index 6277afe21..223b63bb1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 3.0.1.9044 -Date: 2026-04-04 +Version: 3.0.1.9045 +Date: 2026-04-21 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index bd2e6faa4..bbfe873e4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 3.0.1.9044 +# AMR 3.0.1.9045 ### New * Support for clinical breakpoints of 2026 of both CLSI and EUCAST, by adding all of their over 5,700 new clinical breakpoints to the `clinical_breakpoints` data set for usage in `as.sir()`. EUCAST 2026 is now the new default guideline for all MIC and disk diffusion interpretations. @@ -16,7 +16,9 @@ - Functions such as `susceptibility()` count WT as S and NWT as R * Function `interpretive_rules()`, which allows future implementation of CLSI interpretive rules (#235) - `eucast_rules()` has become a wrapper around that function + - Gained argument `add_if_missing` (default: `TRUE`). When set to `FALSE`, rules are only applied to cells that already contain an SIR value; `NA` cells are left untouched. This is useful with `overwrite = TRUE` to update reported results without imputing values for drugs that were not tested (#259) * Function `amr_course()`, which allows for automated download and unpacking of a GitHub repository for e.g. 
webinar use +* Two new `NA` objects, `NA_ab_` and `NA_mo_`, analogous to base R's `NA_character_` and `NA_integer_`, for use in pipelines that require typed missing values ### Fixes * Fixed a bug in `as.sir()` where values that were purely numeric (e.g., `"1"`) and matched the broad SIR-matching regex would be incorrectly stripped of all content by the Unicode letter filter diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index d611a940e..bae37579f 100644 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -766,7 +766,7 @@ vector_or <- function(v, quotes = TRUE, reverse = FALSE, sort = TRUE, initial_ca } if (isTRUE(quotes)) { if (isTRUE(documentation)) { - quotes <- '"' + quotes <- c("`\"", "\"`") } else { # use cli to format as values quotes <- c("{.val ", "}") diff --git a/R/aa_options.R b/R/aa_options.R index 5a05573df..482a7a181 100755 --- a/R/aa_options.R +++ b/R/aa_options.R @@ -35,7 +35,7 @@ #' `options(AMR_guideline = "CLSI")` #' @section Options (alphabetical order): #' * `AMR_antibiogram_formatting_type` \cr A [numeric] (1-22) to use in [antibiogram()], to indicate which formatting type to use. -#' * `AMR_breakpoint_type` \cr A [character] to use in [as.sir()], to indicate which breakpoint type to use. This must be either `r vector_or(clinical_breakpoints$type)`. +#' * `AMR_breakpoint_type` \cr A [character] to use in [as.sir()], to indicate which breakpoint type to use. This must be either `r vector_or(clinical_breakpoints$type, documentation = TRUE)`. #' * `AMR_capped_mic_handling` \cr A [character] to use in [as.sir()], to indicate how capped MIC values (`<`, `<=`, `>`, `>=`) should be interpreted. Must be one of `"none"`, `"conservative"`, `"standard"`, or `"lenient"` - the default is `"conservative"`. #' * `AMR_cleaning_regex` \cr A [regular expression][base::regex] (case-insensitive) to use in [as.mo()] and all [`mo_*`][mo_property()] functions, to clean the user input. 
The default is the outcome of [mo_cleaning_regex()], which removes texts between brackets and texts such as "species" and "serovar". #' * `AMR_custom_ab` \cr A file location to an RDS file, to use custom antimicrobial drugs with this package. This is explained in [add_custom_antimicrobials()]. diff --git a/R/ab_property.R b/R/ab_property.R index 3f90aa844..ce0098c05 100755 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -32,7 +32,7 @@ #' Use these functions to return a specific property of an antibiotic from the [antimicrobials] data set. All input values will be evaluated internally with [as.ab()]. #' @param x Any (vector of) text that can be coerced to a valid antibiotic drug code with [as.ab()]. #' @param tolower A [logical] to indicate whether the first [character] of every output should be transformed to a lower case [character]. This will lead to e.g. "polymyxin B" and not "polymyxin b". -#' @param property One of the column names of one of the [antimicrobials] data set: `vector_or(colnames(antimicrobials), sort = FALSE)`. +#' @param property One of the column names of the [antimicrobials] data set: `r vector_or(colnames(antimicrobials), documentation = TRUE, sort = FALSE)`. #' @param language Language of the returned text - the default is the current system language (see [get_AMR_locale()]) and can also be set with the package option [`AMR_locale`][AMR-options]. Use `language = NULL` or `language = ""` to prevent translation. #' @param administration Way of administration, either `"oral"` or `"iv"`. #' @param open Browse the URL using [utils::browseURL()]. 
diff --git a/R/antibiogram.R b/R/antibiogram.R index 40bf874b8..a614ef7ac 100755 --- a/R/antibiogram.R +++ b/R/antibiogram.R @@ -48,8 +48,8 @@ #' - `carbapenems() + "GEN"` #' - `carbapenems() + c("", "GEN")` #' - `carbapenems() + c("", aminoglycosides())` -#' @param mo_transform A character to transform microorganism input - must be `"name"`, `"shortname"` (default), `"gramstain"`, or one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. Can also be `NULL` to not transform the input or `NA` to consider all microorganisms 'unknown'. -#' @param ab_transform A character to transform antimicrobial input - must be one of the column names of the [antimicrobials] data set (defaults to `"name"`): `r vector_or(colnames(antimicrobials), sort = FALSE, quotes = TRUE)`. Can also be `NULL` to not transform the input. +#' @param mo_transform A character to transform microorganism input - must be `"name"`, `"shortname"` (default), `"gramstain"`, or one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, documentation = TRUE)`. Can also be `NULL` to not transform the input or `NA` to consider all microorganisms 'unknown'. +#' @param ab_transform A character to transform antimicrobial input - must be one of the column names of the [antimicrobials] data set (defaults to `"name"`): `r vector_or(colnames(antimicrobials), sort = FALSE, documentation = TRUE)`. Can also be `NULL` to not transform the input. #' @param syndromic_group A column name of `x`, or values calculated to split rows of `x`, e.g. by using [ifelse()] or [`case_when()`][dplyr::case_when()]. See *Examples*. #' @param add_total_n *(deprecated in favour of `formatting_type`)* A [logical] to indicate whether `n_tested` available numbers per pathogen should be added to the table (default is `TRUE`). 
This will add the lowest and highest number of available isolates per antimicrobial (e.g, if for *E. coli* 200 isolates are available for ciprofloxacin and 150 for amoxicillin, the returned number will be "150-200"). This option is unavailable when `wisca = TRUE`; in that case, use [retrieve_wisca_parameters()] to get the parameters used for WISCA. #' @param only_all_tested (for combination antibiograms): a [logical] to indicate that isolates must be tested for all antimicrobials, see *Details*. diff --git a/R/av_property.R b/R/av_property.R index 41f83df7c..d2001596b 100755 --- a/R/av_property.R +++ b/R/av_property.R @@ -32,7 +32,7 @@ #' Use these functions to return a specific property of an antiviral drug from the [antivirals] data set. All input values will be evaluated internally with [as.av()]. #' @param x Any (vector of) text that can be coerced to a valid antiviral drug code with [as.av()]. #' @param tolower A [logical] to indicate whether the first [character] of every output should be transformed to a lower case [character]. -#' @param property One of the column names of one of the [antivirals] data set: `vector_or(colnames(antivirals), sort = FALSE)`. +#' @param property One of the column names of the [antivirals] data set: `r vector_or(colnames(antivirals), documentation = TRUE, sort = FALSE)`. #' @param language Language of the returned text - the default is system language (see [get_AMR_locale()]) and can also be set with the package option [`AMR_locale`][AMR-options]. Use `language = NULL` or `language = ""` to prevent translation. #' @param administration Way of administration, either `"oral"` or `"iv"`. #' @param open Browse the URL using [utils::browseURL()]. 
diff --git a/R/data.R b/R/data.R index c387b4668..c3e228167 100755 --- a/R/data.R +++ b/R/data.R @@ -106,12 +106,12 @@ #' @format A [tibble][tibble::tibble] with `r format(nrow(microorganisms), big.mark = " ")` observations and `r ncol(microorganisms)` variables: #' - `mo`\cr ID of microorganism as used by this package. ***This is a unique identifier.*** #' - `fullname`\cr Full name, like `"Escherichia coli"`. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon. ***This is a unique identifier.*** -#' - `status` \cr Status of the taxon, either `r vector_or(microorganisms$status)` +#' - `status` \cr Status of the taxon, either `r vector_or(microorganisms$status, documentation = TRUE)` #' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism. Note that for fungi, *phylum* is equal to their taxonomic *division*. Also, for fungi, *subkingdom* and *subdivision* were left out since they do not occur in the bacterial taxonomy. #' - `rank`\cr Text of the taxonomic rank of the microorganism, such as `"species"` or `"genus"` #' - `ref`\cr Author(s) and year of related scientific publication. This contains only the *first surname* and year of the *latest* authors, e.g. "Wallis *et al.* 2006 *emend.* Smith and Jones 2018" becomes "Smith *et al.*, 2018". This field is directly retrieved from the source specified in the column `source`. Moreover, accents were removed to comply with CRAN that only allows ASCII characters. -#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. 
Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance. -#' - `source`\cr Either `r vector_or(microorganisms$source)` (see *Source*) +#' - `oxygen_tolerance` \cr Oxygen tolerance, either `r vector_or(microorganisms$oxygen_tolerance, documentation = TRUE)`. These data were retrieved from BacDive (see *Source*). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently `r round(length(microorganisms$oxygen_tolerance[which(!is.na(microorganisms$oxygen_tolerance))]) / nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]) * 100, 1)`% of all `r format_included_data_number(nrow(microorganisms[which(microorganisms$kingdom == "Bacteria"), ]))` bacteria in the data set contain an oxygen tolerance. +#' - `source`\cr Either `r vector_or(microorganisms$source, documentation = TRUE)` (see *Source*) #' - `lpsn`\cr Identifier ('Record number') of `r TAXONOMY_VERSION$LPSN$name`. This will be the first/highest LPSN identifier to keep one identifier per row. For example, *Acetobacter ascendens* has LPSN Record number 7864 and 11011. Only the first is available in the `microorganisms` data set. ***This is a unique identifier***, though available for only `r format_included_data_number(sum(!is.na(microorganisms$lpsn)))` records. 
#' - `lpsn_parent`\cr LPSN identifier of the parent taxon #' - `lpsn_renamed_to`\cr LPSN identifier of the currently valid taxon @@ -222,8 +222,8 @@ #' - `date`\cr Date of receipt at the laboratory #' - `patient`\cr ID of the patient #' - `age`\cr Age of the patient -#' - `gender`\cr Gender of the patient, either `r vector_or(example_isolates$gender)` -#' - `ward`\cr Ward type where the patient was admitted, either `r vector_or(example_isolates$ward)` +#' - `gender`\cr Gender of the patient, either `r vector_or(example_isolates$gender, documentation = TRUE)` +#' - `ward`\cr Ward type where the patient was admitted, either `r vector_or(example_isolates$ward, documentation = TRUE)` #' - `mo`\cr ID of microorganism created with [as.mo()], see also the [microorganisms] data set #' - `PEN:RIF`\cr `r sum(vapply(FUN.VALUE = logical(1), example_isolates, is.sir))` different antimicrobials with class [`sir`] (see [as.sir()]); these column names occur in the [antimicrobials] data set and can be translated with [set_ab_names()] or [ab_name()] #' @inheritSection AMR Download Our Reference Data @@ -292,9 +292,9 @@ #' Use [as.sir()] to transform MICs or disks measurements to SIR values. #' @format A [tibble][tibble::tibble] with `r format(nrow(clinical_breakpoints), big.mark = " ")` observations and `r ncol(clinical_breakpoints)` variables: #' - `guideline`\cr Name of the guideline -#' - `type`\cr Breakpoint type, either `r vector_or(clinical_breakpoints$type)` -#' - `host`\cr Host of infectious agent. This is mostly useful for veterinary breakpoints and is either `r vector_or(clinical_breakpoints$host)` -#' - `method`\cr Testing method, either `r vector_or(clinical_breakpoints$method)` +#' - `type`\cr Breakpoint type, either `r vector_or(clinical_breakpoints$type, documentation = TRUE)` +#' - `host`\cr Host of infectious agent. 
This is mostly useful for veterinary breakpoints and is either `r vector_or(clinical_breakpoints$host, documentation = TRUE)` +#' - `method`\cr Testing method, either `r vector_or(clinical_breakpoints$method, documentation = TRUE)` #' - `site`\cr Body site for which the breakpoint must be applied, e.g. "Oral" or "Respiratory" #' - `mo`\cr Microbial ID, see [as.mo()] #' - `rank_index`\cr Taxonomic rank index of `mo` from 1 (subspecies/infraspecies) to 5 (unknown microorganism) @@ -307,7 +307,7 @@ #' - `is_SDD`\cr A [logical] value (`TRUE`/`FALSE`) to indicate whether the intermediate range between "S" and "R" should be interpreted as "SDD", instead of "I". This currently applies to `r sum(clinical_breakpoints$is_SDD)` breakpoints. #' @details #' ### Different Types of Breakpoints -#' Supported types of breakpoints are `r vector_and(clinical_breakpoints$type, quote = FALSE)`. ECOFF (Epidemiological cut-off) values are used in antimicrobial susceptibility testing to differentiate between wild-type and non-wild-type strains of bacteria or fungi. +#' Supported types of breakpoints are `r vector_and(clinical_breakpoints$type, quotes = FALSE)`. ECOFF (Epidemiological cut-off) values are used in antimicrobial susceptibility testing to differentiate between wild-type and non-wild-type strains of bacteria or fungi. #' #' The default is `"human"`, which can also be set with the package option [`AMR_breakpoint_type`][AMR-options]. Use [`as.sir(..., breakpoint_type = ...)`][as.sir()] to interpret raw data using a specific breakpoint type, e.g. `as.sir(..., breakpoint_type = "ECOFF")` to use ECOFFs. 
#' @@ -350,10 +350,10 @@ #' @format A [tibble][tibble::tibble] with `r format(nrow(dosage), big.mark = " ")` observations and `r ncol(dosage)` variables: #' - `ab`\cr Antimicrobial ID as used in this package (such as `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available #' - `name`\cr Official name of the antimicrobial drug as used by WHONET/EARS-Net or the WHO -#' - `type`\cr Type of the dosage, either `r vector_or(dosage$type)` +#' - `type`\cr Type of the dosage, either `r vector_or(dosage$type, documentation = TRUE)` #' - `dose`\cr Dose, such as "2 g" or "25 mg/kg" #' - `dose_times`\cr Number of times a dose must be administered -#' - `administration`\cr Route of administration, either `r vector_or(dosage$administration)` +#' - `administration`\cr Route of administration, either `r vector_or(dosage$administration, documentation = TRUE)` #' - `notes`\cr Additional dosage notes #' - `original_txt`\cr Original text in the PDF file of EUCAST #' - `eucast_version`\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply, either `r vector_or(dosage$eucast_version, quotes = FALSE, sort = TRUE, reverse = TRUE)` diff --git a/R/interpretive_rules.R b/R/interpretive_rules.R index abeb0909f..8f3562724 100755 --- a/R/interpretive_rules.R +++ b/R/interpretive_rules.R @@ -64,16 +64,17 @@ format_eucast_version_nr <- function(version, markdown = TRUE) { #' @param guideline A guideline name, either "EUCAST" (default) or "CLSI". This can be set with the package option [`AMR_guideline`][AMR-options]. #' @param rules A [character] vector that specifies which rules should be applied. Must be one or more of `"breakpoints"`, `"expected_phenotypes"`, `"expert"`, `"other"`, `"custom"`, `"all"`, and defaults to `c("breakpoints", "expected_phenotypes")`. 
The default value can be set to another value using the package option [`AMR_interpretive_rules`][AMR-options]: `options(AMR_interpretive_rules = "all")`. If using `"custom"`, be sure to fill in argument `custom_rules` too. Custom rules can be created with [custom_eucast_rules()]. #' @param verbose A [logical] to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time. -#' @param version_breakpoints The version number to use for the EUCAST Clinical Breakpoints guideline. Can be `r vector_or(names(EUCAST_VERSION_BREAKPOINTS), reverse = TRUE)`. -#' @param version_expected_phenotypes The version number to use for the EUCAST Expected Phenotypes. Can be `r vector_or(names(EUCAST_VERSION_EXPECTED_PHENOTYPES), reverse = TRUE)`. -#' @param version_expertrules The version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be `r vector_or(names(EUCAST_VERSION_EXPERT_RULES), reverse = TRUE)`. +#' @param version_breakpoints The version number to use for the EUCAST Clinical Breakpoints guideline. Can be `r vector_or(names(EUCAST_VERSION_BREAKPOINTS), documentation = TRUE, reverse = TRUE)`. +#' @param version_expected_phenotypes The version number to use for the EUCAST Expected Phenotypes. Can be `r vector_or(names(EUCAST_VERSION_EXPECTED_PHENOTYPES), documentation = TRUE, reverse = TRUE)`. +#' @param version_expertrules The version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be `r vector_or(names(EUCAST_VERSION_EXPERT_RULES), documentation = TRUE, reverse = TRUE)`. 
#' @param ampc_cephalosporin_resistance (only applies when `rules` contains `"expert"` or `"all"`) a [character] value that should be applied to cefotaxime, ceftriaxone and ceftazidime for AmpC de-repressed cephalosporin-resistant mutants - the default is `NA`. Currently only works when `version_expertrules` is `3.2` and higher; these versions of '*EUCAST Expert Rules on Enterobacterales*' state that results of cefotaxime, ceftriaxone and ceftazidime should be reported with a note, or results should be suppressed (emptied) for these three drugs. A value of `NA` (the default) for this argument will remove results for these three drugs, while e.g. a value of `"R"` will make the results for these drugs resistant. Use `NULL` or `FALSE` to not alter results for these three drugs of AmpC de-repressed cephalosporin-resistant mutants. Using `TRUE` is equal to using `"R"`. \cr For *EUCAST Expert Rules* v3.2, this rule applies to: `r vector_and(gsub("[^a-zA-Z ]+", "", unlist(strsplit(EUCAST_RULES_DF[which(EUCAST_RULES_DF$reference.version %in% c(3.2, 3.3) & EUCAST_RULES_DF$reference.rule %like% "ampc"), "this_value"][1], "|", fixed = TRUE))), quotes = "*")`. #' @param ... Column names of antimicrobials. To automatically detect antimicrobial column names, do not provide any named arguments; [guess_ab_col()] will then be used for detection. To manually specify a column, provide its name (case-insensitive) as an argument, e.g. `AMX = "amoxicillin"`. To skip a specific antimicrobial, set it to `NULL`, e.g. `TIC = NULL` to exclude ticarcillin. If a manually defined column does not exist in the data, it will be skipped with a warning. #' @param ab Any (vector of) text that can be coerced to a valid antimicrobial drug code with [as.ab()]. -#' @param administration Route of administration, either `r vector_or(dosage$administration)`. +#' @param administration Route of administration, either `r vector_or(dosage$administration, documentation = TRUE)`. 
#' @param only_sir_columns A [logical] to indicate whether only antimicrobial columns must be included that were transformed to class [sir][as.sir()] on beforehand. Defaults to `FALSE` if no columns of `x` have a class [sir][as.sir()]. #' @param custom_rules Custom rules to apply, created with [custom_eucast_rules()]. #' @param overwrite A [logical] indicating whether to overwrite existing SIR values (default: `FALSE`). When `FALSE`, only non-SIR values are modified (i.e., any value that is not already S, I or R). To ensure compliance with EUCAST guidelines, **this should remain** `FALSE`, as EUCAST notes often state that an organism "should be tested for susceptibility to individual agents or be reported resistant". +#' @param add_if_missing A [logical] indicating whether rules should also be applied to missing (`NA`) values (default: `TRUE`). When `FALSE`, rules are only applied to cells that already contain an SIR value; cells with `NA` are left untouched. Setting this to `FALSE` requires `overwrite = TRUE`, since an error is thrown when both `overwrite` and `add_if_missing` are `FALSE`. This is particularly useful when combining `overwrite = TRUE` with custom rules to update reported results without imputing values for untested drugs. #' @inheritParams first_isolate #' @details #' **Note:** This function does not translate MIC or disk values to SIR values. Use [as.sir()] for that. \cr @@ -170,6 +171,7 @@ interpretive_rules <- function(x, only_sir_columns = any(is.sir(x)), custom_rules = NULL, overwrite = FALSE, + add_if_missing = TRUE, ...) 
{ meet_criteria(x, allow_class = "data.frame") meet_criteria(col_mo, allow_class = "character", has_length = 1, is_in = colnames(x), allow_NULL = TRUE) @@ -184,6 +186,12 @@ interpretive_rules <- function(x, meet_criteria(only_sir_columns, allow_class = "logical", has_length = 1) meet_criteria(custom_rules, allow_class = "custom_eucast_rules", allow_NULL = TRUE) meet_criteria(overwrite, allow_class = "logical", has_length = 1) + meet_criteria(add_if_missing, allow_class = "logical", has_length = 1) + + stop_if( + !overwrite && !add_if_missing, + "Either set {.arg overwrite} or {.arg add_if_missing} to {.code TRUE}, or both." + ) stop_if( guideline == "CLSI", @@ -533,7 +541,8 @@ interpretive_rules <- function(x, warned = warned, info = info, verbose = verbose, - overwrite = overwrite + overwrite = overwrite, + add_if_missing = add_if_missing ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed @@ -575,7 +584,8 @@ interpretive_rules <- function(x, warned = warned, info = info, verbose = verbose, - overwrite = overwrite + overwrite = overwrite, + add_if_missing = add_if_missing ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed @@ -595,7 +605,7 @@ interpretive_rules <- function(x, } else { if (isTRUE(info)) { cat("\n") - message_("Skipping inhibitor-inheritance rules defined by this AMR package: setting S to drug+inhibitor where drug is S, and setting R to drug where drug+inhibitor is R. Add \"other\" or \"all\" to the {.arg rules} argument to apply those rules.") + message_("Skipping inhibitor-inheritance rules defined by this AMR package: setting S to drug+inhibitor where drug is S, and setting R to drug where drug+inhibitor is R. 
Add {.val other} or {.val all} to the {.arg rules} argument to apply those rules.") } } @@ -609,7 +619,7 @@ interpretive_rules <- function(x, # >>> Apply Official EUCAST rules <<< --------------------------------------------------- eucast_notification_shown <- FALSE if (!is.null(list(...)$eucast_rules_df)) { - # this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF %>% filter(is.na(have_these_values))) + # this allows: eucast_rules(x, eucast_rules_df = AMR:::EUCAST_RULES_DF |> filter(is.na(have_these_values))) eucast_rules_df_total <- list(...)$eucast_rules_df } else { # otherwise internal data file, created in data-raw/_pre_commit_checks.R @@ -862,7 +872,8 @@ interpretive_rules <- function(x, warned = warned, info = info, verbose = verbose, - overwrite = overwrite + overwrite = overwrite, + add_if_missing = add_if_missing ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed @@ -932,7 +943,8 @@ interpretive_rules <- function(x, warned = warned, info = info, verbose = verbose, - overwrite = overwrite + overwrite = overwrite, + add_if_missing = add_if_missing ) n_added <- n_added + run_changes$added n_changed <- n_changed + run_changes$changed @@ -1063,13 +1075,13 @@ interpretive_rules <- function(x, warn_lacking_sir_class <- warn_lacking_sir_class[order(colnames(x.bak))] warn_lacking_sir_class <- warn_lacking_sir_class[!is.na(warn_lacking_sir_class)] warning_( - "in {.help [{.fun eucast_rules}](AMR::eucast_rules)}: not all columns with antimicrobial results are of class {.cls sir}. Transform them on beforehand, e.g.:\n", - " - ", highlight_code(paste0(x_deparsed, " %>% as.sir(", ifelse(length(warn_lacking_sir_class) == 1, + "in {.help [{.fun eucast_rules}](AMR::eucast_rules)}: not all columns with antimicrobial results are of class {.cls sir}. 
Transform them on beforehand, e.g.:\n\n", + "\u00a0\u00a0", AMR_env$bullet_icon, " ", highlight_code(paste0(x_deparsed, " |> as.sir(", ifelse(length(warn_lacking_sir_class) == 1, warn_lacking_sir_class, paste0(warn_lacking_sir_class[1], ":", warn_lacking_sir_class[length(warn_lacking_sir_class)]) - ), ")")), "\n", - " - ", highlight_code(paste0(x_deparsed, " %>% mutate_if(is_sir_eligible, as.sir)")), "\n", - " - ", highlight_code(paste0(x_deparsed, " %>% mutate(across(where(is_sir_eligible), as.sir))")) + ), ")")), "\n\n", + "\u00a0\u00a0", AMR_env$bullet_icon, " ", highlight_code(paste0(x_deparsed, " |> mutate_if(is_sir_eligible, as.sir)")), "\n\n", + "\u00a0\u00a0", AMR_env$bullet_icon, " ", highlight_code(paste0(x_deparsed, " |> mutate(across(where(is_sir_eligible), as.sir))")) ) } @@ -1124,9 +1136,11 @@ edit_sir <- function(x, warned, info, verbose, - overwrite) { + overwrite, + add_if_missing) { cols <- unique(cols[!is.na(cols) & !is.null(cols)]) - + rows <- unique(rows) + # for Verbose Mode, keep track of all changes and return them track_changes <- list( added = 0, @@ -1152,32 +1166,50 @@ edit_sir <- function(x, track_changes$sir_warn <- cols[!vapply(FUN.VALUE = logical(1), x[, cols, drop = FALSE], is.sir)] } isNA <- is.na(new_edits[rows, cols]) - isSIR <- !isNA & (new_edits[rows, cols] == "S" | new_edits[rows, cols] == "I" | new_edits[rows, cols] == "R" | new_edits[rows, cols] == "SDD" | new_edits[rows, cols] == "NI" | new_edits[rows, cols] == "WT" | new_edits[rows, cols] == "NWT" | new_edits[rows, cols] == "NS") + isSIR <- !isNA & + (new_edits[rows, cols] == "S" | + new_edits[rows, cols] == "I" | + new_edits[rows, cols] == "R" | + new_edits[rows, cols] == "SDD" | + new_edits[rows, cols] == "NI" | + new_edits[rows, cols] == "WT" | + new_edits[rows, cols] == "NWT" | + new_edits[rows, cols] == "NS") non_SIR <- !isSIR if (isFALSE(overwrite) && any(isSIR) && message_not_thrown_before("edit_sir.warning_overwrite")) { - warning_("Some values had SIR values and 
were not overwritten, since {.code overwrite = FALSE}.") + warning_("in {.help [{.fun eucast_rules}](AMR::eucast_rules)}: some columns had SIR values which were not overwritten, since {.code overwrite = FALSE}.") } - tryCatch( - # insert into original table - if (isTRUE(overwrite)) { - new_edits[rows, cols] <- to + # determine which cells to modify based on overwrite and add_if_missing + if (isTRUE(overwrite)) { + if (isTRUE(add_if_missing)) { + apply_mask <- rep(TRUE, length(isSIR)) } else { - new_edits[rows, cols][non_SIR] <- to - }, + apply_mask <- isSIR + } + } else { + # overwrite = FALSE, add_if_missing = TRUE: fill missing and placeholder cells only + apply_mask <- !isSIR + } + + do_assign <- function() { + subset <- new_edits[rows, cols, drop = FALSE] + mask <- matrix(apply_mask, nrow = nrow(subset), ncol = ncol(subset)) + subset[mask] <- to + new_edits[rows, cols] <<- subset + } + + tryCatch( + do_assign(), warning = function(w) { if (w$message %like% "invalid factor level") { - xyz <- vapply(FUN.VALUE = logical(1), cols, function(col) { + vapply(FUN.VALUE = logical(1), cols, function(col) { new_edits[, col] <<- factor( x = as.character(pm_pull(new_edits, col)), levels = unique(c(to, levels(pm_pull(new_edits, col)))) ) TRUE }) - if (isTRUE(overwrite)) { - suppressWarnings(new_edits[rows, cols] <<- to) - } else { - suppressWarnings(new_edits[rows, cols][non_SIR] <<- to) - } + suppressWarnings(do_assign()) warning_( "in {.help [{.fun eucast_rules}](AMR::eucast_rules)}: value \"", to, "\" added to the factor levels of column", ifelse(length(cols) == 1, "", "s"), @@ -1185,7 +1217,7 @@ edit_sir <- function(x, " because this value was not an existing factor level." 
) txt_warning() - warned <- FALSE + warned <<- FALSE } else { warning_("in {.help [{.fun eucast_rules}](AMR::eucast_rules)}: ", w$message) txt_warning() diff --git a/R/mo.R b/R/mo.R index d0d98d838..da6fa4484 100755 --- a/R/mo.R +++ b/R/mo.R @@ -792,7 +792,7 @@ print.mo <- function(x, print.shortnames = FALSE, ...) { names(x) <- x_names if (!all(x %in% c(AMR_env$MO_lookup$mo, NA))) { warning_( - "Some MO codes are from a previous AMR package version. ", + "Some MO codes are from another AMR package version. ", "Please update the MO codes with {.help [{.fun as.mo}](AMR::as.mo)}.", call = FALSE ) @@ -826,7 +826,7 @@ as.data.frame.mo <- function(x, ...) { add_MO_lookup_to_AMR_env() if (!all(x %in% c(AMR_env$MO_lookup$mo, NA))) { warning_( - "The data contains old MO codes (from a previous AMR package version). ", + "The data contains old MO codes (from another AMR package version). ", "Please update your MO codes with {.help [{.fun as.mo}](AMR::as.mo)}." ) } diff --git a/R/mo_property.R b/R/mo_property.R index 0db511798..287e136c4 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -31,7 +31,7 @@ #' #' Use these functions to return a specific property of a microorganism based on the latest accepted taxonomy. All input values will be evaluated internally with [as.mo()], which makes it possible to use microbial abbreviations, codes and names as input. See *Examples*. #' @param x Any [character] (vector) that can be coerced to a valid microorganism code with [as.mo()]. Can be left blank for auto-guessing the column containing microorganism codes if used in a data set, see *Examples*. -#' @param property One of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`, or must be `"shortname"`. +#' @param property One of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, documentation = TRUE)`, or must be `"shortname"`. #' @inheritParams as.mo #' @param ... 
Other arguments passed on to [as.mo()], such as 'minimum_matching_score', 'ignore_pattern', and 'remove_from_input'. #' @param ab Any (vector of) text that can be coerced to a valid antibiotic drug code with [as.ab()]. diff --git a/R/pca.R b/R/pca.R index 24c6a2e87..c7e2fc820 100755 --- a/R/pca.R +++ b/R/pca.R @@ -66,12 +66,12 @@ #' #' # new ggplot2 plotting method using this package: #' if (require("dplyr") && require("ggplot2")) { -#' ggplot_pca(pca_result) +#' ggplot_pca(pca_result) #' } #' if (require("dplyr") && require("ggplot2")) { -#' ggplot_pca(pca_result) + -#' scale_colour_viridis_d() + -#' labs(title = "Title here") +#' ggplot_pca(pca_result) + +#' scale_colour_viridis_d() + +#' labs(title = "Title here") #' } #' } pca <- function(x, diff --git a/R/plotting.R b/R/plotting.R index 0d292616c..8fe4c3958 100755 --- a/R/plotting.R +++ b/R/plotting.R @@ -200,7 +200,7 @@ #' theme_minimal() + #' geom_boxplot(fill = NA, colour = "grey30") + #' geom_jitter(width = 0.25) -#' labs(title = "scale_y_mic()/scale_colour_sir() automatically applied") +#' labs(title = "scale_y_mic()/scale_colour_sir() automatically applied") #' #' mic_sir_plot #' } diff --git a/R/sir.R b/R/sir.R index 644355dcb..4020acf2e 100755 --- a/R/sir.R +++ b/R/sir.R @@ -65,7 +65,7 @@ VALID_SIR_LEVELS <- c("S", "SDD", "I", "R", "NI", "WT", "NWT", "NS") #' @param substitute_missing_r_breakpoint A [logical] to indicate that a missing clinical breakpoints for R (resistant) must be substituted with R - the default is `FALSE`. Some (especially CLSI) breakpoints only have a breakpoint for S, meaning that the outcome can only be `"S"` or `NA`. Setting this to `TRUE` will convert the `NA`s in these cases to `"R"`. Can also be set with the package option [`AMR_substitute_missing_r_breakpoint`][AMR-options]. #' @param include_screening A [logical] to indicate that clinical breakpoints for screening are allowed - the default is `FALSE`. 
Can also be set with the package option [`AMR_include_screening`][AMR-options]. #' @param include_PKPD A [logical] to indicate that PK/PD clinical breakpoints must be applied as a last resort - the default is `TRUE`. Can also be set with the package option [`AMR_include_PKPD`][AMR-options]. -#' @param breakpoint_type The type of breakpoints to use, either `r vector_or(clinical_breakpoints$type)`. ECOFF stands for Epidemiological Cut-Off values. The default is `"human"`, which can also be set with the package option [`AMR_breakpoint_type`][AMR-options]. If `host` is set to values of veterinary species, this will automatically be set to `"animal"`. +#' @param breakpoint_type The type of breakpoints to use, either `r vector_or(clinical_breakpoints$type, documentation = TRUE)`. ECOFF stands for Epidemiological Cut-Off values. The default is `"human"`, which can also be set with the package option [`AMR_breakpoint_type`][AMR-options]. If `host` is set to values of veterinary species, this will automatically be set to `"animal"`. #' @param host A vector (or column name) with [character]s to indicate the host. Only useful for veterinary breakpoints, as it requires `breakpoint_type = "animal"`. The values can be any text resembling the animal species, even in any of the `r length(LANGUAGES_SUPPORTED)` supported languages of this package. For foreign languages, be sure to set the language with [set_AMR_locale()] (though it will be automatically guessed based on the system language). #' @param language Language to convert values set in `host` when using animal breakpoints. Use one of these supported language names or [ISO 639-1 codes](https://en.wikipedia.org/wiki/ISO_639-1): `r vector_or(paste0(sapply(LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), " (" , LANGUAGES_SUPPORTED, ")"), quotes = FALSE, sort = FALSE)`. #' @param verbose A [logical] to indicate that all notes should be printed during interpretation of MIC values or disk diffusion values. 
diff --git a/R/tidymodels.R b/R/tidymodels.R index ea00c7a55..b2513e0aa 100644 --- a/R/tidymodels.R +++ b/R/tidymodels.R @@ -21,7 +21,6 @@ #' @export #' @examples #' if (require("tidymodels")) { -#' #' # The below approach formed the basis for this paper: DOI 10.3389/fmicb.2025.1582703 #' # Presence of ESBL genes was predicted based on raw MIC values. #' @@ -40,13 +39,10 @@ #' #' # Create and prep a recipe with MIC log2 transformation #' mic_recipe <- recipe(esbl ~ ., data = training_data) %>% -#' #' # Optionally remove non-predictive variables #' remove_role(genus, old_role = "predictor") %>% -#' #' # Apply the log2 transformation to all MIC predictors #' step_mic_log2(all_mic_predictors()) %>% -#' #' # And apply the preparation steps #' prep() #' @@ -67,13 +63,15 @@ #' bind_cols(out_testing) #' #' # Evaluate predictions using standard classification metrics -#' our_metrics <- metric_set(accuracy, -#' recall, -#' precision, -#' sensitivity, -#' specificity, -#' ppv, -#' npv) +#' our_metrics <- metric_set( +#' accuracy, +#' recall, +#' precision, +#' sensitivity, +#' specificity, +#' ppv, +#' npv +#' ) #' metrics <- our_metrics(predictions, truth = esbl, estimate = .pred_class) #' #' # Show performance diff --git a/R/top_n_microorganisms.R b/R/top_n_microorganisms.R index 4b08f2c85..d6dd08c86 100755 --- a/R/top_n_microorganisms.R +++ b/R/top_n_microorganisms.R @@ -32,7 +32,7 @@ #' This function filters a data set to include only the top *n* microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera. #' @param x A data frame containing microbial data. #' @param n An integer specifying the maximum number of unique values of the `property` to include in the output. -#' @param property A character string indicating the microorganism property to use for filtering. 
Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, quotes = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation. When using `"species"` (default) or `"subpecies"`, the genus will be added to make sure each (sub)species still belongs to the right genus. +#' @param property A character string indicating the microorganism property to use for filtering. Must be one of the column names of the [microorganisms] data set: `r vector_or(colnames(microorganisms), sort = FALSE, documentation = TRUE)`. If `NULL`, the raw values from `col_mo` will be used without transformation. When using `"species"` (default) or `"subspecies"`, the genus will be added to make sure each (sub)species still belongs to the right genus. #' @param n_for_each An optional integer specifying the maximum number of rows to retain for each value of the selected property. If `NULL`, all rows within the top *n* groups will be included. #' @param col_mo A character string indicating the column in `x` that contains microorganism names or codes. Defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()]. #' @param ... Additional arguments passed on to [mo_property()] when `property` is not `NULL`. 
diff --git a/data-raw/AMR_vet.qmd b/data-raw/AMR_vet.qmd index 603428a9b..f46009cda 100644 --- a/data-raw/AMR_vet.qmd +++ b/data-raw/AMR_vet.qmd @@ -15,8 +15,10 @@ library(readr) library(tidyr) # WHONET version of 16th Feb 2024 -whonet_breakpoints <- read_tsv("WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"), - show_col_types = FALSE, guess_max = Inf) %>% +whonet_breakpoints <- read_tsv("WHONET/Resources/Breakpoints.txt", + na = c("", "NA", "-"), + show_col_types = FALSE, guess_max = Inf +) %>% filter(GUIDELINES %in% c("CLSI", "EUCAST")) dim(whonet_breakpoints) @@ -48,9 +50,9 @@ whonet_breakpoints |> ```{r} whonet_breakpoints |> - filter(HOST == "Cats", YEAR >= 2021) |> - select(GUIDELINES, YEAR, TEST_METHOD, ORGANISM_CODE, R, S) |> - mutate(MO_NAME = AMR::mo_shortname(ORGANISM_CODE), .before = R) |> + filter(HOST == "Cats", YEAR >= 2021) |> + select(GUIDELINES, YEAR, TEST_METHOD, ORGANISM_CODE, R, S) |> + mutate(MO_NAME = AMR::mo_shortname(ORGANISM_CODE), .before = R) |> as.data.frame() ``` @@ -58,12 +60,14 @@ whonet_breakpoints |> ```{r} whonet_breakpoints |> - filter(HOST == "Cats", YEAR == 2023) |> - mutate(MO = AMR::mo_shortname(ORGANISM_CODE), - AB = AMR::ab_name(WHONET_ABX_CODE), - SITE_OF_INFECTION = substr(SITE_OF_INFECTION, 1, 25)) |> - arrange(MO, AB) |> - select(MO, AB, SITE_OF_INFECTION) |> + filter(HOST == "Cats", YEAR == 2023) |> + mutate( + MO = AMR::mo_shortname(ORGANISM_CODE), + AB = AMR::ab_name(WHONET_ABX_CODE), + SITE_OF_INFECTION = substr(SITE_OF_INFECTION, 1, 25) + ) |> + arrange(MO, AB) |> + select(MO, AB, SITE_OF_INFECTION) |> as.data.frame() ``` diff --git a/data-raw/_pre_commit_checks.R b/data-raw/_pre_commit_checks.R index aa3b9d2bc..6a083863d 100644 --- a/data-raw/_pre_commit_checks.R +++ b/data-raw/_pre_commit_checks.R @@ -406,7 +406,7 @@ pre_commit_lst$AB_GLYCOPEPTIDES <- antimicrobials %>% pre_commit_lst$AB_FUSIDANES <- antimicrobials %>% filter(name %like% "fusi") %>% pull(ab) -pre_commit_lst$AB_IONOPHORES<- antimicrobials 
%>% +pre_commit_lst$AB_IONOPHORES <- antimicrobials %>% filter(name %like% "alamethicin|beauvericin|calcimycin|chloroquine|clioquinol|diiodohydroxyquinoline|dithiocarbamates|enniatin|epigallocatechin|gramicidin|hinokitiol|ionomycin|laidlomycin|lasalocid|maduramicin|monensin|narasin|nigericin|nonactin|nystatin|pyrazole|pyrithione|quercetin|salinomycin|semduramicin|valinomycin|zincophorin") %>% pull(ab) pre_commit_lst$AB_ISOXAZOLYLPENICILLINS <- antimicrobials %>% @@ -478,7 +478,8 @@ pre_commit_lst$AB_BETALACTAMS <- sort(c( pre_commit_lst$AB_PENICILLINS, pre_commit_lst$AB_CEPHALOSPORINS, pre_commit_lst$AB_CARBAPENEMS, - pre_commit_lst$AB_MONOBACTAMS)) + pre_commit_lst$AB_MONOBACTAMS +)) pre_commit_lst$AB_BETALACTAMASE_INHIBITORS <- antimicrobials %>% filter(atc_group2 %like% "Beta-lactamase inhibitors" | name %like% "bactam") %>% pull(ab) @@ -495,8 +496,9 @@ for (grp in pre_commit_lst$DEFINED_AB_GROUPS[pre_commit_lst$DEFINED_AB_GROUPS %u fn_name <- tolower(gsub("^AB_", "", grp)) if (!fn_name %in% ls(envir = asNamespace("AMR"))) { stop("Group '", grp, "' has ", length(pre_commit_lst[[grp]]), - " members (", toString(ab_name(pre_commit_lst[[grp]], tolower = T)), ") but no corresponding function '", fn_name, "()' exists in the AMR namespace.", - call. = FALSE) + " members (", toString(ab_name(pre_commit_lst[[grp]], tolower = T)), ") but no corresponding function '", fn_name, "()' exists in the AMR namespace.", + call. 
= FALSE + ) } } } @@ -534,46 +536,48 @@ for (i in seq_along(group_map)) { } # create priority list for ab_group() -pre_commit_lst$ABX_PRIORITY_LIST <- c("Aminopenicillins", - "Isoxazolylpenicillins", - "Ureidopenicillins", - "Oxazolidinones", - "Carbapenems", - "Cephalosporins (1st gen.)", - "Cephalosporins (2nd gen.)", - "Cephalosporins (3rd gen.)", - "Cephalosporins (4th gen.)", - "Cephalosporins (5th gen.)", - "Cephalosporins", - "Penicillins", - "Monobactams", - "Aminoglycosides", - "Lipoglycopeptides", - "Glycopeptides", - "Peptides", - "Lincosamides", - "Streptogramins", - "Macrolides", - "Nitrofurans", - "Phenicols", - "Phosphonics", - "Polymyxins", - "Fluoroquinolones", - "Quinolones", - "Rifamycins", - "Spiropyrimidinetriones", - "Trimethoprims", - "Sulfonamides", - "Tetracyclines", - "Ionophores", - "Antifungals", - "Antimycobacterials", - "Fusidanes", - "Beta-lactams", - "Beta-lactamase inhibitors", - "Pleuromutilins", - "Aminocoumarins", - "Other") +pre_commit_lst$ABX_PRIORITY_LIST <- c( + "Aminopenicillins", + "Isoxazolylpenicillins", + "Ureidopenicillins", + "Oxazolidinones", + "Carbapenems", + "Cephalosporins (1st gen.)", + "Cephalosporins (2nd gen.)", + "Cephalosporins (3rd gen.)", + "Cephalosporins (4th gen.)", + "Cephalosporins (5th gen.)", + "Cephalosporins", + "Penicillins", + "Monobactams", + "Aminoglycosides", + "Lipoglycopeptides", + "Glycopeptides", + "Peptides", + "Lincosamides", + "Streptogramins", + "Macrolides", + "Nitrofurans", + "Phenicols", + "Phosphonics", + "Polymyxins", + "Fluoroquinolones", + "Quinolones", + "Rifamycins", + "Spiropyrimidinetriones", + "Trimethoprims", + "Sulfonamides", + "Tetracyclines", + "Ionophores", + "Antifungals", + "Antimycobacterials", + "Fusidanes", + "Beta-lactams", + "Beta-lactamase inhibitors", + "Pleuromutilins", + "Aminocoumarins", + "Other" +) if (!all(unlist(antimicrobials$group) %in% pre_commit_lst$ABX_PRIORITY_LIST)) { stop("Missing group(s) in priority list: ", 
paste(setdiff(unlist(antimicrobials$group), pre_commit_lst$ABX_PRIORITY_LIST), collapse = ", ")) } @@ -589,17 +593,17 @@ pre_commit_lst$AV_LOOKUP <- create_AB_AV_lookup(antivirals) # Export to package as internal data ---- # usethis::use_data() must receive unquoted object names, which is not flexible at all. # we'll use good old base::save() instead -save(list = names(pre_commit_lst), - file = "R/sysdata.rda", - envir = as.environment(pre_commit_lst), - compress = "xz", - version = 2, - ascii = FALSE) +save( + list = names(pre_commit_lst), + file = "R/sysdata.rda", + envir = as.environment(pre_commit_lst), + compress = "xz", + version = 2, + ascii = FALSE +) usethis::ui_done("Saved to {usethis::ui_value('R/sysdata.rda')}") - - # Export data sets to the repository in different formats ----------------- for (pkg in c("haven", "openxlsx2", "arrow")) { @@ -621,7 +625,9 @@ write_md5 <- function(object) { } changed_md5 <- function(object) { path <- paste0("data-raw/", deparse(substitute(object)), ".md5") - if (!file.exists(path)) return(TRUE) + if (!file.exists(path)) { + return(TRUE) + } tryCatch( { conn <- file(path) @@ -759,11 +765,14 @@ devtools::load_all(quiet = TRUE) suppressMessages(set_AMR_locale("English")) files_changed <- function(paths = "^(R|data)/") { - tryCatch({ - changed_files <- system("git status", intern = TRUE) - changed_files <- unlist(strsplit(changed_files, " ")) - any(changed_files %like% paths[paths != "R/sysdata.rda"]) - }, error = function(e) TRUE) + tryCatch( + { + changed_files <- system("git status", intern = TRUE) + changed_files <- unlist(strsplit(changed_files, " ")) + any(changed_files %like% paths[paths != "R/sysdata.rda"]) + }, + error = function(e) TRUE + ) } # Update URLs ------------------------------------------------------------- @@ -801,8 +810,10 @@ if (files_changed()) { # Style pkg --------------------------------------------------------------- if (files_changed(paths = "^(R|tests)/")) { usethis::ui_info("Styling package") - 
styler::style_pkg(include_roxygen_examples = FALSE, - exclude_dirs = list.dirs(full.names = FALSE, recursive = FALSE)[!list.dirs(full.names = FALSE, recursive = FALSE) %in% c("R", "tests")]) + styler::style_pkg( + include_roxygen_examples = FALSE, + exclude_dirs = list.dirs(full.names = FALSE, recursive = FALSE)[!list.dirs(full.names = FALSE, recursive = FALSE) %in% c("R", "tests")] + ) } # Document pkg ------------------------------------------------------------ @@ -813,13 +824,13 @@ if (files_changed()) { # Update index.md and README.md ------------------------------------------- if (files_changed("README.Rmd") || - files_changed("index.Rmd") || - files_changed("man/microorganisms.Rd") || - files_changed("man/antimicrobials.Rd") || - files_changed("man/clinical_breakpoints.Rd") || - files_changed("man/antibiogram.Rd") || - files_changed("R/antibiogram.R") || - files_changed("data-raw/translations.tsv")) { + files_changed("index.Rmd") || + files_changed("man/microorganisms.Rd") || + files_changed("man/antimicrobials.Rd") || + files_changed("man/clinical_breakpoints.Rd") || + files_changed("man/antibiogram.Rd") || + files_changed("R/antibiogram.R") || + files_changed("data-raw/translations.tsv")) { usethis::ui_info("Rendering {usethis::ui_field('index.md')} and {usethis::ui_field('README.md')}") suppressWarnings(rmarkdown::render("index.Rmd", quiet = TRUE)) suppressWarnings(rmarkdown::render("README.Rmd", quiet = TRUE)) diff --git a/data-raw/_reproduction_scripts/reproduction_of_antimicrobials.R b/data-raw/_reproduction_scripts/reproduction_of_antimicrobials.R index 8d494e710..57899fbfe 100644 --- a/data-raw/_reproduction_scripts/reproduction_of_antimicrobials.R +++ b/data-raw/_reproduction_scripts/reproduction_of_antimicrobials.R @@ -262,9 +262,9 @@ get_synonyms <- function(CID, clean = TRUE) { if (is.na(CID[i])) { next } - + all_cids <- CID[i] - + # we will now get the closest compounds with a 96% threshold similar_cids <- tryCatch( data.table::fread( @@ -281,7 
+281,7 @@ get_synonyms <- function(CID, clean = TRUE) { # leave out all CIDs that we have in our antimicrobials dataset to prevent duplication similar_cids <- similar_cids[!similar_cids %in% antimicrobials$cid[!is.na(antimicrobials$cid)]] all_cids <- unique(c(all_cids, similar_cids)) - + # for each one, we are getting the synonyms current_syns <- character(0) for (j in seq_len(length(all_cids))) { @@ -297,9 +297,9 @@ get_synonyms <- function(CID, clean = TRUE) { )[[1]], error = function(e) NA_character_ ) - + Sys.sleep(0.05) - + if (clean == TRUE) { # remove text between brackets synonyms_txt <- trimws(gsub( @@ -319,16 +319,16 @@ get_synonyms <- function(CID, clean = TRUE) { synonyms_txt <- gsub("[^a-z]+$", "", ignore.case = TRUE, synonyms_txt) # only length 5 to 20 and lower-case names starting with a capital letter synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(5:20) & - grepl("^[A-Z][a-z]+$", synonyms_txt, ignore.case = FALSE)] + grepl("^[A-Z][a-z]+$", synonyms_txt, ignore.case = FALSE)] synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE)) } - + # synonyms must not be set for other agents, so remove the duplicates synonyms_txt <- synonyms_txt[!synonyms_txt %in% unlist(synonyms)] - + current_syns <- c(current_syns, synonyms_txt) } - + current_syns <- unique(trimws(current_syns[tolower(current_syns) %in% unique(tolower(current_syns))])) synonyms[i] <- list(sort(current_syns)) } @@ -763,10 +763,12 @@ antimicrobials[which(antimicrobials$ab %in% c("CYC", "LNZ", "THA", "TZD")), "gro # add efflux effl <- antimicrobials |> filter(ab == "ACM") |> - mutate(ab = as.character("EFF"), - cid = NA_real_, - name = "Efflux", - group = "Other") + mutate( + ab = as.character("EFF"), + cid = NA_real_, + name = "Efflux", + group = "Other" + ) antimicrobials <- antimicrobials |> mutate(ab = as.character(ab)) |> bind_rows(effl) @@ -777,9 +779,11 @@ antimicrobials[which(antimicrobials$ab == "EFF"), "abbreviations"][[1]] <- list( # add clindamycin inducible 
screening clin <- antimicrobials |> filter(ab == "FOX1") |> - mutate(ab = as.character("CLI-S"), - name = "Clindamycin inducible screening", - group = "Macrolides/lincosamides") + mutate( + ab = as.character("CLI-S"), + name = "Clindamycin inducible screening", + group = "Macrolides/lincosamides" + ) antimicrobials <- antimicrobials |> mutate(ab = as.character(ab)) |> bind_rows(clin) @@ -791,109 +795,123 @@ antimicrobials <- antimicrobials |> bind_rows( antimicrobials |> filter(ab == "EFF") |> - mutate(ab = "BLA-S", - name = paste("Beta-lactamase", "screening test"), - cid = NA_real_, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("beta-lactamase", "betalactamase", "bl screen", "blt screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "BLA-S", + name = paste("Beta-lactamase", "screening test"), + cid = NA_real_, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("beta-lactamase", "betalactamase", "bl screen", "blt screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "PEN") |> - mutate(ab = "PEN-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("pen screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "PEN-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("pen screen")), + 
synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "OXA") |> - mutate(ab = "OXA-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("oxa screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "OXA-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("oxa screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "PEF") |> - mutate(ab = "PEF-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("pef screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "PEF-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("pef screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "NAL") |> - mutate(ab = "NAL-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("nal screen")), - synonyms = 
list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "NAL-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("nal screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "NOR") |> - mutate(ab = "NOR-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("nor screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))), + mutate( + ab = "NOR-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("nor screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ), antimicrobials |> filter(ab == "TCY") |> - mutate(ab = "TCY-S", - name = paste(name, "screening test"), - cid = NA, - atc = list(character(0)), - atc_group1 = NA_character_, - atc_group2 = NA_character_, - abbreviations = list(c("tcy screen")), - synonyms = list(character(0)), - oral_ddd = NA_real_, - oral_units = NA_character_, - iv_ddd = NA_real_, - iv_units = NA_character_, - loinc = list(character(0))) + mutate( + ab = "TCY-S", + name = paste(name, "screening test"), + cid = NA, + atc = list(character(0)), + atc_group1 = NA_character_, + atc_group2 = NA_character_, + abbreviations = list(c("tcy screen")), + synonyms = list(character(0)), + oral_ddd = NA_real_, + 
oral_units = NA_character_, + iv_ddd = NA_real_, + iv_units = NA_character_, + loinc = list(character(0)) + ) ) @@ -919,16 +937,20 @@ antimicrobials <- antimicrobials |> antimicrobials |> filter(ab == "FPE") |> mutate(ab = as.character(ab)) |> - mutate(ab = "FTA", - name = "Cefepime/taniborbactam", - cid = NA_real_), + mutate( + ab = "FTA", + name = "Cefepime/taniborbactam", + cid = NA_real_ + ), antimicrobials |> filter(ab == "TBP") |> mutate(ab = as.character(ab)) |> - mutate(ab = "TAN", - name = "Taniborbactam", - cid = 76902493, - abbreviations = list("VNRX-5133")) + mutate( + ab = "TAN", + name = "Taniborbactam", + cid = 76902493, + abbreviations = list("VNRX-5133") + ) ) antimicrobials <- antimicrobials |> @@ -936,39 +958,51 @@ antimicrobials <- antimicrobials |> bind_rows( antimicrobials |> filter(ab == "CTB") |> - mutate(ab = "CTA", - cid = NA_real_, - name = "Ceftibuten/avibactam") |> + mutate( + ab = "CTA", + cid = NA_real_, + name = "Ceftibuten/avibactam" + ) |> select(1:4), antimicrobials |> filter(ab == "KAC") |> - mutate(ab = "KAS", - cid = NA_real_, - name = "Kasugamycin") |> + mutate( + ab = "KAS", + cid = NA_real_, + name = "Kasugamycin" + ) |> select(1:4), antimicrobials |> filter(ab == "PRI") |> - mutate(ab = "OST", - cid = NA_real_, - name = "Ostreogrycin") |> + mutate( + ab = "OST", + cid = NA_real_, + name = "Ostreogrycin" + ) |> select(1:4), antimicrobials |> filter(ab == "PRI") |> - mutate(ab = "THS", - cid = NA_real_, - name = "Thiostrepton") |> + mutate( + ab = "THS", + cid = NA_real_, + name = "Thiostrepton" + ) |> select(1, 3), antimicrobials |> filter(ab == "CLA1") |> - mutate(ab = "XER", - cid = NA_real_, - name = "Xeruborbactam") |> + mutate( + ab = "XER", + cid = NA_real_, + name = "Xeruborbactam" + ) |> select(1:4), antimicrobials |> filter(ab == "BLM") |> - mutate(ab = "ZOR", - cid = NA_real_, - name = "Zorbamycin") |> + mutate( + ab = "ZOR", + cid = NA_real_, + name = "Zorbamycin" + ) |> select(1:4), ) @@ -977,9 +1011,11 @@ 
antimicrobials <- antimicrobials |> bind_rows( antimicrobials |> filter(ab == "NOV") |> - mutate(ab = "CLB", - cid = 54706138, - name = "Clorobiocin") |> + mutate( + ab = "CLB", + cid = 54706138, + name = "Clorobiocin" + ) |> select(1:4), ) @@ -990,7 +1026,7 @@ get_atc_table <- function(ab_name, type = "human") { if (type == "human") { url <- "https://atcddd.fhi.no/atc_ddd_index/" } else if (type == "veterinary") { - url <- "https://atcddd.fhi.no/atcvet/atcvet_index/" + url <- "https://atcddd.fhi.no/atcvet/atcvet_index/" } else { stop("invalid type") } @@ -1055,8 +1091,10 @@ to_update <- 1:nrow(antimicrobials) # or just the empty ones: to_update <- which(sapply(antimicrobials$atc, function(x) length(x[!is.na(x)])) == 0) -updated_atc <- lapply(seq_len(length(to_update)), - function(x) NA_character_) +updated_atc <- lapply( + seq_len(length(to_update)), + function(x) NA_character_ +) # this takes around 10 minutes for the whole table (some ABx are skipped and go faster) diff --git a/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R b/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R index a1b46f741..c3c7426a9 100644 --- a/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R +++ b/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R @@ -72,12 +72,12 @@ whonet_organisms <- whonet_organisms_raw |> ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM) ) |> # try to match on GBIF identifier - left_join(microorganisms |> distinct(mo, gbif, status) |> filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) |> + left_join(microorganisms |> distinct(mo, gbif, status) |> filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) |> # remove duplicates arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) |> - distinct(ORGANISM_CODE, .keep_all = TRUE) |> + distinct(ORGANISM_CODE, .keep_all = TRUE) |> # add Enterobacterales, which is a subkingdom code in their data - 
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) |> + bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) |> arrange(ORGANISM) @@ -88,31 +88,39 @@ unmatched <- whonet_organisms |> filter(is.na(mo)) # generate the mo codes and add their names message("Getting MO codes for WHONET input...") -unmatched <- unmatched |> - mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM), - minimum_matching_score = 0.55, - keep_synonyms = TRUE, - language = "en"), - mo = case_when(ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"), - ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"), - ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"), - TRUE ~ mo), - mo_name = mo_name(mo, - keep_synonyms = TRUE, - language = "en")) +unmatched <- unmatched |> + mutate( + mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM), + minimum_matching_score = 0.55, + keep_synonyms = TRUE, + language = "en" + ), + mo = case_when( + ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"), + ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"), + ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"), + TRUE ~ mo + ), + mo_name = mo_name(mo, + keep_synonyms = TRUE, + language = "en" + ) + ) # check if coercion at least resembles the first part (genus) -unmatched <- unmatched |> +unmatched <- unmatched |> mutate( first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE), - keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") |> + keep = mo_name %like_case% first_part | ORGANISM %like% 
"Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic" + ) |> arrange(keep) unmatched |> View() unmatched <- unmatched |> filter(keep == TRUE) -organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) |> - bind_rows(unmatched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) |> - mutate(name = mo_name(mo, keep_synonyms = TRUE)) |> +organisms <- matched |> + transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) |> + bind_rows(unmatched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) |> + mutate(name = mo_name(mo, keep_synonyms = TRUE)) |> arrange(code) # self-defined codes in the MO table must be retained @@ -125,25 +133,33 @@ organisms <- organisms |> # some subspecies exist, while their upper species do not, add them as the species level: subspp <- organisms |> filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) & - mo_species(mo, keep_synonyms = TRUE) != "" & - mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") |> - mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE), - mo_species(mo, keep_synonyms = TRUE)), - keep_synonyms = TRUE), - name = mo_name(mo, keep_synonyms = TRUE)) + mo_species(mo, keep_synonyms = TRUE) != "" & + mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") |> + mutate( + mo = as.mo( + paste( + mo_genus(mo, keep_synonyms = TRUE), + mo_species(mo, keep_synonyms = TRUE) + ), + keep_synonyms = TRUE + ), + name = mo_name(mo, keep_synonyms = TRUE) + ) organisms <- organisms |> filter(!code %in% subspp$code) |> bind_rows(subspp) |> arrange(code) # add the groups -organisms <- organisms |> - bind_rows(tibble(code = organisms |> filter(!is.na(group)) |> pull(group) |> unique(), - group = NA, - mo = organisms |> filter(!is.na(group)) |> pull(group) |> unique() |> as.mo(keep_synonyms = TRUE), - name = mo_name(mo, keep_synonyms = TRUE))) |> - arrange(code, group) |> - select(-group) |> +organisms <- organisms |> + 
bind_rows(tibble( + code = organisms |> filter(!is.na(group)) |> pull(group) |> unique(), + group = NA, + mo = organisms |> filter(!is.na(group)) |> pull(group) |> unique() |> as.mo(keep_synonyms = TRUE), + name = mo_name(mo, keep_synonyms = TRUE) + )) |> + arrange(code, group) |> + select(-group) |> distinct() # no XXX organisms <- organisms |> filter(code != "XXX") @@ -153,7 +169,7 @@ organisms <- organisms |> filter(code != "XXX") # 2025-04-20 still the case # 2026-03-27 still the case, but fixed using `existing_codes` above organisms |> filter(code == "SGM") -# organisms <- organisms |> +# organisms <- organisms |> # filter(!(code == "SGM" & name %like% "Streptococcus")) # this must be empty: organisms$code[organisms$code |> duplicated()] @@ -165,12 +181,12 @@ saveRDS(organisms, "data-raw/organisms.rds", version = 2) #--- # update microorganisms.codes with the latest WHONET codes -microorganisms.codes2 <- microorganisms.codes |> +microorganisms.codes2 <- microorganisms.codes |> # remove all old WHONET codes, whether we (in the end) keep them or not - filter(!toupper(code) %in% toupper(organisms$code)) |> + filter(!toupper(code) %in% toupper(organisms$code)) |> # and add the new ones - bind_rows(organisms |> select(code, mo)) |> - arrange(code) |> + bind_rows(organisms |> select(code, mo)) |> + arrange(code) |> distinct(code, .keep_all = TRUE) # new codes: microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)] @@ -214,47 +230,53 @@ devtools::load_all() # now that we have the correct MO codes, get the breakpoints and convert them -whonet_breakpoints_raw |> - count(GUIDELINES, BREAKPOINT_TYPE) |> - pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> +whonet_breakpoints_raw |> + count(GUIDELINES, BREAKPOINT_TYPE) |> + pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> janitor::adorn_totals(where = c("row", "col")) -whonet_breakpoints_raw |> +whonet_breakpoints_raw |> filter(YEAR == format(Sys.Date(), 
"%Y")) |> - count(GUIDELINES, YEAR, BREAKPOINT_TYPE) |> - pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> + count(GUIDELINES, YEAR, BREAKPOINT_TYPE) |> + pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> janitor::adorn_totals(where = c("row", "col")) # compared to current AMR::clinical_breakpoints |> count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |> arrange(tolower(type)) |> - pivot_wider(names_from = type, values_from = n) |> + pivot_wider(names_from = type, values_from = n) |> as.data.frame() |> janitor::adorn_totals(where = c("row", "col")) breakpoints <- whonet_breakpoints_raw |> mutate(code = toupper(ORGANISM_CODE)) |> - left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")), - # GEN (Generic) and ALL (All) are PK/PD codes - data.frame(code = c("ALL", "GEN"), - mo = rep(as.mo("UNKNOWN"), 2)))) + left_join(bind_rows( + microorganisms.codes |> filter(!code %in% c("ALL", "GEN")), + # GEN (Generic) and ALL (All) are PK/PD codes + data.frame( + code = c("ALL", "GEN"), + mo = rep(as.mo("UNKNOWN"), 2) + ) + )) # these ones lack an MO name, they cannot be used: unknown <- breakpoints |> filter(is.na(mo)) |> pull(code) |> unique() -breakpoints |> - filter(code %in% unknown) |> +breakpoints |> + filter(code %in% unknown) |> count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE) # 2025-04-20: these codes are currently: cps, fso. 
No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them: -breakpoints <- breakpoints |> +breakpoints <- breakpoints |> filter(!is.na(mo)) # and these ones have unknown antibiotics according to WHONET itself: -breakpoints |> - filter(!WHONET_ABX_CODE %in% whonet_antibiotics_raw$WHONET_ABX_CODE) |> +breakpoints |> + filter(!WHONET_ABX_CODE %in% whonet_antibiotics_raw$WHONET_ABX_CODE) |> count(GUIDELINES, WHONET_ABX_CODE) |> - mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE), - ab_name = ab_name(ab)) + mutate( + ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE), + ab_name = ab_name(ab) + ) # 2025-04-20: these codes are currently: CFC, ROX, FIX, and N/A. All have the right replacements in `antimicrobials`, so we can safely use as.ab() later on # the NAs are for M. tuberculosis, they are empty breakpoints breakpoints <- breakpoints |> @@ -264,7 +286,7 @@ breakpoints <- breakpoints |> ## Build new breakpoints table ---- breakpoints_new <- breakpoints |> - filter(!is.na(WHONET_ABX_CODE)) |> + filter(!is.na(WHONET_ABX_CODE)) |> transmute( guideline = paste(GUIDELINES, YEAR), type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)), @@ -301,22 +323,26 @@ breakpoints_new <- breakpoints |> distinct(guideline, type, host, ab, mo, method, site, breakpoint_S, .keep_all = TRUE) # fix reference table names -breakpoints_new |> filter(guideline %like% "EUCAST", is.na(ref_tbl)) |> View() -breakpoints_new <- breakpoints_new |> - mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl), - is.na(ref_tbl) ~ "Unknown", - TRUE ~ ref_tbl)) +breakpoints_new |> + filter(guideline %like% "EUCAST", is.na(ref_tbl)) |> + View() +breakpoints_new <- breakpoints_new |> + mutate(ref_tbl = case_when( + is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl), + is.na(ref_tbl) ~ "Unknown", + TRUE ~ ref_tbl + )) # clean disk zones breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- 
as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE])) breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE])) # regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses -breakpoints_new |> +breakpoints_new |> filter(host %like% "foal") |> count(guideline, host, ab) -breakpoints_new <- breakpoints_new |> - filter(host %unlike% "foal") |> +breakpoints_new <- breakpoints_new |> + filter(host %unlike% "foal") |> mutate(host = ifelse(host %like% "horse", "horse", host)) # FIXES FOR WHONET ERRORS ---- @@ -324,8 +350,12 @@ m <- unique(as.double(as.mic(levels(as.mic(1))))) # WHONET has no >1024 but instead uses 1025, 513, and 129, so as.mic() cannot be used to clean. # instead, raise these one higher valid MIC factor level: -breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S) -breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R) +breakpoints_new |> + filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> + distinct(breakpoint_S) +breakpoints_new |> + filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> + distinct(breakpoint_R) breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1] breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1] breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1] @@ -353,12 +383,12 @@ breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$m breakpoints_new |> filter(method == "MIC" & guideline %like% "EUCAST" & mo %like% as.mo("B_HMPHL")) |> count(guideline, mo) -breakpoints_new <- breakpoints_new |> +breakpoints_new <- breakpoints_new |> bind_rows( 
breakpoints_new |> - filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") |> + filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") |> mutate(mo = as.mo("B_HMPHL_PRNF")) - ) |> + ) |> arrange(desc(guideline), mo, ab, type, host, method) |> distinct() # Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed @@ -387,7 +417,9 @@ breakpoints_new <- breakpoints_new |> filter(!wrong) # 2025-04-20/ fixed now # WHONET sets for EUCAST 2026 TMP breakpoints for all Klebsiella, but this is now only for non-aerogenes species -kleb_spp <- microorganisms |> filter(rank == "species", genus == "Klebsiella", !species %in% c("", "aerogenes")) |> pull(mo) +kleb_spp <- microorganisms |> + filter(rank == "species", genus == "Klebsiella", !species %in% c("", "aerogenes")) |> + pull(mo) kleb_tmp_mic <- breakpoints_new |> filter(guideline == "EUCAST 2026", method == "MIC", ab == "TMP", mo == as.mo("Klebsiella")) |> uncount(length(kleb_spp)) |> @@ -398,8 +430,10 @@ kleb_tmp_disk <- breakpoints_new |> mutate(mo = kleb_spp) breakpoints_new <- breakpoints_new |> filter(!(guideline == "EUCAST 2026" & method == "MIC" & ab == "TMP" & mo == as.mo("Klebsiella"))) |> - bind_rows(kleb_tmp_mic, - kleb_tmp_disk) + bind_rows( + kleb_tmp_mic, + kleb_tmp_disk + ) # WHONET contains wrong EUCAST breakpoints for enterococci/SXT: disk should be 23/23, not 21/50, and MIC should be 1/1, not 0.032/1 # applies to all previous years, since v11 (2011) @@ -441,14 +475,14 @@ breakpoints_new <- breakpoints_new |> # check the strange duplicates -breakpoints_new |> +breakpoints_new |> mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>% - filter(id %in% .$id[which(duplicated(id))]) |> + filter(id %in% .$id[which(duplicated(id))]) |> arrange(desc(guideline)) |> View() # 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints_raw df, we have to remove duplicates # 2025-04-20/ same, most important one seems M. 
tuberculosis in CLSI (also in 2025) -breakpoints_new <- breakpoints_new |> +breakpoints_new <- breakpoints_new |> distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE) @@ -469,7 +503,7 @@ dim(clinical_breakpoints) # SAVE TO PACKAGE ---- # determine rank again now that some changes were made on taxonomic level (genus -> species) -breakpoints_new <- breakpoints_new |> +breakpoints_new <- breakpoints_new |> mutate(rank_index = case_when( mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1, mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2, diff --git a/data-raw/_reproduction_scripts/reproduction_of_microorganisms.R b/data-raw/_reproduction_scripts/reproduction_of_microorganisms.R index 681234bf2..be874f3e2 100644 --- a/data-raw/_reproduction_scripts/reproduction_of_microorganisms.R +++ b/data-raw/_reproduction_scripts/reproduction_of_microorganisms.R @@ -649,7 +649,9 @@ taxonomy_mycobank <- taxonomy_mycobank %>% arrange(fullname) taxonomy_mycobank %>% count(rank, sort = TRUE) -taxonomy_mycobank %>% filter(rank %like% "#") %>% count(rank) +taxonomy_mycobank %>% + filter(rank %like% "#") %>% + count(rank) taxonomy_mycobank3 <- taxonomy_mycobank @@ -2546,7 +2548,9 @@ taxonomy %>% arrange(mo) %>% View() # keep the firsts -taxonomy <- taxonomy %>% arrange(mo) %>% distinct(mo, .keep_all = TRUE) +taxonomy <- taxonomy %>% + arrange(mo) %>% + distinct(mo, .keep_all = TRUE) # are fullnames unique? 
taxonomy %>% @@ -2997,7 +3001,9 @@ taxonomy$rank[which(taxonomy$fullname %like% "unknown")] <- "(unknown rank)" # this happened in early 2025, check that MO codes do not have repeated elements # fixed it then like this: microorganisms$mo <- gsub("B_SCLLM_CNNM_LNSM_LNSM_LNSM_LNSM", "B_SCLLM_CNNM", microorganisms$mo) -taxonomy |> filter(mo %like% "_.*_.*_.*_") |> View() +taxonomy |> + filter(mo %like% "_.*_.*_.*_") |> + View() fix_old_mos <- function(dataset) { @@ -3085,7 +3091,9 @@ microorganisms <- taxonomy # set class class(microorganisms$mo) <- c("mo", "character") -microorganisms <- microorganisms %>% arrange(fullname) %>% df_remove_nonASCII() +microorganisms <- microorganisms %>% + arrange(fullname) %>% + df_remove_nonASCII() usethis::use_data( microorganisms, overwrite = TRUE, diff --git a/data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R b/data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R index 7463b6b6a..dc1f6bdca 100644 --- a/data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R +++ b/data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R @@ -59,72 +59,101 @@ whonet_organisms <- whonet_organisms %>% mutate( # this one was called Issatchenkia orientalis, but it should be: ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM) - ) %>% + ) %>% # try to match on GBIF identifier - left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>% + left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>% # remove duplicates arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>% - distinct(ORGANISM_CODE, .keep_all = TRUE) %>% + distinct(ORGANISM_CODE, .keep_all = TRUE) %>% # add Enterobacterales, which is a subkingdom code in their data - bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>% + 
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>% arrange(ORGANISM) # check non-existing species groups in the microorganisms table whonet_organisms %>% filter(!is.na(SPECIES_GROUP)) %>% group_by(SPECIES_GROUP) %>% - summarise(complex = ORGANISM[ORGANISM %like% " (group|complex)"][1], - organisms = paste0(n(), ": ", paste(sort(unique(ORGANISM)), collapse = ", "))) %>% + summarise( + complex = ORGANISM[ORGANISM %like% " (group|complex)"][1], + organisms = paste0(n(), ": ", paste(sort(unique(ORGANISM)), collapse = ", ")) + ) %>% filter(!SPECIES_GROUP %in% microorganisms.codes$code) # create the species group data set ---- microorganisms.groups <- whonet_organisms %>% # these will not be translated well - filter(!ORGANISM %in% c("Trueperella pyogenes-like bacteria", - "Mycobacterium suricattae", - "Mycobacterium canetti")) %>% + filter(!ORGANISM %in% c( + "Trueperella pyogenes-like bacteria", + "Mycobacterium suricattae", + "Mycobacterium canetti" + )) %>% filter(!is.na(SPECIES_GROUP), SPECIES_GROUP != ORGANISM_CODE) %>% - transmute(mo_group = as.mo(SPECIES_GROUP), - mo = ifelse(is.na(mo), - as.character(as.mo(ORGANISM, keep_synonyms = TRUE, minimum_matching_score = 0)), - mo)) %>% + transmute( + mo_group = as.mo(SPECIES_GROUP), + mo = ifelse(is.na(mo), + as.character(as.mo(ORGANISM, keep_synonyms = TRUE, minimum_matching_score = 0)), + mo + ) + ) %>% # add our own CoNS and CoPS, WHONET does not strictly follow Becker et al. 
(2014, 2019, 2020) - filter(mo_group != as.mo("CoNS")) %>% - bind_rows(tibble(mo_group = as.mo("CoNS"), mo = MO_CONS)) %>% - filter(mo_group != as.mo("CoPS")) %>% - bind_rows(tibble(mo_group = as.mo("CoPS"), mo = MO_COPS)) %>% + filter(mo_group != as.mo("CoNS")) %>% + bind_rows(tibble(mo_group = as.mo("CoNS"), mo = MO_CONS)) %>% + filter(mo_group != as.mo("CoPS")) %>% + bind_rows(tibble(mo_group = as.mo("CoPS"), mo = MO_COPS)) %>% # at least all our Lancefield-grouped streptococci must be in the beta-haemolytic group: - bind_rows(tibble(mo_group = as.mo("Beta-haemolytic streptococcus"), - mo = c(MO_LANCEFIELD, - microorganisms %>% filter(fullname %like% "^Streptococcus Group") %>% pull(mo)))) %>% + bind_rows(tibble( + mo_group = as.mo("Beta-haemolytic streptococcus"), + mo = c( + MO_LANCEFIELD, + microorganisms %>% filter(fullname %like% "^Streptococcus Group") %>% pull(mo) + ) + )) %>% # and per Streptococcus group as well: # group A - S. pyogenes - bind_rows(tibble(mo_group = as.mo("Streptococcus Group A"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_PYGN(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group A"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_PYGN(_|$)")] + )) %>% # group B - S. agalactiae - bind_rows(tibble(mo_group = as.mo("Streptococcus Group B"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_AGLC(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group B"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_AGLC(_|$)")] + )) %>% # group C - all subspecies within S. dysgalactiae and S. equi (such as S. 
equi zooepidemicus) - bind_rows(tibble(mo_group = as.mo("Streptococcus Group C"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(DYSG|EQUI)(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group C"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(DYSG|EQUI)(_|$)")] + )) %>% # group F - Milleri group == S. anginosus group, which incl. S. anginosus, S. constellatus, S. intermedius - bind_rows(tibble(mo_group = as.mo("Streptococcus Group F"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(ANGN|CNST|INTR)(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group F"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(ANGN|CNST|INTR)(_|$)")] + )) %>% # group G - S. dysgalactiae and S. canis (though dysgalactiae is also group C and will be matched there) - bind_rows(tibble(mo_group = as.mo("Streptococcus Group G"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(DYSG|CANS)(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group G"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(DYSG|CANS)(_|$)")] + )) %>% # group H - S. sanguinis - bind_rows(tibble(mo_group = as.mo("Streptococcus Group H"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_SNGN(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group H"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_SNGN(_|$)")] + )) %>% # group K - S. salivarius, incl. S. salivarius salivariuss and S. salivarius thermophilus - bind_rows(tibble(mo_group = as.mo("Streptococcus Group K"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_SLVR(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group K"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_SLVR(_|$)")] + )) %>% # group L - only S. 
dysgalactiae - bind_rows(tibble(mo_group = as.mo("Streptococcus Group L"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_DYSG(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group L"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_DYSG(_|$)")] + )) %>% # and for EUCAST: Strep group A, B, C, G - bind_rows(tibble(mo_group = as.mo("Streptococcus Group A, B, C, G"), - mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(PYGN|AGLC|DYSG|EQUI|CANS|GRPA|GRPB|GRPC|GRPG)(_|$)")])) %>% + bind_rows(tibble( + mo_group = as.mo("Streptococcus Group A, B, C, G"), + mo = microorganisms$mo[which(microorganisms$mo %like% "^B_STRPT_(PYGN|AGLC|DYSG|EQUI|CANS|GRPA|GRPB|GRPC|GRPG)(_|$)")] + )) %>% # HACEK is: # - Haemophilus species # - Aggregatibacter species @@ -133,38 +162,46 @@ microorganisms.groups <- whonet_organisms %>% # - Kingella species # - and previously Actinobacillus actinomycetemcomitans # see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3656887/ - filter(mo_group != as.mo("HACEK")) %>% - bind_rows(tibble(mo_group = as.mo("HACEK"), mo = microorganisms %>% filter(genus == "Haemophilus") %>% pull(mo))) %>% - bind_rows(tibble(mo_group = as.mo("HACEK"), mo = microorganisms %>% filter(genus == "Aggregatibacter") %>% pull(mo))) %>% + filter(mo_group != as.mo("HACEK")) %>% + bind_rows(tibble(mo_group = as.mo("HACEK"), mo = microorganisms %>% filter(genus == "Haemophilus") %>% pull(mo))) %>% + bind_rows(tibble(mo_group = as.mo("HACEK"), mo = microorganisms %>% filter(genus == "Aggregatibacter") %>% pull(mo))) %>% bind_rows(tibble(mo_group = as.mo("HACEK"), mo = as.mo("Cardiobacterium hominis", keep_synonyms = TRUE))) %>% bind_rows(tibble(mo_group = as.mo("HACEK"), mo = as.mo("Eikenella corrodens", keep_synonyms = TRUE))) %>% bind_rows(tibble(mo_group = as.mo("HACEK"), mo = microorganisms %>% filter(genus == "Kingella") %>% pull(mo))) %>% bind_rows(tibble(mo_group = as.mo("HACEK"), mo = as.mo("Actinobacillus 
actinomycetemcomitans", keep_synonyms = TRUE))) %>% # Citrobacter freundii complex in the NCBI Taxonomy Browser: # https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1344959 - filter(mo_group != "B_CTRBC_FRND-C") %>% - bind_rows(tibble(mo_group = as.mo("B_CTRBC_FRND-C"), - mo = paste("Citrobacter", c("freundii", "braakii", "gillenii", "murliniae", "portucalensis", "sedlakii", "werkmanii", "youngae")) %>% as.mo(keep_synonyms = TRUE))) %>% + filter(mo_group != "B_CTRBC_FRND-C") %>% + bind_rows(tibble( + mo_group = as.mo("B_CTRBC_FRND-C"), + mo = paste("Citrobacter", c("freundii", "braakii", "gillenii", "murliniae", "portucalensis", "sedlakii", "werkmanii", "youngae")) %>% as.mo(keep_synonyms = TRUE) + )) %>% # Klebsiella pneumoniae complex - filter(mo_group != "B_KLBSL_PNMN-C") %>% - bind_rows(tibble(mo_group = as.mo("B_KLBSL_PNMN-C"), - mo = paste("Klebsiella", c("africana", "pneumoniae", "quasipneumoniae", "quasivariicola", "variicola")) %>% as.mo(keep_synonyms = TRUE))) %>% + filter(mo_group != "B_KLBSL_PNMN-C") %>% + bind_rows(tibble( + mo_group = as.mo("B_KLBSL_PNMN-C"), + mo = paste("Klebsiella", c("africana", "pneumoniae", "quasipneumoniae", "quasivariicola", "variicola")) %>% as.mo(keep_synonyms = TRUE) + )) %>% # Yersinia pseudotuberculosis complex in the NCBI Taxonomy Browser: # https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1649845 - filter(mo_group != "B_YERSN_PSDT-C") %>% - bind_rows(tibble(mo_group = as.mo("B_YERSN_PSDT-C"), - mo = paste("Yersinia", c("pseudotuberculosis", "pestis", "similis", "wautersii")) %>% as.mo(keep_synonyms = TRUE))) %>% + filter(mo_group != "B_YERSN_PSDT-C") %>% + bind_rows(tibble( + mo_group = as.mo("B_YERSN_PSDT-C"), + mo = paste("Yersinia", c("pseudotuberculosis", "pestis", "similis", "wautersii")) %>% as.mo(keep_synonyms = TRUE) + )) %>% # RGM are Rapidly-growing Mycobacteria, see https://pubmed.ncbi.nlm.nih.gov/28084211/ - filter(mo_group != "B_MYCBC_RGM") %>% - bind_rows(tibble(mo_group = 
as.mo("B_MYCBC_RGM"), - mo = paste("Mycobacterium", c( "abscessus abscessus", "abscessus bolletii", "abscessus massiliense", "agri", "aichiense", "algericum", "alvei", "anyangense", "arabiense", "aromaticivorans", "aubagnense", "aubagnense", "aurum", "austroafricanum", "bacteremicum", "boenickei", "bourgelatii", "brisbanense", "brumae", "canariasense", "celeriflavum", "chelonae", "chitae", "chlorophenolicum", "chubuense", "confluentis", "cosmeticum", "crocinum", "diernhoferi", "duvalii", "elephantis", "fallax", "flavescens", "fluoranthenivorans", "fortuitum", "franklinii", "frederiksbergense", "gadium", "gilvum", "goodii", "hassiacum", "hippocampi", "hodleri", "holsaticum", "houstonense", "immunogenum", "insubricum", "iranicum", "komossense", "litorale", "llatzerense", "madagascariense", "mageritense", "monacense", "moriokaense", "mucogenicum", "mucogenicum", "murale", "neoaurum", "neworleansense", "novocastrense", "obuense", "pallens", "parafortuitum", "peregrinum", "phlei", "phocaicum", "phocaicum", "porcinum", "poriferae", "psychrotolerans", "pyrenivorans", "rhodesiae", "rufum", "rutilum", "salmoniphilum", "sediminis", "senegalense", "septicum", "setense", "smegmatis", "sphagni", "thermoresistibile", "tokaiense", "vaccae", "vanbaalenii", "wolinskyi")) %>% as.mo(keep_synonyms = TRUE))) + filter(mo_group != "B_MYCBC_RGM") %>% + bind_rows(tibble( + mo_group = as.mo("B_MYCBC_RGM"), + mo = paste("Mycobacterium", c("abscessus abscessus", "abscessus bolletii", "abscessus massiliense", "agri", "aichiense", "algericum", "alvei", "anyangense", "arabiense", "aromaticivorans", "aubagnense", "aubagnense", "aurum", "austroafricanum", "bacteremicum", "boenickei", "bourgelatii", "brisbanense", "brumae", "canariasense", "celeriflavum", "chelonae", "chitae", "chlorophenolicum", "chubuense", "confluentis", "cosmeticum", "crocinum", "diernhoferi", "duvalii", "elephantis", "fallax", "flavescens", "fluoranthenivorans", "fortuitum", "franklinii", "frederiksbergense", "gadium", 
"gilvum", "goodii", "hassiacum", "hippocampi", "hodleri", "holsaticum", "houstonense", "immunogenum", "insubricum", "iranicum", "komossense", "litorale", "llatzerense", "madagascariense", "mageritense", "monacense", "moriokaense", "mucogenicum", "mucogenicum", "murale", "neoaurum", "neworleansense", "novocastrense", "obuense", "pallens", "parafortuitum", "peregrinum", "phlei", "phocaicum", "phocaicum", "porcinum", "poriferae", "psychrotolerans", "pyrenivorans", "rhodesiae", "rufum", "rutilum", "salmoniphilum", "sediminis", "senegalense", "septicum", "setense", "smegmatis", "sphagni", "thermoresistibile", "tokaiense", "vaccae", "vanbaalenii", "wolinskyi")) %>% as.mo(keep_synonyms = TRUE) + )) # add subspecies to all species for (group in unique(microorganisms.groups$mo_group)) { spp <- microorganisms.groups %>% - filter(mo_group == group & mo_rank(mo, keep_synonyms = TRUE) == "species") %>% - pull(mo) %>% - paste0(collapse = "|") %>% + filter(mo_group == group & mo_rank(mo, keep_synonyms = TRUE) == "species") %>% + pull(mo) %>% + paste0(collapse = "|") %>% paste0("^(", ., ")") mos <- microorganisms %>% filter(mo %like% spp & rank == "subspecies") %>% @@ -175,9 +212,11 @@ for (group in unique(microorganisms.groups$mo_group)) { # add full names, arrange and clean microorganisms.groups <- microorganisms.groups %>% - mutate(mo_group_name = mo_name(mo_group, keep_synonyms = TRUE, language = NULL), - mo_name = mo_name(mo, keep_synonyms = TRUE, language = NULL)) %>% - arrange(mo_group_name, mo_name) %>% + mutate( + mo_group_name = mo_name(mo_group, keep_synonyms = TRUE, language = NULL), + mo_name = mo_name(mo, keep_synonyms = TRUE, language = NULL) + ) %>% + arrange(mo_group_name, mo_name) %>% filter(mo_group != mo) %>% distinct() %>% dataset_UTF8_to_ASCII() diff --git a/data-raw/extractATCs.R b/data-raw/extractATCs.R index 8d769ac83..d664cb666 100644 --- a/data-raw/extractATCs.R +++ b/data-raw/extractATCs.R @@ -68,9 +68,11 @@ new_ab <- complete_tbl |> ) |> mutate(name = 
paste0(substr(toupper(name), 1, 1), substr(name, 2, 999))) |> mutate(name = gsub(" and ", "/", name)) |> - filter(name %unlike% "^Combinations", - name %unlike% "/beta[-]lactamase inhibitor", - name %unlike% "combinations") |> + filter( + name %unlike% "^Combinations", + name %unlike% "/beta[-]lactamase inhibitor", + name %unlike% "combinations" + ) |> arrange(name) new_atcs <- new_ab |> diff --git a/data-raw/loinc.R b/data-raw/loinc.R index 26ddc72e1..4dc5cc8b5 100644 --- a/data-raw/loinc.R +++ b/data-raw/loinc.R @@ -31,7 +31,7 @@ # Steps to reproduce: # 1. Create a fake account at https://loinc.org (sad you have to create one...) -# 2. Download the CSV from https://loinc.org/download/loinc-complete/ +# 2. Download the CSV from https://loinc.org/download/loinc-complete/ # 3. Read file LoincTable/Loinc.csv loinc_df <- read.csv("data-raw/Loinc.csv", row.names = NULL, @@ -47,7 +47,7 @@ loinc_df %>% filter(COMPONENT %like% "ampicillin|fluconazol|meropenem") %>% count(CLASS, sort = TRUE) loinc_df <- loinc_df %>% - filter(CLASS %in% c("DRUG/TOX", "ABXBACT")) %>% + filter(CLASS %in% c("DRUG/TOX", "ABXBACT")) %>% mutate(name = generalise_antibiotic_name(COMPONENT), .before = 1) # antimicrobials diff --git a/data-raw/sensititre_ab.R b/data-raw/sensititre_ab.R index 87c64a1ed..898ac75d3 100644 --- a/data-raw/sensititre_ab.R +++ b/data-raw/sensititre_ab.R @@ -1,470 +1,470 @@ codes <- tibble::tribble( ~code, ~name, - "ABT773", "Abbott 773", - "AESCUL", "Aesculin", - "AGMATI", "Agmatine", - "AMDPEN", "Amidinopenicillin subclass", - "AMICYC", "Aminocyclitol class", - "AMIFLO", "Amifloxacin", - "AMIGLY", "Aminoglycoside class", - "AMIKAC", "Amikacin", - "AMIPEN", "Aminopenicillin subclass", - "AMOCL2", "Amoxicillin/ Clav.Acid */2", - "AMOCL4", "Amoxicillin/ Clav. 
Acid 4:1", - "AMOCLA", "Amoxicillin/ Clavulanic Acid", - "AMOXIC", "Amoxicillin", - "AMP100", "Ampicillin 100 ug/ml", - "AMP200", "Ampicillin 200 ug/ml", - "AMPHOT", "Amphotericin B", - "AMPICI", "Ampicillin", - "AMPSUL", "Ampicillin/ Sulbactam", - "ANIDUL", "Anidulafungin", - "ANSAMY", "Rifabutin", - "ANSMYC", "Ansamycin class", - "APALCI", "Apalcillin", - "APOXIC", "Apoxicillin", - "APRAMY", "Apramycin", - "ARABIN", "Arabinose", - "ARABIT", "Arabitol", - "ARBEKA", "Arbekacin", - "ARGINI", "Arginine", - "ASPOXI", "Aspoxicillin", - "ASTROM", "Astromycin", - "AVILAM", "Avilamycin", - "AZD256", "AZD2563", - "AZITHR", "Azithromycin", - "AZLOCI", "Azlocillin", - "AZT1", "Aztreonam 1 ug/ml", - "AZTREO", "Aztreonam", - "BACAMP", "Bacampicillin", - "BACITR", "Bacitracin", - "BAMMYC", "Bambermycin class", - "BAY12", "BAY12-8039", - "BERBER", "Berberine", - "BESIFL", "Besifloxacin", - "BETA", "Beta-lactamase", - "B", "HAEM Beta-haemolysis", - "BIAPEN", "Biapenem (L-627)", - "BLACT", "Beta-lactam class", - "BLINHB", "Beta-lactam Inhibitor class", - "B", "MGLU B-Methyl Glucoside", - "CAPREO", "Capreomycin", - "CAPRYL", "Caprylic Acid", - "CARBAD", "Carbadox", - "CARBAP", "Carbapenem class", - "CARBEN", "Carbenicillin", - "CARPEN", "Carboxypenicillin subclass", - "CASPOF", "Caspofungin", - "CATALA", "Catalase", - "CCARB", "Carbacephem subclass", - "CEFACL", "Cefaclor", - "CEFADR", "Cefadroxil", - "CEFAMA", "Cefamandole", - "CEFATR", "Cefatrizine", - "CEFAXE", "Cefuroxime (axetil)", - "CEFAZE", "Cefazedon", - "CEFAZO", "Cefazolin", - "CEFBUP", "Cefbuperazone", - "CEFCAP", "Cefcapene", - "CEFCLA", "Cefepime/ Clavulanic Acid", - "CEFCLI", "Cefclidin", - "CEFDIN", "Cefdinir", - "CEFDIT", "Cefditoren", - "CEFEP4", "Cefepime 4 ug/ml", - "CEFEPI", "Cefepime", - "CEFETA", "Cefetamet", - "CEFIXI", "Cefixime", - "CEFMEN", "Cefmenoxime", - "CEFMET", "Cefmetazole", - "CEFMIN", "Cefminox", - "CEFMTM", "Cefmetamet", - "CEFO32", "Cefotaxime 32 ug/ml", - "CEFONI", "Cefonicid", - "CEFOPE", 
"Cefoperazone", - "CEFORA", "Ceforanide", - "CEFOSE", "Cefoselis", - "CEFOTA", "Cefotaxime", - "CEFOTE", "Cefotetan", - "CEFOTI", "Cefotiam", - "CEFOVE", "Cefovecin", - "CEFOXI", "Cefoxitin", - "CEFOZO", "Cefozopran", - "CEFPAM", "Cefpiramide", - "CEFPIM", "Cefpimizole", - "CEFPOD", "Cefpodoxime", - "CEFPOM", "Cefpirome", - "CEFPRO", "Cefprozil", - "CEFQUI", "Cefquinome", - "CEFROX", "Cefroxidime", - "CEFSUL", "Cefsulodin", - "CEFTAR", "Ceftaroline", - "CEFTAZ", "Ceftazidime", - "CEFTER", "Cefteram", - "CEFTEZ", "Ceftezole", - "CEFTIB", "Ceftibuten", - "CEFTIF", "Ceftiofur", - "CEFTIX", "Ceftioxadine", - "CEFTIZ", "Ceftizoxime", - "CEFTOB", "Ceftobiprole", - "CEFTRI", "Ceftriaxone", - "CEFURO", "Cefuroxime (sodium)", - "CEFUZO", "Cefuzonam", - "CELLOB", "Cellobiose", - "CEPALE", "Cefalexin", - "CEPHAC", "Cephacetril", - "CEPHAL", "Cephalothin", - "CEPHAP", "Cephapirin", - "CEPHEM", "Cephem class", - "CEPHOR", "Cephem (oral) class", - "CEPHPA", "Cephem (parenteral) class", - "CEPHRA", "Cephradine", - "CEPLOR", "Cephaloridine", - "CHLORA", "Chloramphenicol", - "CHLTET", "Chlortetracycline", - "CI983", "CI-983", - "CINOXA", "Cinoxacin", - "CIPROF", "Ciprofloxacin", - "CIPROP", "CIPROP", - "CITRAT", "Citrate", - "CLARYT", "Clarithromycin", - "CLIN32", "Clindamycin 32 ug/ml", - "CLINAF", "Clinafloxacin", - "CLINDA", "Clindamycin", - "CLISPE", "Clindamycin/ Spectinomycin", - "CLOFAM", "Clofazimine", - "CLOXAC", "Cloxacillin", - "CMYC", "Cephamycin subclass", - "COAGUL", "Coagulase", - "COLFAZ", "Colfazamine", - "COLIST", "Colistin", - "COLMET", "Colistimethate", - "COMBO", "Combination class", - "CORAL", "Cephem (oral) class", - "COUMER", "Coumermycin", - "COXA", "Oxacephem subclass", - "CPAREN", "Cephem (parenteral) class", - "CPCA", "Cond. 
Pyridone Carboxylic Acid class", - "CSPOR", "Cephalosporin class", - "CSPOR1", "Cephalosporin I-Generation subclass", - "CSPOR2", "Cephalosporin II-Generation subclass", - "CSPOR3", "Cephalosporin III-Generation subclass", - "CSPOR4", "Cephalosporin IV-Generation subclass", - "CSPOR5", "Cephalosporin V-Generation subclass", - "CYCLAC", "Cyclacillin", - "CYCLOS", "Cycloserine", - "DALBAV", "Dalbavancin", - "DALFOP", "Dalfopristin", - "DANOFL", "Danofloxacin", - "DAPT25", "Daptomycin 25mg/L Ca", - "DAPT50", "Daptomycin 50mg/L Ca", - "DAPTOM", "Daptomycin", - "DEMECY", "Demeclocycline", - "DIBEKA", "Dibekacin", - "DICLOX", "Dicloxacillin", - "DIFLOX", "Difloxacin", - "DIRITH", "Dirithromycin", - "DORIPE", "Doripenem", - "DOXYCY", "Doxycycline", - "DTEST1", "DTest1", - "DTEST2", "DTest2", - "ENOXA", "Enoxacin", - "ENROFL", "Enrofloxacin", - "ERTAPE", "Ertapenem", - "ERY32", "Erythromycin 32 ug/ml", - "ERYSCH", "Erythromycin/ Sulphachloropyrid", - "ERYSDI", "Erythromycin/ Sulphadimethoxine", - "ERYSPE", "Erythromycin/ Spectinomycin", - "ERYSUL", "Erythromycin/ Sulfizoxazole", - "ERYTH", "Erythromycin", - "ESBL", "Extended spectrum beta-lactamase", - "ETHAMB", "Ethambutol", - "ETHION", "Ethionamide", - "FAROPE", "Faropenem", - "FLAVOM", "Flavomycin", - "FLEROX", "Fleroxacin", - "FLOMOX", "Flomoxef", - "FLORFE", "Florfenicol", - "FLQUIN", "Fluoroquinolone class", - "FLUCLO", "Flucloxacillin", - "FLUCON", "Fluconazole", - "FLUCYT", "5-Flucytosine", - "FLUMEQ", "Flumequine", - "FOPSUL", "Cefoperazone/ Sulbactam", - "FOSFOM", "Fosfomycin", - "FOSG6P", "Fosfomycin + Glucose6Phosphate", - "FOSMYC", "Fosfomycin class", - "FOSTRO", "Fosfomycin-trometamol", - "FOT1", "Cefotaxime 1 ug/ml", - "FOXSCR", "Cefoxitin Screen Test", - "FPINHB", "Folate Pathway Inhibitor class", - "FR1", "FR1", - "FR10", "FR10", - "FR12", "FR12", - "FR13", "FR13", - "FR14", "FR14", - "FR15", "FR15", - "FR16", "FR16", - "FR17", "FR17", - "FR18", "FR18", - "FR19", "FR19", - "FR20", "FR20", - "FR21", "FR21", 
- "FR22", "FR22", - "FR23", "FR23", - "FR24", "FR24", - "FR25", "FR25", - "FR26", "FR26", - "FR27", "FR27", - "FR28", "FR28", - "FR29", "FR29", - "FR3", "FR3", - "FR30", "FR30", - "FR31", "FR31", - "FR32", "FR32", - "FR5", "FR5", - "FR6", "FR6", - "FR7", "FR7", - "FR8", "FR8", - "FR9", "FR9", - "FRAMYC", "Framycetin", - "FRUCTO", "Fructose", - "FURALT", "Furaltadone", - "FURAZO", "Furazolidone", - "FUSACI", "Fusidic Acid", - "FUSIDA", "Fusidate", - "GARENO", "Garenoxacin", - "GARLIC", "Garlic", - "GATIFL", "Gatifloxacin", - "GE1000", "Gentamicin 1000 ug/ml", - "GE2000", "Gentamicin 2000 ug/ml", - "GEMIFL", "Gemifloxacin", - "GEN128", "Gentamicin 128 ug/ml", - "GEN500", "Gentamicin 500 ug/ml", - "GENTA1", "Gentamicin 1024 ug/ml", - "GENTAM", "Gentamicin", - "GLUCOS", "Glucose", - "GLYCER", "Glycerol", - "GLYCO", "Glycopeptide class", - "GREPAF", "Grepafloxacin", - "HETACI", "Hetacillin", - "HIPPUR", "Hippurate hydrolysis", - "HODGE", "Hodge Test", - "IB367", "IB-367", - "IBAFLO", "Ibafloxacin", - "ICLAPR", "Iclaprim", - "IMIDAZ", "Imidazole class", - "IMIP32", "Imipenem 32 ug/ml", - "IMIPEN", "Imipenem", - "INDOLE", "Indole", - "INOSIT", "Inositol", - "ISEPAM", "Isepamycin", - "ISONIA", "Isoniazid", - "ISOPEN", "Isoxazolyl Penicillin subclass", - "ITRACO", "Itraconazole", - "JOSAMY", "Josamycin", - "KANAMY", "Kanamycin", - "KETOCO", "Ketoconazole", - "KETOLI", "Ketolide class", - "LEVOFL", "Levofloxacin", - "LINCOM", "Lincomycin", - "LINCOS", "Lincosamide class", - "LINEZO", "Linezolid", - "LINFLO", "Linopristin-Flopristin", - "LINNEO", "Lincomycin/ Neomycin 2:1 ratio", - "LINSPE", "Lincomycin/ Spectinomycin", - "LIPGLY", "Lipoglycopeptide subclass", - "LIPOPE", "Lipopeptide class", - "LOMEFL", "Lomefloxacin", - "LORACA", "Loracarbef", - "LYSINE", "Lysine", - "MACCON", "Growth on MacConkey", - "MACRO", "Macrolide class", - "MALONA", "Malonate", - "MALTOS", "Maltose", - "MANNIT", "Mannitol", - "MARBOF", "Marbofloxacin", - "MECILL", "Mecillinam", - "MEROPE", 
"Meropenem", - "METHCY", "Methacycline", - "METHIC", "Methicillin", - "METRON", "Metronidazole", - "MEZLO", "Mezlocillin", - "MEZSUL", "Mezlocillin/ Sulbactam", - "MICAFU", "Micafungin", - "MICRON", "Micronomycin", - "MIDEKA", "Midekamycin", - "MINOCY", "Minocycline", - "MONOBA", "Monobactam class", - "MOTILI", "Motility", - "MOXALA", "Moxalactam", - "MOXIFL", "Moxifloxacin", - "MUPIRO", "Mupirocin", - "NAFCIL", "Nafcillin", - "NALAC", "Nalidixic Acid", - "NARASI", "Narasin", - "NEGCTL", "Negative Growth Control", - "NEOMYC", "Neomycin", - "NETILM", "Netilmicin", - "NFURAN", "Nitrofuran class", - "NIMIDA", "Nitroimidazole class", - "NIT16", "Nitrofurantoin 16ul", - "NITFUR", "Nitrofurazone", - "NITRAT", "Nitrate", - "NITRO", "Nitrofurantoin", - "NITSUL", "Nitrofurantoin/ Sulphadrazine", - "NORFLO", "Norfloxacin", - "NOVOBI", "Novobiocin", - "NYSTAN", "Nystantin", - "OFLOXA", "Ofloxacin", - "OLAQUI", "Olaquindox", - "OLEAND", "Oleandomycin", - "OPTOCH", "Optochin Sensitivity", - "ORBIFL", "Orbifloxacin", - "ORITAV", "Oritavancin", - "ORMSUL", "Ormetoprim/ Sulphadimethoxine", - "ORNIST", "Ornithine Spot Test", - "ORNITH", "Ornithine", - "OXACIL", "Oxacillin + 2% NaCl", - "OXAZOL", "Oxazolidinone class", - "OXIDAS", "Oxidase", - "OXOACI", "Oxolinic Acid", - "OXTSCH", "Oxytet/Tylosin Tar/Sulphachlor", - "OXTSDI", "Oxytet/Tylosin Tar/Sulphadimet", - "OXYCEP", "Oxyimino Cephalosporin subclass", - "OXYSCH", "Oxytetracycline/ Sulphachloropy", - "OXYTET", "Oxytetracycline", - "PASRAA", "Para-aminosalicylic acid", - "PEN003", "Penicillin 0.03ug", - "PENCIL", "Penicillin class", - "PENIC8", "Penicillin 8 ug/ml", - "PENICA", "Penicillin 1-2-8 ug/ml", - "PENICI", "Penicillin", - "PENMEN", "Penicillin(meningitis)", - "PENNME", "Penicillin(nonmeningitis)", - "PENNOV", "Penicillin/ Novobiocin", - "PENORA", "Penicillin (Oral)", - "PENSCH", "Penicillin/ Sulphachloropyridaz", - "PENSTR", "Penicillin/ Streptomycin", - "PERFLO", "Perfloxacin", - "PHENIC", "Phenicol class", - "PHEPEN", 
"Phenoxymethylpenicillin", - "PIGMEN", "Pigment", - "PIPACI", "Pipemidic Acid", - "PIPERA", "Piperacillin", - "PIPTAZ", "Piperacillin/ Tazobactam", - "PIRLIM", "Pirlimycin", - "PIVMEC", "Pivmecillinam", - "PLUERO", "Plueromutilin class", - "POD4", "Cefpodoxime 4 ug/ml", - "PODCLA", "Cefpodoxime/ Clavulanic Acid", - "POLION", "Polyether Ionophore class", - "POLPEP", "Polypeptide class", - "POLYB", "Polymyxin B", - "POSACO", "Posaconazole", - "POSCTL", "Positive Growth Control", - "PREMAF", "Premafloxacin", - "PRISTI", "Pristinamycin", - "PSPEN", "Penicillinase-stable Penicillin class", - "PYRUVA", "Pyruvate", - "QUIN", "Quinolone class", - "QUINOL", "Quinolones", - "QUINS1", "Quinolones subclass 1", - "QUINUP", "Quinupristin", - "R28965", "RU 28965", - "RAFFIN", "Raffinose", - "RAMOPL", "Ramoplanin", - "RAVUCO", "Ravuconazole", - "RAZUPE", "Razupenem", - "RHAMNO", "Rhamnose", - "RIFAMP", "Rifampin", - "RIFMYC", "Rifamycin class", - "ROKITA", "Rokitamycin", - "ROXITH", "Roxithromycin", - "S21420", "Schering 21420", - "S21561", "Schering 21561", - "S21562", "Schering 21562", - "S22591", "Schering 22591", - "S29482", "Schering 29482", - "S29486", "Schering 29486", - "S34343", "Schering 34343", - "S38609", "Schering 38609", - "SALCTL", "Positive Control +2% NaCl", - "SALINO", "Salinomycin", - "SANFET", "Sanfetrinem", - "SARAFL", "Sarafloxacin", - "SB2LB2", "SB265805/ LB20304", - "SBQLO", "SB265805", - "SDIMET", "Sulphadimethoxine", - "SIPRAM", "Sipramycin", - "SISOMY", "Sisomycin", - "SITAFL", "Sitafloxacin", - "SORBIT", "Sorbitol", - "SPARFL", "Sparfloxacin", - "SPECT", "Spectinomycin", - "SPIRAM", "Spiramycin", - "ST1000", "Streptomycin 1000 ug/ml", - "ST2000", "Streptomycin 2000 ug/ml", - "STREPT", "Streptomycin", - "STRGRA", "Streptogramin class", - "SUCROS", "Sucrose", - "SULAMI", "Sulfonamide subclass", - "SULBAC", "Sulbactam", - "SULBEN", "Sulbenicillin", - "SULCHL", "Sulphachloropyridazine", - "SULDIA", "Sulphadiazine", - "SULDIM", "Sulphadimidine", - "SULFAM", 
"Sulphamethoxazole", - "SULFIZ", "Sulfisoxazole", - "SULMET", "Sulphamethazine", - "SULOPE", "Sulopenem", - "SULTHI", "Sulphathiazole", - "SULTOS", "Sultamicillin Tosilate", - "SYNERC", "Quinupristin/dalfopristin", - "TANNAL", "Tannalbit", - "TAXCLA", "Cefotaxime/clavulanic acid", - "TAXMEN", "Cefotaxime (meningitis)", - "TAXNME", "Cefotaxime (nonmeningitis)", - "TAZCLA", "Ceftazidime/clavulanic acid", - "TAZOBA", "Tazobactam", - "TDA", "TDA", - "TEICOP", "Teicoplanin", - "TELAVA", "Telavancin", - "TELITH", "Telithromycin", - "TEMAFL", "Temafloxacin", - "TEMOCI", "Temocillin", - "TETCYC", "Tetracycline class", - "TETRA", "Tetracycline", - "THIAPH", "Thiaphenicol", - "TIAMUL", "Tiamulin", - "TICARC", "Ticarcillin", - "TICCLA", "Ticarcillin/ Clavulanic Acid", - "TIGECY", "Tigecycline", - "TILMIC", "Tilmicosin", - "TOBRAM", "Tobramycin", - "TOSUFL", "Tosufloxacin", - "TREHAL", "Trehalose", - "TRIBR", "Trimethoprim/ Sulphadiazine", - "TRICLA", "Ceftriaxone/clavulanic acid", - "TRIM", "Trimethoprim", - "TRIMEN", "Ceftriaxone (meningitis)", - "TRINME", "Ceftriaxone (nonmeningitis)", - "TRISUL", "Trimethoprim/ Sulphamethoxazole", - "TROSPE", "Trospectinomycin", - "TROVAF", "Trovafloxacin", - "TULATH", "Tulathromycin", - "TYLO", "Tylosin (Tartrate/ Base)", - "UNDECA", "Undecanoic Acid", - "UREA", "Urea", - "UREPEN", "Ureidopenicillin subclass", - "UVAURS", "Uva Ursa", - "VANCOM", "Vancomycin", - "VIRGIN", "Virginiamycin", - "VORICO", "Voriconazole", - "W49373", "Win 49373-3", - "W49548", "Win 49548-2A", - "W51692", "Win 51692", - "XYLOSE", "Xylose", - "YELPIG", "Yellow Pigment" + "ABT773", "Abbott 773", + "AESCUL", "Aesculin", + "AGMATI", "Agmatine", + "AMDPEN", "Amidinopenicillin subclass", + "AMICYC", "Aminocyclitol class", + "AMIFLO", "Amifloxacin", + "AMIGLY", "Aminoglycoside class", + "AMIKAC", "Amikacin", + "AMIPEN", "Aminopenicillin subclass", + "AMOCL2", "Amoxicillin/ Clav.Acid */2", + "AMOCL4", "Amoxicillin/ Clav. 
Acid 4:1", + "AMOCLA", "Amoxicillin/ Clavulanic Acid", + "AMOXIC", "Amoxicillin", + "AMP100", "Ampicillin 100 ug/ml", + "AMP200", "Ampicillin 200 ug/ml", + "AMPHOT", "Amphotericin B", + "AMPICI", "Ampicillin", + "AMPSUL", "Ampicillin/ Sulbactam", + "ANIDUL", "Anidulafungin", + "ANSAMY", "Rifabutin", + "ANSMYC", "Ansamycin class", + "APALCI", "Apalcillin", + "APOXIC", "Apoxicillin", + "APRAMY", "Apramycin", + "ARABIN", "Arabinose", + "ARABIT", "Arabitol", + "ARBEKA", "Arbekacin", + "ARGINI", "Arginine", + "ASPOXI", "Aspoxicillin", + "ASTROM", "Astromycin", + "AVILAM", "Avilamycin", + "AZD256", "AZD2563", + "AZITHR", "Azithromycin", + "AZLOCI", "Azlocillin", + "AZT1", "Aztreonam 1 ug/ml", + "AZTREO", "Aztreonam", + "BACAMP", "Bacampicillin", + "BACITR", "Bacitracin", + "BAMMYC", "Bambermycin class", + "BAY12", "BAY12-8039", + "BERBER", "Berberine", + "BESIFL", "Besifloxacin", + "BETA", "Beta-lactamase", + "B", "HAEM Beta-haemolysis", + "BIAPEN", "Biapenem (L-627)", + "BLACT", "Beta-lactam class", + "BLINHB", "Beta-lactam Inhibitor class", + "B", "MGLU B-Methyl Glucoside", + "CAPREO", "Capreomycin", + "CAPRYL", "Caprylic Acid", + "CARBAD", "Carbadox", + "CARBAP", "Carbapenem class", + "CARBEN", "Carbenicillin", + "CARPEN", "Carboxypenicillin subclass", + "CASPOF", "Caspofungin", + "CATALA", "Catalase", + "CCARB", "Carbacephem subclass", + "CEFACL", "Cefaclor", + "CEFADR", "Cefadroxil", + "CEFAMA", "Cefamandole", + "CEFATR", "Cefatrizine", + "CEFAXE", "Cefuroxime (axetil)", + "CEFAZE", "Cefazedon", + "CEFAZO", "Cefazolin", + "CEFBUP", "Cefbuperazone", + "CEFCAP", "Cefcapene", + "CEFCLA", "Cefepime/ Clavulanic Acid", + "CEFCLI", "Cefclidin", + "CEFDIN", "Cefdinir", + "CEFDIT", "Cefditoren", + "CEFEP4", "Cefepime 4 ug/ml", + "CEFEPI", "Cefepime", + "CEFETA", "Cefetamet", + "CEFIXI", "Cefixime", + "CEFMEN", "Cefmenoxime", + "CEFMET", "Cefmetazole", + "CEFMIN", "Cefminox", + "CEFMTM", "Cefmetamet", + "CEFO32", "Cefotaxime 32 ug/ml", + "CEFONI", "Cefonicid", + "CEFOPE", 
"Cefoperazone", + "CEFORA", "Ceforanide", + "CEFOSE", "Cefoselis", + "CEFOTA", "Cefotaxime", + "CEFOTE", "Cefotetan", + "CEFOTI", "Cefotiam", + "CEFOVE", "Cefovecin", + "CEFOXI", "Cefoxitin", + "CEFOZO", "Cefozopran", + "CEFPAM", "Cefpiramide", + "CEFPIM", "Cefpimizole", + "CEFPOD", "Cefpodoxime", + "CEFPOM", "Cefpirome", + "CEFPRO", "Cefprozil", + "CEFQUI", "Cefquinome", + "CEFROX", "Cefroxidime", + "CEFSUL", "Cefsulodin", + "CEFTAR", "Ceftaroline", + "CEFTAZ", "Ceftazidime", + "CEFTER", "Cefteram", + "CEFTEZ", "Ceftezole", + "CEFTIB", "Ceftibuten", + "CEFTIF", "Ceftiofur", + "CEFTIX", "Ceftioxadine", + "CEFTIZ", "Ceftizoxime", + "CEFTOB", "Ceftobiprole", + "CEFTRI", "Ceftriaxone", + "CEFURO", "Cefuroxime (sodium)", + "CEFUZO", "Cefuzonam", + "CELLOB", "Cellobiose", + "CEPALE", "Cefalexin", + "CEPHAC", "Cephacetril", + "CEPHAL", "Cephalothin", + "CEPHAP", "Cephapirin", + "CEPHEM", "Cephem class", + "CEPHOR", "Cephem (oral) class", + "CEPHPA", "Cephem (parenteral) class", + "CEPHRA", "Cephradine", + "CEPLOR", "Cephaloridine", + "CHLORA", "Chloramphenicol", + "CHLTET", "Chlortetracycline", + "CI983", "CI-983", + "CINOXA", "Cinoxacin", + "CIPROF", "Ciprofloxacin", + "CIPROP", "CIPROP", + "CITRAT", "Citrate", + "CLARYT", "Clarithromycin", + "CLIN32", "Clindamycin 32 ug/ml", + "CLINAF", "Clinafloxacin", + "CLINDA", "Clindamycin", + "CLISPE", "Clindamycin/ Spectinomycin", + "CLOFAM", "Clofazimine", + "CLOXAC", "Cloxacillin", + "CMYC", "Cephamycin subclass", + "COAGUL", "Coagulase", + "COLFAZ", "Colfazamine", + "COLIST", "Colistin", + "COLMET", "Colistimethate", + "COMBO", "Combination class", + "CORAL", "Cephem (oral) class", + "COUMER", "Coumermycin", + "COXA", "Oxacephem subclass", + "CPAREN", "Cephem (parenteral) class", + "CPCA", "Cond. 
Pyridone Carboxylic Acid class", + "CSPOR", "Cephalosporin class", + "CSPOR1", "Cephalosporin I-Generation subclass", + "CSPOR2", "Cephalosporin II-Generation subclass", + "CSPOR3", "Cephalosporin III-Generation subclass", + "CSPOR4", "Cephalosporin IV-Generation subclass", + "CSPOR5", "Cephalosporin V-Generation subclass", + "CYCLAC", "Cyclacillin", + "CYCLOS", "Cycloserine", + "DALBAV", "Dalbavancin", + "DALFOP", "Dalfopristin", + "DANOFL", "Danofloxacin", + "DAPT25", "Daptomycin 25mg/L Ca", + "DAPT50", "Daptomycin 50mg/L Ca", + "DAPTOM", "Daptomycin", + "DEMECY", "Demeclocycline", + "DIBEKA", "Dibekacin", + "DICLOX", "Dicloxacillin", + "DIFLOX", "Difloxacin", + "DIRITH", "Dirithromycin", + "DORIPE", "Doripenem", + "DOXYCY", "Doxycycline", + "DTEST1", "DTest1", + "DTEST2", "DTest2", + "ENOXA", "Enoxacin", + "ENROFL", "Enrofloxacin", + "ERTAPE", "Ertapenem", + "ERY32", "Erythromycin 32 ug/ml", + "ERYSCH", "Erythromycin/ Sulphachloropyrid", + "ERYSDI", "Erythromycin/ Sulphadimethoxine", + "ERYSPE", "Erythromycin/ Spectinomycin", + "ERYSUL", "Erythromycin/ Sulfizoxazole", + "ERYTH", "Erythromycin", + "ESBL", "Extended spectrum beta-lactamase", + "ETHAMB", "Ethambutol", + "ETHION", "Ethionamide", + "FAROPE", "Faropenem", + "FLAVOM", "Flavomycin", + "FLEROX", "Fleroxacin", + "FLOMOX", "Flomoxef", + "FLORFE", "Florfenicol", + "FLQUIN", "Fluoroquinolone class", + "FLUCLO", "Flucloxacillin", + "FLUCON", "Fluconazole", + "FLUCYT", "5-Flucytosine", + "FLUMEQ", "Flumequine", + "FOPSUL", "Cefoperazone/ Sulbactam", + "FOSFOM", "Fosfomycin", + "FOSG6P", "Fosfomycin + Glucose6Phosphate", + "FOSMYC", "Fosfomycin class", + "FOSTRO", "Fosfomycin-trometamol", + "FOT1", "Cefotaxime 1 ug/ml", + "FOXSCR", "Cefoxitin Screen Test", + "FPINHB", "Folate Pathway Inhibitor class", + "FR1", "FR1", + "FR10", "FR10", + "FR12", "FR12", + "FR13", "FR13", + "FR14", "FR14", + "FR15", "FR15", + "FR16", "FR16", + "FR17", "FR17", + "FR18", "FR18", + "FR19", "FR19", + "FR20", "FR20", + "FR21", "FR21", 
+ "FR22", "FR22", + "FR23", "FR23", + "FR24", "FR24", + "FR25", "FR25", + "FR26", "FR26", + "FR27", "FR27", + "FR28", "FR28", + "FR29", "FR29", + "FR3", "FR3", + "FR30", "FR30", + "FR31", "FR31", + "FR32", "FR32", + "FR5", "FR5", + "FR6", "FR6", + "FR7", "FR7", + "FR8", "FR8", + "FR9", "FR9", + "FRAMYC", "Framycetin", + "FRUCTO", "Fructose", + "FURALT", "Furaltadone", + "FURAZO", "Furazolidone", + "FUSACI", "Fusidic Acid", + "FUSIDA", "Fusidate", + "GARENO", "Garenoxacin", + "GARLIC", "Garlic", + "GATIFL", "Gatifloxacin", + "GE1000", "Gentamicin 1000 ug/ml", + "GE2000", "Gentamicin 2000 ug/ml", + "GEMIFL", "Gemifloxacin", + "GEN128", "Gentamicin 128 ug/ml", + "GEN500", "Gentamicin 500 ug/ml", + "GENTA1", "Gentamicin 1024 ug/ml", + "GENTAM", "Gentamicin", + "GLUCOS", "Glucose", + "GLYCER", "Glycerol", + "GLYCO", "Glycopeptide class", + "GREPAF", "Grepafloxacin", + "HETACI", "Hetacillin", + "HIPPUR", "Hippurate hydrolysis", + "HODGE", "Hodge Test", + "IB367", "IB-367", + "IBAFLO", "Ibafloxacin", + "ICLAPR", "Iclaprim", + "IMIDAZ", "Imidazole class", + "IMIP32", "Imipenem 32 ug/ml", + "IMIPEN", "Imipenem", + "INDOLE", "Indole", + "INOSIT", "Inositol", + "ISEPAM", "Isepamycin", + "ISONIA", "Isoniazid", + "ISOPEN", "Isoxazolyl Penicillin subclass", + "ITRACO", "Itraconazole", + "JOSAMY", "Josamycin", + "KANAMY", "Kanamycin", + "KETOCO", "Ketoconazole", + "KETOLI", "Ketolide class", + "LEVOFL", "Levofloxacin", + "LINCOM", "Lincomycin", + "LINCOS", "Lincosamide class", + "LINEZO", "Linezolid", + "LINFLO", "Linopristin-Flopristin", + "LINNEO", "Lincomycin/ Neomycin 2:1 ratio", + "LINSPE", "Lincomycin/ Spectinomycin", + "LIPGLY", "Lipoglycopeptide subclass", + "LIPOPE", "Lipopeptide class", + "LOMEFL", "Lomefloxacin", + "LORACA", "Loracarbef", + "LYSINE", "Lysine", + "MACCON", "Growth on MacConkey", + "MACRO", "Macrolide class", + "MALONA", "Malonate", + "MALTOS", "Maltose", + "MANNIT", "Mannitol", + "MARBOF", "Marbofloxacin", + "MECILL", "Mecillinam", + "MEROPE", 
"Meropenem", + "METHCY", "Methacycline", + "METHIC", "Methicillin", + "METRON", "Metronidazole", + "MEZLO", "Mezlocillin", + "MEZSUL", "Mezlocillin/ Sulbactam", + "MICAFU", "Micafungin", + "MICRON", "Micronomycin", + "MIDEKA", "Midekamycin", + "MINOCY", "Minocycline", + "MONOBA", "Monobactam class", + "MOTILI", "Motility", + "MOXALA", "Moxalactam", + "MOXIFL", "Moxifloxacin", + "MUPIRO", "Mupirocin", + "NAFCIL", "Nafcillin", + "NALAC", "Nalidixic Acid", + "NARASI", "Narasin", + "NEGCTL", "Negative Growth Control", + "NEOMYC", "Neomycin", + "NETILM", "Netilmicin", + "NFURAN", "Nitrofuran class", + "NIMIDA", "Nitroimidazole class", + "NIT16", "Nitrofurantoin 16ul", + "NITFUR", "Nitrofurazone", + "NITRAT", "Nitrate", + "NITRO", "Nitrofurantoin", + "NITSUL", "Nitrofurantoin/ Sulphadrazine", + "NORFLO", "Norfloxacin", + "NOVOBI", "Novobiocin", + "NYSTAN", "Nystantin", + "OFLOXA", "Ofloxacin", + "OLAQUI", "Olaquindox", + "OLEAND", "Oleandomycin", + "OPTOCH", "Optochin Sensitivity", + "ORBIFL", "Orbifloxacin", + "ORITAV", "Oritavancin", + "ORMSUL", "Ormetoprim/ Sulphadimethoxine", + "ORNIST", "Ornithine Spot Test", + "ORNITH", "Ornithine", + "OXACIL", "Oxacillin + 2% NaCl", + "OXAZOL", "Oxazolidinone class", + "OXIDAS", "Oxidase", + "OXOACI", "Oxolinic Acid", + "OXTSCH", "Oxytet/Tylosin Tar/Sulphachlor", + "OXTSDI", "Oxytet/Tylosin Tar/Sulphadimet", + "OXYCEP", "Oxyimino Cephalosporin subclass", + "OXYSCH", "Oxytetracycline/ Sulphachloropy", + "OXYTET", "Oxytetracycline", + "PASRAA", "Para-aminosalicylic acid", + "PEN003", "Penicillin 0.03ug", + "PENCIL", "Penicillin class", + "PENIC8", "Penicillin 8 ug/ml", + "PENICA", "Penicillin 1-2-8 ug/ml", + "PENICI", "Penicillin", + "PENMEN", "Penicillin(meningitis)", + "PENNME", "Penicillin(nonmeningitis)", + "PENNOV", "Penicillin/ Novobiocin", + "PENORA", "Penicillin (Oral)", + "PENSCH", "Penicillin/ Sulphachloropyridaz", + "PENSTR", "Penicillin/ Streptomycin", + "PERFLO", "Perfloxacin", + "PHENIC", "Phenicol class", + "PHEPEN", 
"Phenoxymethylpenicillin", + "PIGMEN", "Pigment", + "PIPACI", "Pipemidic Acid", + "PIPERA", "Piperacillin", + "PIPTAZ", "Piperacillin/ Tazobactam", + "PIRLIM", "Pirlimycin", + "PIVMEC", "Pivmecillinam", + "PLUERO", "Plueromutilin class", + "POD4", "Cefpodoxime 4 ug/ml", + "PODCLA", "Cefpodoxime/ Clavulanic Acid", + "POLION", "Polyether Ionophore class", + "POLPEP", "Polypeptide class", + "POLYB", "Polymyxin B", + "POSACO", "Posaconazole", + "POSCTL", "Positive Growth Control", + "PREMAF", "Premafloxacin", + "PRISTI", "Pristinamycin", + "PSPEN", "Penicillinase-stable Penicillin class", + "PYRUVA", "Pyruvate", + "QUIN", "Quinolone class", + "QUINOL", "Quinolones", + "QUINS1", "Quinolones subclass 1", + "QUINUP", "Quinupristin", + "R28965", "RU 28965", + "RAFFIN", "Raffinose", + "RAMOPL", "Ramoplanin", + "RAVUCO", "Ravuconazole", + "RAZUPE", "Razupenem", + "RHAMNO", "Rhamnose", + "RIFAMP", "Rifampin", + "RIFMYC", "Rifamycin class", + "ROKITA", "Rokitamycin", + "ROXITH", "Roxithromycin", + "S21420", "Schering 21420", + "S21561", "Schering 21561", + "S21562", "Schering 21562", + "S22591", "Schering 22591", + "S29482", "Schering 29482", + "S29486", "Schering 29486", + "S34343", "Schering 34343", + "S38609", "Schering 38609", + "SALCTL", "Positive Control +2% NaCl", + "SALINO", "Salinomycin", + "SANFET", "Sanfetrinem", + "SARAFL", "Sarafloxacin", + "SB2LB2", "SB265805/ LB20304", + "SBQLO", "SB265805", + "SDIMET", "Sulphadimethoxine", + "SIPRAM", "Sipramycin", + "SISOMY", "Sisomycin", + "SITAFL", "Sitafloxacin", + "SORBIT", "Sorbitol", + "SPARFL", "Sparfloxacin", + "SPECT", "Spectinomycin", + "SPIRAM", "Spiramycin", + "ST1000", "Streptomycin 1000 ug/ml", + "ST2000", "Streptomycin 2000 ug/ml", + "STREPT", "Streptomycin", + "STRGRA", "Streptogramin class", + "SUCROS", "Sucrose", + "SULAMI", "Sulfonamide subclass", + "SULBAC", "Sulbactam", + "SULBEN", "Sulbenicillin", + "SULCHL", "Sulphachloropyridazine", + "SULDIA", "Sulphadiazine", + "SULDIM", "Sulphadimidine", + "SULFAM", 
"Sulphamethoxazole", + "SULFIZ", "Sulfisoxazole", + "SULMET", "Sulphamethazine", + "SULOPE", "Sulopenem", + "SULTHI", "Sulphathiazole", + "SULTOS", "Sultamicillin Tosilate", + "SYNERC", "Quinupristin/dalfopristin", + "TANNAL", "Tannalbit", + "TAXCLA", "Cefotaxime/clavulanic acid", + "TAXMEN", "Cefotaxime (meningitis)", + "TAXNME", "Cefotaxime (nonmeningitis)", + "TAZCLA", "Ceftazidime/clavulanic acid", + "TAZOBA", "Tazobactam", + "TDA", "TDA", + "TEICOP", "Teicoplanin", + "TELAVA", "Telavancin", + "TELITH", "Telithromycin", + "TEMAFL", "Temafloxacin", + "TEMOCI", "Temocillin", + "TETCYC", "Tetracycline class", + "TETRA", "Tetracycline", + "THIAPH", "Thiaphenicol", + "TIAMUL", "Tiamulin", + "TICARC", "Ticarcillin", + "TICCLA", "Ticarcillin/ Clavulanic Acid", + "TIGECY", "Tigecycline", + "TILMIC", "Tilmicosin", + "TOBRAM", "Tobramycin", + "TOSUFL", "Tosufloxacin", + "TREHAL", "Trehalose", + "TRIBR", "Trimethoprim/ Sulphadiazine", + "TRICLA", "Ceftriaxone/clavulanic acid", + "TRIM", "Trimethoprim", + "TRIMEN", "Ceftriaxone (meningitis)", + "TRINME", "Ceftriaxone (nonmeningitis)", + "TRISUL", "Trimethoprim/ Sulphamethoxazole", + "TROSPE", "Trospectinomycin", + "TROVAF", "Trovafloxacin", + "TULATH", "Tulathromycin", + "TYLO", "Tylosin (Tartrate/ Base)", + "UNDECA", "Undecanoic Acid", + "UREA", "Urea", + "UREPEN", "Ureidopenicillin subclass", + "UVAURS", "Uva Ursa", + "VANCOM", "Vancomycin", + "VIRGIN", "Virginiamycin", + "VORICO", "Voriconazole", + "W49373", "Win 49373-3", + "W49548", "Win 49548-2A", + "W51692", "Win 51692", + "XYLOSE", "Xylose", + "YELPIG", "Yellow Pigment" ) codes$name <- gsub("Apoxi", "Aspoxi", codes$name) @@ -476,10 +476,10 @@ codes$ab_name <- ab_name(codes$name_gen) codes$lev <- unlist(Map(f = function(a, b) { as.double(utils::adist(a, b, - ignore.case = FALSE, - fixed = TRUE, - costs = c(insertions = 5, deletions = 1, substitutions = 10), - counts = FALSE + ignore.case = FALSE, + fixed = TRUE, + costs = c(insertions = 5, deletions = 1, 
substitutions = 10), + counts = FALSE )) }, codes$name_gen, generalise_antibiotic_name(codes$ab_name), USE.NAMES = FALSE)) @@ -487,7 +487,10 @@ codes$lev_pct <- codes$lev / nchar(codes$name) View(codes) -import <- codes |> filter(lev <= 10 | name_gen == "PENICILLIN") |> as_tibble() |> mutate(ab = as.ab(ab_name, fast_mode = TRUE)) +import <- codes |> + filter(lev <= 10 | name_gen == "PENICILLIN") |> + as_tibble() |> + mutate(ab = as.ab(ab_name, fast_mode = TRUE)) for (i in seq_len(NROW(import))) { # put them in the abbreviations diff --git a/data-raw/sensititre_mo.R b/data-raw/sensititre_mo.R index 5c154713c..b1a14c716 100644 --- a/data-raw/sensititre_mo.R +++ b/data-raw/sensititre_mo.R @@ -1056,8 +1056,10 @@ import$mo <- as.mo(import$mo_name) microorganisms.codes <- microorganisms.codes |> bind_rows( - tibble(code = toupper(import$code), - mo = import$mo) |> + tibble( + code = toupper(import$code), + mo = import$mo + ) |> distinct() ) |> arrange(code) @@ -1065,4 +1067,3 @@ class(microorganisms.codes$mo) <- c("mo", "character") usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2) rm(microorganisms.codes) devtools::load_all() - diff --git a/data-raw/wisca_reprex.R b/data-raw/wisca_reprex.R index bed01caca..bdad7a522 100644 --- a/data-raw/wisca_reprex.R +++ b/data-raw/wisca_reprex.R @@ -3,13 +3,15 @@ df <- example_isolates |> mutate(mo = ifelse(mo_genus(mo) == "Klebsiella", as.mo("Klebsiella"), mo)) |> top_n_microorganisms(10) -out_new <- df |> antibiogram(c("TZP","TZP+GEN","TZP+TOB"), wisca = TRUE, syndromic_group = "ward") -out_nonwisca <- df |> antibiogram(c("TZP","TZP+GEN","TZP+TOB"), - syndromic_group = "ward", - mo_transform = function(x) "", - digits = 1, - minimum = 10, - formatting_type = 14) |> +out_new <- df |> antibiogram(c("TZP", "TZP+GEN", "TZP+TOB"), wisca = TRUE, syndromic_group = "ward") +out_nonwisca <- df |> + antibiogram(c("TZP", "TZP+GEN", "TZP+TOB"), + syndromic_group = "ward", + mo_transform = function(x) "", + 
digits = 1, + minimum = 10, + formatting_type = 14 + ) |> as_tibble() |> select(-Pathogen) @@ -17,7 +19,3 @@ out_nonwisca <- df |> antibiogram(c("TZP","TZP+GEN","TZP+TOB"), # parameters_amr.R: number of first isolates are determined on the whole data set, while Klebsiella is aggregated afterwards (=duplicates on genus level) source("~/Downloads/estimate_definition_amr.R") - - - - diff --git a/man/AMR-options.Rd b/man/AMR-options.Rd index 2b7abae75..e1f77b379 100644 --- a/man/AMR-options.Rd +++ b/man/AMR-options.Rd @@ -12,7 +12,7 @@ This is an overview of all the package-specific options you can set in the \code \itemize{ \item \code{AMR_antibiogram_formatting_type} \cr A \link{numeric} (1-22) to use in \code{\link[=antibiogram]{antibiogram()}}, to indicate which formatting type to use. -\item \code{AMR_breakpoint_type} \cr A \link{character} to use in \code{\link[=as.sir]{as.sir()}}, to indicate which breakpoint type to use. This must be either {.val ECOFF}, {.val animal}, or {.val human}. +\item \code{AMR_breakpoint_type} \cr A \link{character} to use in \code{\link[=as.sir]{as.sir()}}, to indicate which breakpoint type to use. This must be either \code{"ECOFF"}, \code{"animal"}, or \code{"human"}. \item \code{AMR_capped_mic_handling} \cr A \link{character} to use in \code{\link[=as.sir]{as.sir()}}, to indicate how capped MIC values (\code{<}, \code{<=}, \code{>}, \code{>=}) should be interpreted. Must be one of \code{"none"}, \code{"conservative"}, \code{"standard"}, or \code{"lenient"} - the default is \code{"conservative"}. \item \code{AMR_cleaning_regex} \cr A \link[base:regex]{regular expression} (case-insensitive) to use in \code{\link[=as.mo]{as.mo()}} and all \code{\link[=mo_property]{mo_*}} functions, to clean the user input. The default is the outcome of \code{\link[=mo_cleaning_regex]{mo_cleaning_regex()}}, which removes texts between brackets and texts such as "species" and "serovar". 
\item \code{AMR_custom_ab} \cr A file location to an RDS file, to use custom antimicrobial drugs with this package. This is explained in \code{\link[=add_custom_antimicrobials]{add_custom_antimicrobials()}}. diff --git a/man/ab_property.Rd b/man/ab_property.Rd index 002d0733e..44a6857c5 100644 --- a/man/ab_property.Rd +++ b/man/ab_property.Rd @@ -67,7 +67,7 @@ set_ab_names(data, ..., property = "name", language = get_AMR_locale(), \item{open}{Browse the URL using \code{\link[utils:browseURL]{utils::browseURL()}}.} -\item{property}{One of the column names of one of the \link{antimicrobials} data set: \code{vector_or(colnames(antimicrobials), sort = FALSE)}.} +\item{property}{One of the column names of one of the \link{antimicrobials} data set: \code{"ab"}, \code{"cid"}, \code{"name"}, \code{"group"}, \code{"atc"}, \code{"atc_group1"}, \code{"atc_group2"}, \code{"abbreviations"}, \code{"synonyms"}, \code{"oral_ddd"}, \code{"oral_units"}, \code{"iv_ddd"}, \code{"iv_units"}, or \code{"loinc"}.} \item{data}{A \link{data.frame} of which the columns need to be renamed, or a \link{character} vector of column names.} diff --git a/man/antibiogram.Rd b/man/antibiogram.Rd index 83d1fe651..940a2fd51 100644 --- a/man/antibiogram.Rd +++ b/man/antibiogram.Rd @@ -68,9 +68,9 @@ retrieve_wisca_parameters(wisca_model, ...) } }} -\item{mo_transform}{A character to transform microorganism input - must be \code{"name"}, \code{"shortname"} (default), \code{"gramstain"}, or one of the column names of the \link{microorganisms} data set: {.val mo}, {.val fullname}, {.val status}, {.val kingdom}, {.val phylum}, {.val class}, {.val order}, {.val family}, {.val genus}, {.val species}, {.val subspecies}, {.val rank}, {.val ref}, {.val oxygen_tolerance}, {.val source}, {.val lpsn}, {.val lpsn_parent}, {.val lpsn_renamed_to}, {.val mycobank}, {.val mycobank_parent}, {.val mycobank_renamed_to}, {.val gbif}, {.val gbif_parent}, {.val gbif_renamed_to}, {.val prevalence}, or {.val snomed}. 
Can also be \code{NULL} to not transform the input or \code{NA} to consider all microorganisms 'unknown'.} +\item{mo_transform}{A character to transform microorganism input - must be \code{"name"}, \code{"shortname"} (default), \code{"gramstain"}, or one of the column names of the \link{microorganisms} data set: \code{"mo"}, \code{"fullname"}, \code{"status"}, \code{"kingdom"}, \code{"phylum"}, \code{"class"}, \code{"order"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"subspecies"}, \code{"rank"}, \code{"ref"}, \code{"oxygen_tolerance"}, \code{"source"}, \code{"lpsn"}, \code{"lpsn_parent"}, \code{"lpsn_renamed_to"}, \code{"mycobank"}, \code{"mycobank_parent"}, \code{"mycobank_renamed_to"}, \code{"gbif"}, \code{"gbif_parent"}, \code{"gbif_renamed_to"}, \code{"prevalence"}, or \code{"snomed"}. Can also be \code{NULL} to not transform the input or \code{NA} to consider all microorganisms 'unknown'.} -\item{ab_transform}{A character to transform antimicrobial input - must be one of the column names of the \link{antimicrobials} data set (defaults to \code{"name"}): {.val ab}, {.val cid}, {.val name}, {.val group}, {.val atc}, {.val atc_group1}, {.val atc_group2}, {.val abbreviations}, {.val synonyms}, {.val oral_ddd}, {.val oral_units}, {.val iv_ddd}, {.val iv_units}, or {.val loinc}. Can also be \code{NULL} to not transform the input.} +\item{ab_transform}{A character to transform antimicrobial input - must be one of the column names of the \link{antimicrobials} data set (defaults to \code{"name"}): \code{"ab"}, \code{"cid"}, \code{"name"}, \code{"group"}, \code{"atc"}, \code{"atc_group1"}, \code{"atc_group2"}, \code{"abbreviations"}, \code{"synonyms"}, \code{"oral_ddd"}, \code{"oral_units"}, \code{"iv_ddd"}, \code{"iv_units"}, or \code{"loinc"}. Can also be \code{NULL} to not transform the input.} \item{syndromic_group}{A column name of \code{x}, or values calculated to split rows of \code{x}, e.g. 
by using \code{\link[=ifelse]{ifelse()}} or \code{\link[dplyr:case-and-replace-when]{case_when()}}. See \emph{Examples}.} diff --git a/man/antimicrobial_selectors.Rd b/man/antimicrobial_selectors.Rd index ea5de7a5f..eb5d28095 100644 --- a/man/antimicrobial_selectors.Rd +++ b/man/antimicrobial_selectors.Rd @@ -157,7 +157,7 @@ not_intrinsic_resistant(only_sir_columns = FALSE, col_mo = NULL, \item{col_mo}{Column name of the names or codes of the microorganisms (see \code{\link[=as.mo]{as.mo()}}) - the default is the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.} -\item{version_expected_phenotypes}{The version number to use for the EUCAST Expected Phenotypes. Can be {.val 1.2}.} +\item{version_expected_phenotypes}{The version number to use for the EUCAST Expected Phenotypes. Can be \code{"1.2"}.} } \value{ When used inside selecting or filtering, this returns a \link{character} vector of column names, with additional class \code{"amr_selector"}. When used individually, this returns an \link[=as.ab]{'ab' vector} with all possible antimicrobials that the function would be able to select or filter. diff --git a/man/as.sir.Rd b/man/as.sir.Rd index 7439fe84d..88e5bfa1e 100644 --- a/man/as.sir.Rd +++ b/man/as.sir.Rd @@ -138,7 +138,7 @@ The default \code{"conservative"} setting ensures cautious handling of uncertain \item{include_PKPD}{A \link{logical} to indicate that PK/PD clinical breakpoints must be applied as a last resort - the default is \code{TRUE}. Can also be set with the package option \code{\link[=AMR-options]{AMR_include_PKPD}}.} -\item{breakpoint_type}{The type of breakpoints to use, either {.val ECOFF}, {.val animal}, or {.val human}. ECOFF stands for Epidemiological Cut-Off values. The default is \code{"human"}, which can also be set with the package option \code{\link[=AMR-options]{AMR_breakpoint_type}}. 
If \code{host} is set to values of veterinary species, this will automatically be set to \code{"animal"}.} +\item{breakpoint_type}{The type of breakpoints to use, either \code{"ECOFF"}, \code{"animal"}, or \code{"human"}. ECOFF stands for Epidemiological Cut-Off values. The default is \code{"human"}, which can also be set with the package option \code{\link[=AMR-options]{AMR_breakpoint_type}}. If \code{host} is set to values of veterinary species, this will automatically be set to \code{"animal"}.} \item{host}{A vector (or column name) with \link{character}s to indicate the host. Only useful for veterinary breakpoints, as it requires \code{breakpoint_type = "animal"}. The values can be any text resembling the animal species, even in any of the 28 supported languages of this package. For foreign languages, be sure to set the language with \code{\link[=set_AMR_locale]{set_AMR_locale()}} (though it will be automatically guessed based on the system language).} diff --git a/man/av_property.Rd b/man/av_property.Rd index a312061c5..35ce82403 100644 --- a/man/av_property.Rd +++ b/man/av_property.Rd @@ -52,7 +52,7 @@ av_property(x, property = "name", language = get_AMR_locale(), ...) 
\item{open}{Browse the URL using \code{\link[utils:browseURL]{utils::browseURL()}}.} -\item{property}{One of the column names of one of the \link{antivirals} data set: \code{vector_or(colnames(antivirals), sort = FALSE)}.} +\item{property}{One of the column names of the \link{antivirals} data set: \code{"av"}, \code{"name"}, \code{"atc"}, \code{"cid"}, \code{"atc_group"}, \code{"synonyms"}, \code{"oral_ddd"}, \code{"oral_units"}, \code{"iv_ddd"}, \code{"iv_units"}, or \code{"loinc"}.} } \value{ \itemize{ diff --git a/man/clinical_breakpoints.Rd b/man/clinical_breakpoints.Rd index 7138abec3..4070bfeba 100644 --- a/man/clinical_breakpoints.Rd +++ b/man/clinical_breakpoints.Rd @@ -8,9 +8,9 @@ A \link[tibble:tibble]{tibble} with 45 730 observations and 14 variables: \itemize{ \item \code{guideline}\cr Name of the guideline -\item \code{type}\cr Breakpoint type, either {.val ECOFF}, {.val animal}, or {.val human} -\item \code{host}\cr Host of infectious agent. This is mostly useful for veterinary breakpoints and is either {.val ECOFF}, {.val aquatic}, {.val cats}, {.val cattle}, {.val dogs}, {.val horse}, {.val human}, {.val poultry}, or {.val swine} -\item \code{method}\cr Testing method, either {.val DISK} or {.val MIC} +\item \code{type}\cr Breakpoint type, either \code{"ECOFF"}, \code{"animal"}, or \code{"human"} +\item \code{host}\cr Host of infectious agent. This is mostly useful for veterinary breakpoints and is either \code{"ECOFF"}, \code{"aquatic"}, \code{"cats"}, \code{"cattle"}, \code{"dogs"}, \code{"horse"}, \code{"human"}, \code{"poultry"}, or \code{"swine"} +\item \code{method}\cr Testing method, either \code{"DISK"} or \code{"MIC"} \item \code{site}\cr Body site for which the breakpoint must be applied, e.g.
"Oral" or "Respiratory" \item \code{mo}\cr Microbial ID, see \code{\link[=as.mo]{as.mo()}} \item \code{rank_index}\cr Taxonomic rank index of \code{mo} from 1 (subspecies/infraspecies) to 5 (unknown microorganism) diff --git a/man/custom_eucast_rules.Rd b/man/custom_eucast_rules.Rd index 1dcc03020..355578f88 100644 --- a/man/custom_eucast_rules.Rd +++ b/man/custom_eucast_rules.Rd @@ -61,7 +61,7 @@ eucast_rules(df, \subsection{Using taxonomic properties in rules}{ -There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", and "snomed". Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}: +There is one exception in columns used for the rules: all column names of the \link{microorganisms} data set can also be used, but do not have to exist in the data set. These column names are: \code{"mo"}, \code{"fullname"}, \code{"status"}, \code{"kingdom"}, \code{"phylum"}, \code{"class"}, \code{"order"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"subspecies"}, \code{"rank"}, \code{"ref"}, \code{"oxygen_tolerance"}, \code{"source"}, \code{"lpsn"}, \code{"lpsn_parent"}, \code{"lpsn_renamed_to"}, \code{"mycobank"}, \code{"mycobank_parent"}, \code{"mycobank_renamed_to"}, \code{"gbif"}, \code{"gbif_parent"}, \code{"gbif_renamed_to"}, \code{"prevalence"}, and \code{"snomed"}. Thus, this next example will work as well, despite the fact that the \code{df} data set does not contain a column \code{genus}: \if{html}{\out{
}}\preformatted{y <- custom_eucast_rules( TZP == "S" & genus == "Klebsiella" ~ aminopenicillins == "S", diff --git a/man/dosage.Rd b/man/dosage.Rd index 8e2ba35be..28ac43706 100644 --- a/man/dosage.Rd +++ b/man/dosage.Rd @@ -9,10 +9,10 @@ A \link[tibble:tibble]{tibble} with 759 observations and 9 variables: \itemize{ \item \code{ab}\cr Antimicrobial ID as used in this package (such as \code{AMC}), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available \item \code{name}\cr Official name of the antimicrobial drug as used by WHONET/EARS-Net or the WHO -\item \code{type}\cr Type of the dosage, either {.val high_dosage}, {.val standard_dosage}, or {.val uncomplicated_uti} +\item \code{type}\cr Type of the dosage, either \code{"high_dosage"}, \code{"standard_dosage"}, or \code{"uncomplicated_uti"} \item \code{dose}\cr Dose, such as "2 g" or "25 mg/kg" \item \code{dose_times}\cr Number of times a dose must be administered -\item \code{administration}\cr Route of administration, either {.val }, {.val im}, {.val iv}, {.val oral}, or NA +\item \code{administration}\cr Route of administration, either \code{""}, \code{"im"}, \code{"iv"}, \code{"oral"}, or NA \item \code{notes}\cr Additional dosage notes \item \code{original_txt}\cr Original text in the PDF file of EUCAST \item \code{eucast_version}\cr Version number of the EUCAST Clinical Breakpoints guideline to which these dosages apply, either 15, 14, 13.1, 12, or 11 diff --git a/man/example_isolates.Rd b/man/example_isolates.Rd index e52d4de22..eb79fd1e5 100644 --- a/man/example_isolates.Rd +++ b/man/example_isolates.Rd @@ -10,8 +10,8 @@ A \link[tibble:tibble]{tibble} with 2 000 observations and 46 variables: \item \code{date}\cr Date of receipt at the laboratory \item \code{patient}\cr ID of the patient \item \code{age}\cr Age of the patient -\item \code{gender}\cr Gender of the patient, either {.val F} or {.val M} -\item \code{ward}\cr Ward type where the patient was 
admitted, either {.val Clinical}, {.val ICU}, or {.val Outpatient} +\item \code{gender}\cr Gender of the patient, either \code{"F"} or \code{"M"} +\item \code{ward}\cr Ward type where the patient was admitted, either \code{"Clinical"}, \code{"ICU"}, or \code{"Outpatient"} \item \code{mo}\cr ID of microorganism created with \code{\link[=as.mo]{as.mo()}}, see also the \link{microorganisms} data set \item \code{PEN:RIF}\cr 40 different antimicrobials with class \code{\link{sir}} (see \code{\link[=as.sir]{as.sir()}}); these column names occur in the \link{antimicrobials} data set and can be translated with \code{\link[=set_ab_names]{set_ab_names()}} or \code{\link[=ab_name]{ab_name()}} } diff --git a/man/interpretive_rules.Rd b/man/interpretive_rules.Rd index 7ec09fc93..341196f17 100644 --- a/man/interpretive_rules.Rd +++ b/man/interpretive_rules.Rd @@ -27,7 +27,7 @@ interpretive_rules(x, col_mo = NULL, info = interactive(), verbose = FALSE, version_breakpoints = 16, version_expected_phenotypes = 1.2, version_expertrules = 3.3, ampc_cephalosporin_resistance = NA, only_sir_columns = any(is.sir(x)), - custom_rules = NULL, overwrite = FALSE, ...) + custom_rules = NULL, overwrite = FALSE, add_if_missing = TRUE, ...) eucast_rules(x, col_mo = NULL, info = interactive(), rules = getOption("AMR_interpretive_rules", default = c("breakpoints", @@ -52,11 +52,11 @@ eucast_dosage(ab, administration = "iv", version_breakpoints = 15) \item{verbose}{A \link{logical} to turn Verbose mode on and off (default is off). In Verbose mode, the function does not apply rules to the data, but instead returns a data set in logbook form with extensive info about which rows and columns would be effected and in which way. Using Verbose mode takes a lot more time.} -\item{version_breakpoints}{The version number to use for the EUCAST Clinical Breakpoints guideline. 
Can be {.val 16.0}, {.val 15.0}, {.val 14.0}, {.val 13.1}, {.val 12.0}, {.val 11.0}, or {.val 10.0}.} +\item{version_breakpoints}{The version number to use for the EUCAST Clinical Breakpoints guideline. Can be \code{"16.0"}, \code{"15.0"}, \code{"14.0"}, \code{"13.1"}, \code{"12.0"}, \code{"11.0"}, or \code{"10.0"}.} -\item{version_expected_phenotypes}{The version number to use for the EUCAST Expected Phenotypes. Can be {.val 1.2}.} +\item{version_expected_phenotypes}{The version number to use for the EUCAST Expected Phenotypes. Can be \code{"1.2"}.} -\item{version_expertrules}{The version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be {.val 3.3}, {.val 3.2}, or {.val 3.1}.} +\item{version_expertrules}{The version number to use for the EUCAST Expert Rules and Intrinsic Resistance guideline. Can be \code{"3.3"}, \code{"3.2"}, or \code{"3.1"}.} \item{ampc_cephalosporin_resistance}{(only applies when \code{rules} contains \code{"expert"} or \code{"all"}) a \link{character} value that should be applied to cefotaxime, ceftriaxone and ceftazidime for AmpC de-repressed cephalosporin-resistant mutants - the default is \code{NA}. Currently only works when \code{version_expertrules} is \code{3.2} and higher; these versions of '\emph{EUCAST Expert Rules on Enterobacterales}' state that results of cefotaxime, ceftriaxone and ceftazidime should be reported with a note, or results should be suppressed (emptied) for these three drugs. A value of \code{NA} (the default) for this argument will remove results for these three drugs, while e.g. a value of \code{"R"} will make the results for these drugs resistant. Use \code{NULL} or \code{FALSE} to not alter results for these three drugs of AmpC de-repressed cephalosporin-resistant mutants. Using \code{TRUE} is equal to using \code{"R"}. 
\cr For \emph{EUCAST Expert Rules} v3.2, this rule applies to: \emph{Citrobacter braakii}, \emph{Citrobacter freundii}, \emph{Citrobacter gillenii}, \emph{Citrobacter murliniae}, \emph{Citrobacter rodenticum}, \emph{Citrobacter sedlakii}, \emph{Citrobacter werkmanii}, \emph{Citrobacter youngae}, \emph{Enterobacter}, \emph{Hafnia alvei}, \emph{Klebsiella aerogenes}, \emph{Morganella morganii}, \emph{Providencia}, and \emph{Serratia}.} @@ -66,11 +66,13 @@ eucast_dosage(ab, administration = "iv", version_breakpoints = 15) \item{overwrite}{A \link{logical} indicating whether to overwrite existing SIR values (default: \code{FALSE}). When \code{FALSE}, only non-SIR values are modified (i.e., any value that is not already S, I or R). To ensure compliance with EUCAST guidelines, \strong{this should remain} \code{FALSE}, as EUCAST notes often state that an organism "should be tested for susceptibility to individual agents or be reported resistant".} +\item{add_if_missing}{A \link{logical} indicating whether rules should also be applied to missing (\code{NA}) values (default: \code{TRUE}). When \code{FALSE}, rules are only applied to cells that already contain an SIR value; cells with \code{NA} are left untouched. This is particularly useful when using \code{overwrite = TRUE} with custom rules and you want to update reported results without imputing values for untested drugs.} + \item{...}{Column names of antimicrobials. To automatically detect antimicrobial column names, do not provide any named arguments; \code{\link[=guess_ab_col]{guess_ab_col()}} will then be used for detection. To manually specify a column, provide its name (case-insensitive) as an argument, e.g. \code{AMX = "amoxicillin"}. To skip a specific antimicrobial, set it to \code{NULL}, e.g. \code{TIC = NULL} to exclude ticarcillin. 
If a manually defined column does not exist in the data, it will be skipped with a warning.} \item{ab}{Any (vector of) text that can be coerced to a valid antimicrobial drug code with \code{\link[=as.ab]{as.ab()}}.} -\item{administration}{Route of administration, either {.val }, {.val im}, {.val iv}, {.val oral}, or NA.} +\item{administration}{Route of administration, either \code{""}, \code{"im"}, \code{"iv"}, \code{"oral"}, or NA.} } \value{ The input of \code{x}, possibly with edited values of antimicrobials. Or, if \code{verbose = TRUE}, a \link{data.frame} with all original and new values of the affected bug-drug combinations. diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index d8d211178..177cb8648 100644 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -9,12 +9,12 @@ A \link[tibble:tibble]{tibble} with 78 679 observations and 26 variables: \itemize{ \item \code{mo}\cr ID of microorganism as used by this package. \emph{\strong{This is a unique identifier.}} \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon. \emph{\strong{This is a unique identifier.}} -\item \code{status} \cr Status of the taxon, either {.val accepted}, {.val not validly published}, {.val synonym}, or {.val unknown} +\item \code{status} \cr Status of the taxon, either \code{"accepted"}, \code{"not validly published"}, \code{"synonym"}, or \code{"unknown"} \item \code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{subspecies}\cr Taxonomic rank of the microorganism. Note that for fungi, \emph{phylum} is equal to their taxonomic \emph{division}. Also, for fungi, \emph{subkingdom} and \emph{subdivision} were left out since they do not occur in the bacterial taxonomy. 
\item \code{rank}\cr Text of the taxonomic rank of the microorganism, such as \code{"species"} or \code{"genus"} \item \code{ref}\cr Author(s) and year of related scientific publication. This contains only the \emph{first surname} and year of the \emph{latest} authors, e.g. "Wallis \emph{et al.} 2006 \emph{emend.} Smith and Jones 2018" becomes "Smith \emph{et al.}, 2018". This field is directly retrieved from the source specified in the column \code{source}. Moreover, accents were removed to comply with CRAN that only allows ASCII characters. -\item \code{oxygen_tolerance} \cr Oxygen tolerance, either {.val aerobe}, {.val anaerobe}, {.val anaerobe/microaerophile}, {.val facultative anaerobe}, {.val likely facultative anaerobe}, {.val microaerophile}, or NA. These data were retrieved from BacDive (see \emph{Source}). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently 68.3\% of all ~39 000 bacteria in the data set contain an oxygen tolerance. -\item \code{source}\cr Either {.val GBIF}, {.val LPSN}, {.val Manually added}, {.val MycoBank}, or {.val manually added} (see \emph{Source}) +\item \code{oxygen_tolerance} \cr Oxygen tolerance, either \code{"aerobe"}, \code{"anaerobe"}, \code{"anaerobe/microaerophile"}, \code{"facultative anaerobe"}, \code{"likely facultative anaerobe"}, \code{"microaerophile"}, or NA. These data were retrieved from BacDive (see \emph{Source}). Items that contain "likely" are missing from BacDive and were extrapolated from other species within the same genus to guess the oxygen tolerance. Currently 68.3\% of all ~39 000 bacteria in the data set contain an oxygen tolerance. +\item \code{source}\cr Either \code{"GBIF"}, \code{"LPSN"}, \code{"Manually added"}, \code{"MycoBank"}, or \code{"manually added"} (see \emph{Source}) \item \code{lpsn}\cr Identifier ('Record number') of List of Prokaryotic names with Standing in Nomenclature (LPSN). 
This will be the first/highest LPSN identifier to keep one identifier per row. For example, \emph{Acetobacter ascendens} has LPSN Record number 7864 and 11011. Only the first is available in the \code{microorganisms} data set. \emph{\strong{This is a unique identifier}}, though available for only ~33 000 records. \item \code{lpsn_parent}\cr LPSN identifier of the parent taxon \item \code{lpsn_renamed_to}\cr LPSN identifier of the currently valid taxon diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 1655c258a..ad14421f6 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -165,7 +165,7 @@ The default is \code{FALSE}, which will return a note if outdated taxonomic name \item{open}{Browse the URL using \code{\link[utils:browseURL]{browseURL()}}.} -\item{property}{One of the column names of the \link{microorganisms} data set: {.val mo}, {.val fullname}, {.val status}, {.val kingdom}, {.val phylum}, {.val class}, {.val order}, {.val family}, {.val genus}, {.val species}, {.val subspecies}, {.val rank}, {.val ref}, {.val oxygen_tolerance}, {.val source}, {.val lpsn}, {.val lpsn_parent}, {.val lpsn_renamed_to}, {.val mycobank}, {.val mycobank_parent}, {.val mycobank_renamed_to}, {.val gbif}, {.val gbif_parent}, {.val gbif_renamed_to}, {.val prevalence}, or {.val snomed}, or must be \code{"shortname"}.} +\item{property}{One of the column names of the \link{microorganisms} data set: \code{"mo"}, \code{"fullname"}, \code{"status"}, \code{"kingdom"}, \code{"phylum"}, \code{"class"}, \code{"order"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"subspecies"}, \code{"rank"}, \code{"ref"}, \code{"oxygen_tolerance"}, \code{"source"}, \code{"lpsn"}, \code{"lpsn_parent"}, \code{"lpsn_renamed_to"}, \code{"mycobank"}, \code{"mycobank_parent"}, \code{"mycobank_renamed_to"}, \code{"gbif"}, \code{"gbif_parent"}, \code{"gbif_renamed_to"}, \code{"prevalence"}, or \code{"snomed"}, or must be \code{"shortname"}.} } \value{ \itemize{ diff --git a/man/plot.Rd 
b/man/plot.Rd index 2b9813c18..ae6c61d0d 100644 --- a/man/plot.Rd +++ b/man/plot.Rd @@ -137,7 +137,7 @@ labels_sir_count(position = NULL, x = "antibiotic", \item{include_PKPD}{A \link{logical} to indicate that PK/PD clinical breakpoints must be applied as a last resort - the default is \code{TRUE}. Can also be set with the package option \code{\link[=AMR-options]{AMR_include_PKPD}}.} -\item{breakpoint_type}{The type of breakpoints to use, either {.val ECOFF}, {.val animal}, or {.val human}. ECOFF stands for Epidemiological Cut-Off values. The default is \code{"human"}, which can also be set with the package option \code{\link[=AMR-options]{AMR_breakpoint_type}}. If \code{host} is set to values of veterinary species, this will automatically be set to \code{"animal"}.} +\item{breakpoint_type}{The type of breakpoints to use, either \code{"ECOFF"}, \code{"animal"}, or \code{"human"}. ECOFF stands for Epidemiological Cut-Off values. The default is \code{"human"}, which can also be set with the package option \code{\link[=AMR-options]{AMR_breakpoint_type}}. If \code{host} is set to values of veterinary species, this will automatically be set to \code{"animal"}.} \item{facet}{Variable to split plots by, either \code{"interpretation"} (default) or \code{"antibiotic"} or a grouping variable.} diff --git a/man/top_n_microorganisms.Rd b/man/top_n_microorganisms.Rd index fe9adccac..de949606a 100644 --- a/man/top_n_microorganisms.Rd +++ b/man/top_n_microorganisms.Rd @@ -12,7 +12,7 @@ top_n_microorganisms(x, n, property = "species", n_for_each = NULL, \item{n}{An integer specifying the maximum number of unique values of the \code{property} to include in the output.} -\item{property}{A character string indicating the microorganism property to use for filtering. 
Must be one of the column names of the \link{microorganisms} data set: {.val mo}, {.val fullname}, {.val status}, {.val kingdom}, {.val phylum}, {.val class}, {.val order}, {.val family}, {.val genus}, {.val species}, {.val subspecies}, {.val rank}, {.val ref}, {.val oxygen_tolerance}, {.val source}, {.val lpsn}, {.val lpsn_parent}, {.val lpsn_renamed_to}, {.val mycobank}, {.val mycobank_parent}, {.val mycobank_renamed_to}, {.val gbif}, {.val gbif_parent}, {.val gbif_renamed_to}, {.val prevalence}, or {.val snomed}. If \code{NULL}, the raw values from \code{col_mo} will be used without transformation. When using \code{"species"} (default) or \code{"subpecies"}, the genus will be added to make sure each (sub)species still belongs to the right genus.} +\item{property}{A character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: \code{"mo"}, \code{"fullname"}, \code{"status"}, \code{"kingdom"}, \code{"phylum"}, \code{"class"}, \code{"order"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"subspecies"}, \code{"rank"}, \code{"ref"}, \code{"oxygen_tolerance"}, \code{"source"}, \code{"lpsn"}, \code{"lpsn_parent"}, \code{"lpsn_renamed_to"}, \code{"mycobank"}, \code{"mycobank_parent"}, \code{"mycobank_renamed_to"}, \code{"gbif"}, \code{"gbif_parent"}, \code{"gbif_renamed_to"}, \code{"prevalence"}, or \code{"snomed"}. If \code{NULL}, the raw values from \code{col_mo} will be used without transformation. When using \code{"species"} (default) or \code{"subspecies"}, the genus will be added to make sure each (sub)species still belongs to the right genus.} \item{n_for_each}{An optional integer specifying the maximum number of rows to retain for each value of the selected property.
If \code{NULL}, all rows within the top \emph{n} groups will be included.} diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index 2a5105c27..8c659c255 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -268,7 +268,8 @@ To create a traditional antibiogram, simply state which antibiotics should be us ```{r trad} antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems())) + antibiotics = c(aminoglycosides(), carbapenems()) +) ``` Notice that the `antibiogram()` function automatically prints in the right format when using Quarto or R Markdown (such as this page), and even applies italics for taxonomic names (by using `italicise_taxonomy()` internally). @@ -277,10 +278,11 @@ It also uses the language of your OS if this is either `r AMR:::vector_or(vapply ```{r trad2} antibiogram(example_isolates, - mo_transform = "gramstain", - antibiotics = aminoglycosides(), - ab_transform = "name", - language = "es") + mo_transform = "gramstain", + antibiotics = aminoglycosides(), + ab_transform = "name", + language = "es" +) ``` ### Combined Antibiogram @@ -289,8 +291,9 @@ To create a combined antibiogram, use antibiotic codes or names with a plus `+` ```{r comb} combined_ab <- antibiogram(example_isolates, - antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), - ab_transform = NULL) + antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), + ab_transform = NULL +) combined_ab ``` @@ -300,8 +303,9 @@ To create a syndromic antibiogram, the `syndromic_group` argument must be used. 
```{r synd} antibiogram(example_isolates, - antibiotics = c(aminoglycosides(), carbapenems()), - syndromic_group = "ward") + antibiotics = c(aminoglycosides(), carbapenems()), + syndromic_group = "ward" +) ``` ### Weighted-Incidence Syndromic Combination Antibiogram (WISCA) @@ -310,8 +314,10 @@ To create a **Weighted-Incidence Syndromic Combination Antibiogram (WISCA)**, si ```{r wisca} example_isolates %>% - wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), - minimum = 10) # Recommended threshold: ≥30 + wisca( + antibiotics = c("TZP", "TZP+TOB", "TZP+GEN"), + minimum = 10 + ) # Recommended threshold: ≥30 ``` WISCA uses a **Bayesian decision model** to integrate data from multiple pathogens, improving empirical therapy guidance, especially for low-incidence infections. It is **pathogen-agnostic**, meaning results are syndrome-based rather than stratified by microorganism. @@ -323,8 +329,10 @@ For **patient- or syndrome-specific WISCA**, run the function on a grouped `tibb ```{r wisca_grouped} example_isolates %>% top_n_microorganisms(n = 10) %>% - group_by(age_group = age_groups(age, c(25, 50, 75)), - gender) %>% + group_by( + age_group = age_groups(age, c(25, 50, 75)), + gender + ) %>% wisca(antibiotics = c("TZP", "TZP+TOB", "TZP+GEN")) ``` @@ -379,17 +387,21 @@ We can visualise MIC distributions and their SIR interpretations using `ggplot2` ```{r mic_plot} # add a group -my_data$group <- rep(c("A", "B", "C", "D"), each = 25) +my_data$group <- rep(c("A", "B", "C", "D"), each = 25) -ggplot(my_data, - aes(x = group, y = MIC, colour = SIR)) + +ggplot( + my_data, + aes(x = group, y = MIC, colour = SIR) +) + geom_jitter(width = 0.2, size = 2) + geom_boxplot(fill = NA, colour = "grey40") + scale_y_mic() + scale_colour_sir() + - labs(title = "MIC Distribution and SIR Interpretation", - x = "Sample Groups", - y = "MIC (mg/L)") + labs( + title = "MIC Distribution and SIR Interpretation", + x = "Sample Groups", + y = "MIC (mg/L)" + ) ``` This plot provides an intuitive 
way to assess susceptibility patterns across different groups while incorporating clinical breakpoints. diff --git a/vignettes/AMR_with_tidymodels.Rmd b/vignettes/AMR_with_tidymodels.Rmd index 156dfc14e..7da7cfe71 100644 --- a/vignettes/AMR_with_tidymodels.Rmd +++ b/vignettes/AMR_with_tidymodels.Rmd @@ -53,8 +53,8 @@ We begin by loading the required libraries and preparing the `example_isolates` ```{r lib packages, message = FALSE, warning = FALSE, results = 'asis'} # Load required libraries -library(AMR) # For AMR data analysis -library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...) +library(AMR) # For AMR data analysis +library(tidymodels) # For machine learning workflows, and data manipulation (dplyr, tidyr, ...) ``` Prepare the data: @@ -68,13 +68,19 @@ data <- example_isolates %>% # select AB results dynamically select(mo, aminoglycosides(), betalactams()) %>% # replace NAs with NI (not-interpretable) - mutate(across(where(is.sir), - ~replace_na(.x, "NI")), - # make factors of SIR columns - across(where(is.sir), - as.integer), - # get Gramstain of microorganisms - mo = as.factor(mo_gramstain(mo))) %>% + mutate( + across( + where(is.sir), + ~ replace_na(.x, "NI") + ), + # make factors of SIR columns + across( + where(is.sir), + as.integer + ), + # get Gramstain of microorganisms + mo = as.factor(mo_gramstain(mo)) + ) %>% # drop NAs - the ones without a Gramstain (fungi, etc.) drop_na() ``` @@ -149,7 +155,7 @@ To train the model, we split the data into training and testing sets. Then, we f set.seed(123) # For reproducibility data_split <- initial_split(data, prop = 0.8) # 80% training, 20% testing training_data <- training(data_split) # Training set -testing_data <- testing(data_split) # Testing set +testing_data <- testing(data_split) # Testing set # Fit the workflow to the training data fitted_workflow <- resistance_workflow %>% @@ -168,7 +174,7 @@ Next, we evaluate the model on the testing data. 
```{r} # Make predictions on the testing set predictions <- fitted_workflow %>% - predict(testing_data) # Generate predictions + predict(testing_data) # Generate predictions probabilities <- fitted_workflow %>% predict(testing_data, type = "prob") # Generate probabilities @@ -266,8 +272,8 @@ testing_data <- testing(split) # Define the recipe mic_recipe <- recipe(esbl ~ ., data = training_data) %>% - remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable - step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors + remove_role(genus, old_role = "predictor") %>% # Remove non-informative variable + step_mic_log2(all_mic_predictors()) # Log2 transform all MIC predictors prep(mic_recipe) ``` @@ -341,9 +347,11 @@ library(ggplot2) ggplot(predictions, aes(x = esbl, fill = .pred_class)) + geom_bar(position = "stack") + - labs(title = "Predicted vs Actual ESBL Status", - x = "Actual ESBL", - y = "Count") + + labs( + title = "Predicted vs Actual ESBL Status", + x = "Actual ESBL", + y = "Count" + ) + theme_minimal() ``` @@ -351,18 +359,27 @@ And plot the certainties too - how certain were the actual predictions? ```{r} predictions %>% - mutate(certainty = ifelse(.pred_class == "FALSE", - .pred_FALSE, - .pred_TRUE), - correct = ifelse(esbl == .pred_class, "Right", "Wrong")) %>% - ggplot(aes(x = seq_len(nrow(predictions)), - y = certainty, - colour = correct)) + - scale_colour_manual(values = c(Right = "green3", Wrong = "red2"), - name = "Correct?") + + mutate( + certainty = ifelse(.pred_class == "FALSE", + .pred_FALSE, + .pred_TRUE + ), + correct = ifelse(esbl == .pred_class, "Right", "Wrong") + ) %>% + ggplot(aes( + x = seq_len(nrow(predictions)), + y = certainty, + colour = correct + )) + + scale_colour_manual( + values = c(Right = "green3", Wrong = "red2"), + name = "Correct?" 
+ ) + geom_point() + - scale_y_continuous(labels = function(x) paste0(x * 100, "%"), - limits = c(0.5, 1)) + + scale_y_continuous( + labels = function(x) paste0(x * 100, "%"), + limits = c(0.5, 1) + ) + theme_minimal() ``` @@ -399,13 +416,18 @@ library(tidymodels) # Transform dataset data_time <- example_isolates %>% top_n_microorganisms(n = 10) %>% # Filter on the top #10 species - mutate(year = as.integer(format(date, "%Y")), # Extract year from date - gramstain = mo_gramstain(mo)) %>% # Get taxonomic names + mutate( + year = as.integer(format(date, "%Y")), # Extract year from date + gramstain = mo_gramstain(mo) + ) %>% # Get taxonomic names group_by(year, gramstain) %>% - summarise(across(c(AMX, AMC, CIP), - function(x) resistance(x, minimum = 0), - .names = "res_{.col}"), - .groups = "drop") %>% + summarise( + across(c(AMX, AMC, CIP), + function(x) resistance(x, minimum = 0), + .names = "res_{.col}" + ), + .groups = "drop" + ) %>% filter(!is.na(res_AMX) & !is.na(res_AMC) & !is.na(res_CIP)) # Drop missing values data_time @@ -426,9 +448,9 @@ We now define the modelling workflow, which consists of a preprocessing step, a ```{r} # Define the recipe resistance_recipe_time <- recipe(res_AMX ~ year + gramstain, data = data_time) %>% - step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical - step_normalize(year) %>% # Normalise year for better model performance - step_nzv(all_predictors()) # Remove near-zero variance predictors + step_dummy(gramstain, one_hot = TRUE) %>% # Convert categorical to numerical + step_normalize(year) %>% # Normalise year for better model performance + step_nzv(all_predictors()) # Remove near-zero variance predictors resistance_recipe_time ``` @@ -514,9 +536,11 @@ library(ggplot2) ggplot(predictions_time, aes(x = year)) + geom_point(aes(y = res_AMX, color = "Actual")) + geom_line(aes(y = .pred, color = "Predicted")) + - labs(title = "Predicted vs Actual AMX Resistance Over Time", - x = "Year", - y = "Resistance 
Proportion") + + labs( + title = "Predicted vs Actual AMX Resistance Over Time", + x = "Year", + y = "Resistance Proportion" + ) + theme_minimal() ``` @@ -525,13 +549,17 @@ Additionally, we can visualise resistance trends in `ggplot2` and directly add l ```{r} ggplot(data_time, aes(x = year, y = res_AMX, color = gramstain)) + geom_line() + - labs(title = "AMX Resistance Trends", - x = "Year", - y = "Resistance Proportion") + + labs( + title = "AMX Resistance Trends", + x = "Year", + y = "Resistance Proportion" + ) + # add a linear model directly in ggplot2: - geom_smooth(method = "lm", - formula = y ~ x, - alpha = 0.25) + + geom_smooth( + method = "lm", + formula = y ~ x, + alpha = 0.25 + ) + theme_minimal() ``` diff --git a/vignettes/EUCAST.Rmd b/vignettes/EUCAST.Rmd index 78627f2fd..8e45e8493 100644 --- a/vignettes/EUCAST.Rmd +++ b/vignettes/EUCAST.Rmd @@ -80,7 +80,7 @@ data <- tibble::tibble( CAZ = "-", # Ceftazidime CXM = "-", # Cefuroxime PEN = "S", # Benzylenicillin - FOX = "S" # Cefoxitin + FOX = "S" # Cefoxitin ) ``` ```{r, eval = FALSE} diff --git a/vignettes/WISCA.Rmd b/vignettes/WISCA.Rmd index 14d41ae1e..dc28089d7 100644 --- a/vignettes/WISCA.Rmd +++ b/vignettes/WISCA.Rmd @@ -147,31 +147,35 @@ data$syndrome <- ifelse(data$mo %like% "coli", "UTI", "No UTI") ```{r} wisca(data, - antimicrobials = c("AMC", "CIP", "GEN")) + antimicrobials = c("AMC", "CIP", "GEN") +) ``` ### Use combination regimens ```{r} wisca(data, - antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN")) + antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN") +) ``` ### Stratify by syndrome ```{r} wisca(data, - antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"), - syndromic_group = "syndrome") + antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"), + syndromic_group = "syndrome" +) ``` The `AMR` package is available in `r length(AMR:::LANGUAGES_SUPPORTED)` languages, which can all be used for the `wisca()` function too: ```{r} wisca(data, - antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"), 
- syndromic_group = gsub("UTI", "UCI", data$syndrome), - language = "Spanish") + antimicrobials = c("AMC", "AMC + CIP", "AMC + GEN"), + syndromic_group = gsub("UTI", "UCI", data$syndrome), + language = "Spanish" +) ```