diff --git a/DESCRIPTION b/DESCRIPTION
index 19b41c1b..3396a01c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: AMR
-Version: 1.4.0.9011
-Date: 2020-10-27
+Version: 1.4.0.9012
+Date: 2020-11-05
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index a19ed118..b533a07b 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
-# AMR 1.4.0.9011
-## Last updated: 27 October 2020
+# AMR 1.4.0.9012
+## Last updated: 5 November 2020
### New
* Functions `is_gram_negative()` and `is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE`, thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
@@ -9,7 +9,7 @@
* For all function parameters in the code, it is now defined what the exact type of user input should be (inspired by the [`typed`](https://github.com/moodymudskipper/typed) package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. In total, more than 400 arguments were defined.
* Deprecated function `p_symbol()` that not really fits the scope of this package. It will be removed in a future version. See [here](https://github.com/msberends/AMR/blob/v1.4.0/R/p_symbol.R) for the source code to preserve it.
* Better determination of disk zones and MIC values when running `as.rsi()` on a data.frame
-* Updated coagulase-negative staphylococci with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS
+* Updated coagulase-negative staphylococci determination with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS
* Fix for using parameter `reference_df` in `as.mo()` and `mo_*()` functions that contain old microbial codes (from previous package versions)
### Other
diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R
index a3836393..bfe77618 100755
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -87,7 +87,7 @@ addin_insert_like <- function() {
current_row_txt <- context$contents[current_row]
pos_preceded_by <- function(txt) {
- substr(current_row_txt, current_col - nchar(txt), current_col) == txt
+ substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt)
}
replace_pos <- function(old, with) {
modifyRange(document_range(document_position(current_row, current_col - nchar(old)),
diff --git a/R/like.R b/R/like.R
index 3d1c8b8b..013e829c 100755
--- a/R/like.R
+++ b/R/like.R
@@ -67,6 +67,11 @@
#' if (require("dplyr")) {
#' example_isolates %>%
#' filter(mo_name(mo) %like% "^ent")
+#'
+#' example_isolates %>%
+#' mutate(group = case_when(hospital_id %like% "A|D" ~ "Group 1",
+#' mo_name(mo) %not_like% "^Staph" ~ "Group 2a",
+#' TRUE ~ "Group 2b"))
#' }
#' }
like <- function(x, pattern, ignore.case = TRUE) {
@@ -168,7 +173,6 @@ like <- function(x, pattern, ignore.case = TRUE) {
like(x, pattern, ignore.case = FALSE)
}
-
#' @rdname like
#' @export
"%not_like_case%" <- function(x, pattern) {
diff --git a/R/mo.R b/R/mo.R
index 463e2dbd..3dccdf3c 100755
--- a/R/mo.R
+++ b/R/mo.R
@@ -199,31 +199,16 @@ as.mo <- function(x,
uncertainty_level <- translate_allow_uncertain(allow_uncertain)
if (mo_source_isvalid(reference_df)
- & isFALSE(Becker)
- & isFALSE(Lancefield)
- & !is.null(reference_df)
- & all(x %in% reference_df[, 1][[1]])) {
+ && isFALSE(Becker)
+ && isFALSE(Lancefield)
+ && !is.null(reference_df)
+ && all(x %in% unlist(reference_df), na.rm = TRUE)) {
- # has valid own reference_df
- reference_df <- reference_df %pm>% pm_filter(!is.na(mo))
- # keep only first two columns, second must be mo
- if (colnames(reference_df)[1] == "mo") {
- reference_df <- reference_df[, c(2, 1)]
- } else {
- reference_df <- reference_df[, c(1, 2)]
- }
- # some microbial codes might be old
- reference_df[, 1] <- as.mo(reference_df[, 1, drop = TRUE])
-
- colnames(reference_df)[1] <- "x"
- # remove factors, just keep characters
- suppressWarnings(
- reference_df[] <- lapply(reference_df, as.character)
- )
+ reference_df <- repair_reference_df(reference_df)
suppressWarnings(
y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>%
pm_left_join(reference_df, by = "x") %pm>%
- pm_pull("mo")
+ pm_pull(mo)
)
} else if (all(x[!is.na(x)] %in% MO_lookup$mo)
@@ -406,22 +391,7 @@ exec_as.mo <- function(x,
# defined df to check for
if (!is.null(reference_df)) {
mo_source_isvalid(reference_df)
-
- reference_df <- reference_df %pm>% pm_filter(!is.na(mo))
- # keep only first two columns, second must be named "mo"
- if (colnames(reference_df)[1] == "mo") {
- reference_df <- reference_df[, c(2, 1)]
- } else {
- reference_df <- reference_df[, c(1, 2)]
- }
- # some microbial codes might be old
- reference_df[, 1] <- as.mo(reference_df[, 1, drop = TRUE])
-
- colnames(reference_df)[1] <- "x"
- # remove factors, just keep characters
- suppressWarnings(
- reference_df[] <- lapply(reference_df, as.character)
- )
+ reference_df <- repair_reference_df(reference_df)
}
# all empty
@@ -1936,7 +1906,11 @@ replace_old_mo_codes <- function(x, property) {
if (property != "mo") {
message_(font_blue("NOTE: The input contained old microbial codes (from previous package versions). Please update your MO codes with as.mo()."))
} else {
- message_(font_blue("NOTE:", length(matched), "old microbial codes (from previous package versions) were updated to current used codes."))
+ if (length(matched) == 1) {
+ message_(font_blue("NOTE: 1 old microbial code (from previous package versions) was updated to a current used code."))
+ } else {
+ message_(font_blue("NOTE:", length(matched), "old microbial codes (from previous package versions) were updated to current used codes."))
+ }
}
}
x
@@ -1955,6 +1929,27 @@ replace_ignore_pattern <- function(x, ignore_pattern) {
x
}
+repair_reference_df <- function(reference_df) {
+ # normalise a user-supplied reference_df to two character columns, "x" and "mo"; start by keeping only rows where mo is not NA
+ reference_df <- reference_df %pm>%
+ pm_filter(!is.na(mo))
+
+ # keep two columns with "mo" as the second: the other one is column 2 if column 1 is "mo", otherwise column 1
+ if (colnames(reference_df)[1] == "mo") {
+ reference_df <- reference_df %pm>% pm_select(2, "mo")
+ } else {
+ reference_df <- reference_df %pm>% pm_select(1, "mo")
+ }
+ # run the mo column through as.mo(), since some microbial codes might be old
+ reference_df[, 2] <- as.mo(reference_df[, 2, drop = TRUE])
+ # remove factors, just keep characters: every column is coerced with as.character()
+ suppressWarnings(
+ reference_df[] <- lapply(reference_df, as.character)
+ )
+ colnames(reference_df)[1] <- "x"
+ reference_df
+}
+
left_join_MO_lookup <- function(x, ...) {
pm_left_join(x = x, y = MO_lookup, ...)
}
diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R
index 1311c164..808450d3 100755
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -25,7 +25,9 @@
#' Calculate the matching score for microorganisms
#'
-#' This helper function is used by [as.mo()] to determine the most probable match of taxonomic records, based on user input.
+#' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input.
+#' @inheritSection lifecycle Stable lifecycle
+#' @author Matthijs S. Berends
#' @param x Any user input value(s)
#' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
#' @section Matching score for microorganisms:
diff --git a/docs/404.html b/docs/404.html
index 9139204f..e52ba3cd 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -81,7 +81,7 @@
NEWS.md
-
typed
package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. In total, more than 400 arguments were defined.p_symbol()
that not really fits the scope of this package. It will be removed in a future version. See here for the source code to preserve it.as.rsi()
on a data.framereference_df
in as.mo()
and mo_*()
functions that contain old microbial codes (from previous package versions)This helper function is used by as.mo()
to determine the most probable match of taxonomic records, based on user input.
This algorithm is used by as.mo()
and all the mo_*
functions to determine the most probable match of taxonomic records based on user input.
mo_matching_score(x, n)@@ -274,6 +274,16 @@
The grouping into human pathogenic prevalence (\(p\)) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. Group 1 (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is Enterococcus, Staphylococcus or Streptococcus. This group consequently contains all common Gram-negative bacteria, such as Pseudomonas and Legionella and all species within the order Enterobacterales. Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is Absidia, Acremonium, Actinotignum, Alternaria, Anaerosalibacter, Apophysomyces, Arachnia, Aspergillus, Aureobacterium, Aureobasidium, Bacteroides, Basidiobolus, Beauveria, Blastocystis, Branhamella, Calymmatobacterium, Candida, Capnocytophaga, Catabacter, Chaetomium, Chryseobacterium, Chryseomonas, Chrysonilia, Cladophialophora, Cladosporium, Conidiobolus, Cryptococcus, Curvularia, Exophiala, Exserohilum, Flavobacterium, Fonsecaea, Fusarium, Fusobacterium, Hendersonula, Hypomyces, Koserella, Lelliottia, Leptosphaeria, Leptotrichia, Malassezia, Malbranchea, Mortierella, Mucor, Mycocentrospora, Mycoplasma, Nectria, Ochroconis, Oidiodendron, Phoma, Piedraia, Pithomyces, Pityrosporum, Prevotella, Pseudallescheria, Rhizomucor, Rhizopus, Rhodotorula, Scolecobasidium, Scopulariopsis, Scytalidium, Sporobolomyces, Stachybotrys, Stomatococcus, Treponema, Trichoderma, Trichophyton, Trichosporon, Tritirachium or Ureaplasma. Group 3 consists of all other microorganisms.
All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., "E. coli"
will return the microbial ID of Escherichia coli (\(m = 0.688\), a highly prevalent microorganism found in humans) and not Entamoeba coli (\(m = 0.079\), a less prevalent microorganism in humans), although the latter would alphabetically come first.
+The lifecycle of this function is stable. In a stable function, major changes are unlikely. This means that the underlying code will generally evolve by adding new arguments; removing arguments or changing the meaning of existing arguments will be avoided.
If the underlying code needs breaking changes, they will occur gradually. For example, a parameter will be deprecated and first continue to work, but will emit a message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error.
+Matthijs S. Berends
as.mo("E. coli") diff --git a/docs/survey.html b/docs/survey.html index a200e3bc..a7216eac 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ diff --git a/man/like.Rd b/man/like.Rd index a7067402..073b8555 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -80,6 +80,11 @@ a \%like\% b if (require("dplyr")) { example_isolates \%>\% filter(mo_name(mo) \%like\% "^ent") + + example_isolates \%>\% + mutate(group = case_when(hospital_id \%like\% "A|D" ~ "Group 1", + mo_name(mo) \%not_like\% "^Staph" ~ "Group 2a", + TRUE ~ "Group 2b")) } } } diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd index db169e3a..91c7228f 100644 --- a/man/mo_matching_score.Rd +++ b/man/mo_matching_score.Rd @@ -12,7 +12,7 @@ mo_matching_score(x, n) \item{n}{A full taxonomic name, that exists in \code{\link[=microorganisms]{microorganisms$fullname}}} } \description{ -This helper function is used by \code{\link[=as.mo]{as.mo()}} to determine the most probable match of taxonomic records, based on user input. +This algorithm is used by \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions to determine the most probable match of taxonomic records based on user input. } \section{Matching score for microorganisms}{ @@ -35,6 +35,14 @@ The grouping into human pathogenic prevalence (\eqn{p}) is based on experience f All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., \code{"E. coli"} will return the microbial ID of \emph{Escherichia coli} (\eqn{m = 0.688}, a highly prevalent microorganism found in humans) and not \emph{Entamoeba coli} (\eqn{m = 0.079}, a less prevalent microorganism in humans), although the latter would alphabetically come first. } +\section{Stable lifecycle}{ + +\if{html}{\figure{lifecycle_stable.svg}{options: style=margin-bottom:5px} \cr} +The \link[=lifecycle]{lifecycle} of this function is \strong{stable}. 
In a stable function, major changes are unlikely. This means that the underlying code will generally evolve by adding new arguments; removing arguments or changing the meaning of existing arguments will be avoided. + +If the underlying code needs breaking changes, they will occur gradually. For example, a parameter will be deprecated and first continue to work, but will emit a message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error. +} + \examples{ as.mo("E. coli") mo_uncertainties() @@ -42,3 +50,6 @@ mo_uncertainties() mo_matching_score(x = "E. coli", n = c("Escherichia coli", "Entamoeba coli")) } +\author{ +Matthijs S. Berends +}