From 423879c034b4d8cd928535333c88338a2848eb8d Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 5 Nov 2020 01:11:49 +0100 Subject: [PATCH] (v1.4.0.9012) reference_df fix --- DESCRIPTION | 4 +- NEWS.md | 6 +-- R/aa_helper_functions.R | 2 +- R/like.R | 6 ++- R/mo.R | 71 +++++++++++++-------------- R/mo_matching_score.R | 4 +- docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 14 +++--- docs/pkgdown.yml | 2 +- docs/reference/index.html | 2 +- docs/reference/like.html | 7 ++- docs/reference/mo_matching_score.html | 16 ++++-- docs/survey.html | 2 +- man/like.Rd | 5 ++ man/mo_matching_score.Rd | 13 ++++- 19 files changed, 98 insertions(+), 66 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 19b41c1b..3396a01c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.4.0.9011 -Date: 2020-10-27 +Version: 1.4.0.9012 +Date: 2020-11-05 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index a19ed118..b533a07b 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.4.0.9011 -## Last updated: 27 October 2020 +# AMR 1.4.0.9012 +## Last updated: 5 November 2020 ### New * Functions `is_gram_negative()` and `is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE`, thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. @@ -9,7 +9,7 @@ * For all function parameters in the code, it is now defined what the exact type of user input should be (inspired by the [`typed`](https://github.com/moodymudskipper/typed) package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. 
In total, more than 400 arguments were defined. * Deprecated function `p_symbol()` that not really fits the scope of this package. It will be removed in a future version. See [here](https://github.com/msberends/AMR/blob/v1.4.0/R/p_symbol.R) for the source code to preserve it. * Better determination of disk zones and MIC values when running `as.rsi()` on a data.frame -* Updated coagulase-negative staphylococci with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS +* Updated coagulase-negative staphylococci determination with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS * Fix for using parameter `reference_df` in `as.mo()` and `mo_*()` functions that contain old microbial codes (from previous package versions) ### Other diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index a3836393..bfe77618 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -87,7 +87,7 @@ addin_insert_like <- function() { current_row_txt <- context$contents[current_row] pos_preceded_by <- function(txt) { - substr(current_row_txt, current_col - nchar(txt), current_col) == txt + substr(current_row_txt, current_col - nchar(txt), current_col) %like% paste0("^", txt) } replace_pos <- function(old, with) { modifyRange(document_range(document_position(current_row, current_col - nchar(old)), diff --git a/R/like.R b/R/like.R index 3d1c8b8b..013e829c 100755 --- a/R/like.R +++ b/R/like.R @@ -67,6 +67,11 @@ #' if (require("dplyr")) { #' example_isolates %>% #' filter(mo_name(mo) %like% "^ent") +#' +#' example_isolates %>% +#' mutate(group = case_when(hospital_id %like% "A|D" ~ "Group 1", +#' mo_name(mo) %not_like% "^Staph" ~ "Group 2a", +#' TRUE ~ "Group 2b")) #' } #' } like <- function(x, pattern, ignore.case = TRUE) { @@ -168,7 
+173,6 @@ like <- function(x, pattern, ignore.case = TRUE) { like(x, pattern, ignore.case = FALSE) } - #' @rdname like #' @export "%not_like_case%" <- function(x, pattern) { diff --git a/R/mo.R b/R/mo.R index 463e2dbd..3dccdf3c 100755 --- a/R/mo.R +++ b/R/mo.R @@ -199,31 +199,16 @@ as.mo <- function(x, uncertainty_level <- translate_allow_uncertain(allow_uncertain) if (mo_source_isvalid(reference_df) - & isFALSE(Becker) - & isFALSE(Lancefield) - & !is.null(reference_df) - & all(x %in% reference_df[, 1][[1]])) { + && isFALSE(Becker) + && isFALSE(Lancefield) + && !is.null(reference_df) + && all(x %in% unlist(reference_df), na.rm = TRUE)) { - # has valid own reference_df - reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) - # keep only first two columns, second must be mo - if (colnames(reference_df)[1] == "mo") { - reference_df <- reference_df[, c(2, 1)] - } else { - reference_df <- reference_df[, c(1, 2)] - } - # some microbial codes might be old - reference_df[, 1] <- as.mo(reference_df[, 1, drop = TRUE]) - - colnames(reference_df)[1] <- "x" - # remove factors, just keep characters - suppressWarnings( - reference_df[] <- lapply(reference_df, as.character) - ) + reference_df <- repair_reference_df(reference_df) suppressWarnings( y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>% pm_left_join(reference_df, by = "x") %pm>% - pm_pull("mo") + pm_pull(mo) ) } else if (all(x[!is.na(x)] %in% MO_lookup$mo) @@ -406,22 +391,7 @@ exec_as.mo <- function(x, # defined df to check for if (!is.null(reference_df)) { mo_source_isvalid(reference_df) - - reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) - # keep only first two columns, second must be named "mo" - if (colnames(reference_df)[1] == "mo") { - reference_df <- reference_df[, c(2, 1)] - } else { - reference_df <- reference_df[, c(1, 2)] - } - # some microbial codes might be old - reference_df[, 1] <- as.mo(reference_df[, 1, drop = TRUE]) - - colnames(reference_df)[1] <- "x" - # remove factors, just keep 
characters - suppressWarnings( - reference_df[] <- lapply(reference_df, as.character) - ) + reference_df <- repair_reference_df(reference_df) } # all empty @@ -1936,7 +1906,11 @@ replace_old_mo_codes <- function(x, property) { if (property != "mo") { message_(font_blue("NOTE: The input contained old microbial codes (from previous package versions). Please update your MO codes with as.mo().")) } else { - message_(font_blue("NOTE:", length(matched), "old microbial codes (from previous package versions) were updated to current used codes.")) + if (length(matched) == 1) { + message_(font_blue("NOTE: 1 old microbial code (from previous package versions) was updated to a current used code.")) + } else { + message_(font_blue("NOTE:", length(matched), "old microbial codes (from previous package versions) were updated to current used codes.")) + } } } x @@ -1955,6 +1929,27 @@ replace_ignore_pattern <- function(x, ignore_pattern) { x } +repair_reference_df <- function(reference_df) { + # has valid own reference_df + reference_df <- reference_df %pm>% + pm_filter(!is.na(mo)) + + # keep only first two columns, second must be mo + if (colnames(reference_df)[1] == "mo") { + reference_df <- reference_df %pm>% pm_select(2, "mo") + } else { + reference_df <- reference_df %pm>% pm_select(1, "mo") + } + # some microbial codes might be old + reference_df[, 2] <- as.mo(reference_df[, 2, drop = TRUE]) + # remove factors, just keep characters + suppressWarnings( + reference_df[] <- lapply(reference_df, as.character) + ) + colnames(reference_df)[1] <- "x" + reference_df +} + left_join_MO_lookup <- function(x, ...) { pm_left_join(x = x, y = MO_lookup, ...) 
} diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 1311c164..808450d3 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -25,7 +25,9 @@ #' Calculate the matching score for microorganisms #' -#' This helper function is used by [as.mo()] to determine the most probable match of taxonomic records, based on user input. +#' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input. +#' @inheritSection lifecycle Stable lifecycle +#' @author Matthijs S. Berends #' @param x Any user input value(s) #' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms] #' @section Matching score for microorganisms: diff --git a/docs/404.html b/docs/404.html index 9139204f..e52ba3cd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index cdd4b535..7997ba76 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 diff --git a/docs/articles/index.html b/docs/articles/index.html index 4e4046d8..176ce3e4 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 diff --git a/docs/authors.html b/docs/authors.html index ddd1d244..e40b20ca 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 diff --git a/docs/index.html b/docs/index.html index 6d36eee3..32495e37 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 diff --git a/docs/news/index.html b/docs/news/index.html index aec27d17..0f1ddc17 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012 @@ -236,13 +236,13 @@ Source: NEWS.md -
-

-AMR 1.4.0.9011 Unreleased +
+

+AMR 1.4.0.9012 Unreleased

-
+

-Last updated: 27 October 2020 +Last updated: 5 November 2020

@@ -259,7 +259,7 @@
  • For all function parameters in the code, it is now defined what the exact type of user input should be (inspired by the typed package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. In total, more than 400 arguments were defined.
  • Deprecated function p_symbol() that not really fits the scope of this package. It will be removed in a future version. See here for the source code to preserve it.
  • Better determination of disk zones and MIC values when running as.rsi() on a data.frame
  • -
  • Updated coagulase-negative staphylococci with Becker et al. 2020 (PMID 32056452), meaning that the species S. argensis, S. caeli, S. debuckii, S. edaphicus and S. pseudoxylosus are now all considered CoNS
  • +
  • Updated coagulase-negative staphylococci determination with Becker et al. 2020 (PMID 32056452), meaning that the species S. argensis, S. caeli, S. debuckii, S. edaphicus and S. pseudoxylosus are now all considered CoNS
  • Fix for using parameter reference_df in as.mo() and mo_*() functions that contain old microbial codes (from previous package versions)
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index cd7b9a65..40ff0ae0 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2020-10-27T14:41Z +last_built: 2020-11-05T00:11Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/index.html b/docs/reference/index.html index e777a338..4745e7ac 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9011 + 1.4.0.9012
    diff --git a/docs/reference/like.html b/docs/reference/like.html index e7d875a9..3faa6ae6 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9008 + 1.4.0.9012
    @@ -322,6 +322,11 @@ The lifecycle of this function is stableif (require("dplyr")) { example_isolates %>% filter(mo_name(mo) %like% "^ent") + + example_isolates %>% + mutate(group = case_when(hospital_id %like% "A|D" ~ "Group 1", + mo_name(mo) %not_like% "^Staph" ~ "Group 2a", + TRUE ~ "Group 2b")) } # } diff --git a/docs/reference/mo_matching_score.html b/docs/reference/mo_matching_score.html index c42a9d6b..82c0baf8 100644 --- a/docs/reference/mo_matching_score.html +++ b/docs/reference/mo_matching_score.html @@ -49,7 +49,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9000 + 1.4.0.9012
    @@ -239,7 +239,7 @@

    -

    This helper function is used by as.mo() to determine the most probable match of taxonomic records, based on user input.

    +

    This algorithm is used by as.mo() and all the mo_* functions to determine the most probable match of taxonomic records based on user input.

    mo_matching_score(x, n)
    @@ -274,6 +274,16 @@

    The grouping into human pathogenic prevalence (\(p\)) is based on experience from several microbiological laboratories in the Netherlands in conjunction with international reports on pathogen prevalence. Group 1 (most prevalent microorganisms) consists of all microorganisms where the taxonomic class is Gammaproteobacteria or where the taxonomic genus is Enterococcus, Staphylococcus or Streptococcus. This group consequently contains all common Gram-negative bacteria, such as Pseudomonas and Legionella and all species within the order Enterobacterales. Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is Absidia, Acremonium, Actinotignum, Alternaria, Anaerosalibacter, Apophysomyces, Arachnia, Aspergillus, Aureobacterium, Aureobasidium, Bacteroides, Basidiobolus, Beauveria, Blastocystis, Branhamella, Calymmatobacterium, Candida, Capnocytophaga, Catabacter, Chaetomium, Chryseobacterium, Chryseomonas, Chrysonilia, Cladophialophora, Cladosporium, Conidiobolus, Cryptococcus, Curvularia, Exophiala, Exserohilum, Flavobacterium, Fonsecaea, Fusarium, Fusobacterium, Hendersonula, Hypomyces, Koserella, Lelliottia, Leptosphaeria, Leptotrichia, Malassezia, Malbranchea, Mortierella, Mucor, Mycocentrospora, Mycoplasma, Nectria, Ochroconis, Oidiodendron, Phoma, Piedraia, Pithomyces, Pityrosporum, Prevotella, Pseudallescheria, Rhizomucor, Rhizopus, Rhodotorula, Scolecobasidium, Scopulariopsis, Scytalidium, Sporobolomyces, Stachybotrys, Stomatococcus, Treponema, Trichoderma, Trichophyton, Trichosporon, Tritirachium or Ureaplasma. Group 3 consists of all other microorganisms.

    All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., "E. coli" will return the microbial ID of Escherichia coli (\(m = 0.688\), a highly prevalent microorganism found in humans) and not Entamoeba coli (\(m = 0.079\), a less prevalent microorganism in humans), although the latter would alphabetically come first.

    +

    Stable lifecycle

    + + + +


    +The lifecycle of this function is stable. In a stable function, major changes are unlikely. This means that the underlying code will generally evolve by adding new arguments; removing arguments or changing the meaning of existing arguments will be avoided.

    +

    If the underlying code needs breaking changes, they will occur gradually. For example, a parameter will be deprecated and first continue to work, but will emit a message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error.

    +

    Author

    + +

    Matthijs S. Berends

    Examples

    as.mo("E. coli")
    diff --git a/docs/survey.html b/docs/survey.html
    index a200e3bc..a7216eac 100644
    --- a/docs/survey.html
    +++ b/docs/survey.html
    @@ -81,7 +81,7 @@
           
           
             AMR (for R)
    -        1.4.0.9011
    +        1.4.0.9012
           
         
     
    diff --git a/man/like.Rd b/man/like.Rd
    index a7067402..073b8555 100755
    --- a/man/like.Rd
    +++ b/man/like.Rd
    @@ -80,6 +80,11 @@ a \%like\% b
     if (require("dplyr")) {
       example_isolates \%>\%
         filter(mo_name(mo) \%like\% "^ent")
    + 
    +  example_isolates \%>\% 
    +    mutate(group = case_when(hospital_id \%like\% "A|D"        ~ "Group 1",
    +                             mo_name(mo) \%not_like\% "^Staph" ~ "Group 2a",
    +                             TRUE                            ~ "Group 2b"))
     }
     }
     }
    diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd
    index db169e3a..91c7228f 100644
    --- a/man/mo_matching_score.Rd
    +++ b/man/mo_matching_score.Rd
    @@ -12,7 +12,7 @@ mo_matching_score(x, n)
     \item{n}{A full taxonomic name, that exists in \code{\link[=microorganisms]{microorganisms$fullname}}}
     }
     \description{
    -This helper function is used by \code{\link[=as.mo]{as.mo()}} to determine the most probable match of taxonomic records, based on user input.
    +This algorithm is used by \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions to determine the most probable match of taxonomic records based on user input.
     }
     \section{Matching score for microorganisms}{
     
    @@ -35,6 +35,14 @@ The grouping into human pathogenic prevalence (\eqn{p}) is based on experience f
     All matches are sorted descending on their matching score and for all user input values, the top match will be returned. This will lead to the effect that e.g., \code{"E. coli"} will return the microbial ID of \emph{Escherichia coli} (\eqn{m = 0.688}, a highly prevalent microorganism found in humans) and not \emph{Entamoeba coli} (\eqn{m = 0.079}, a less prevalent microorganism in humans), although the latter would alphabetically come first.
     }
     
    +\section{Stable lifecycle}{
    +
    +\if{html}{\figure{lifecycle_stable.svg}{options: style=margin-bottom:5px} \cr}
    +The \link[=lifecycle]{lifecycle} of this function is \strong{stable}. In a stable function, major changes are unlikely. This means that the underlying code will generally evolve by adding new arguments; removing arguments or changing the meaning of existing arguments will be avoided.
    +
    +If the underlying code needs breaking changes, they will occur gradually. For example, a parameter will be deprecated and first continue to work, but will emit a message informing you of the change. Next, typically after at least one newly released version on CRAN, the message will be transformed to an error.
    +}
    +
     \examples{
     as.mo("E. coli")
     mo_uncertainties()
    @@ -42,3 +50,6 @@ mo_uncertainties()
     mo_matching_score(x = "E. coli",
                       n = c("Escherichia coli", "Entamoeba coli"))
     }
    +\author{
    +Matthijs S. Berends
    +}