diff --git a/DESCRIPTION b/DESCRIPTION index 0db0e65c..febf3c3c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.9.0.9017 -Date: 2020-01-27 +Version: 0.9.0.9018 +Date: 2020-01-31 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 36f2c0d2..42a93202 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -86,7 +86,6 @@ export(availability) export(brmo) export(bug_drug_combinations) export(catalogue_of_life_version) -export(clear_mo_history) export(count_I) export(count_IR) export(count_R) @@ -329,5 +328,3 @@ importFrom(tidyr,pivot_wider) importFrom(utils,adist) importFrom(utils,browseURL) importFrom(utils,menu) -importFrom(utils,read.csv) -importFrom(utils,write.csv) diff --git a/NEWS.md b/NEWS.md index df4c45b7..8559b355 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 0.9.0.9017 -## Last updated: 27-Jan-2020 +# AMR 0.9.0.9018 +## Last updated: 31-Jan-2020 ### New * Support for LOINC and SNOMED codes @@ -24,6 +24,7 @@ * The repository of this package now contains a clean version of the EUCAST and CLSI guidelines from 2011-2019 to translate MIC and disk diffusion values to R/SI: https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt. This **allows for machine reading these guidelines**, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. This file is updated automatically. ### Changes +* The `as.mo()` function previously wrote to the package folder to improve calculation speed for previously calculated results. This is no longer the case, to comply with CRAN policies. Consequently, the function `clear_mo_history()` was removed. * Bugfix for some WHONET microorganism codes that were not interpreted correctly when using `as.rsi()` * Speed improvement for `as.mo()` (and consequently all `mo_*` functions that use `as.mo()` internally), especially for the *G. species* format (G for genus), like *E. coli* and *K penumoniae* * Better support for determination of *Salmonella* biovars diff --git a/R/ab.R b/R/ab.R index 6d60d1a4..1aa415af 100755 --- a/R/ab.R +++ b/R/ab.R @@ -327,7 +327,7 @@ as.ab <- function(x, ...) { #' @rdname as.ab #' @export is.ab <- function(x) { - identical(class(x), "ab") + inherits(x, "ab") } #' @exportMethod print.ab diff --git a/R/ab_property.R b/R/ab_property.R index 16c2ba5b..d0b93d83 100644 --- a/R/ab_property.R +++ b/R/ab_property.R @@ -197,11 +197,6 @@ ab_property <- function(x, property = "name", language = get_locale(), ...) { } ab_validate <- function(x, property, ...) { - if (!"AMR" %in% base::.packages()) { - library("AMR") - # check onLoad() in R/zzz.R: data tables are created there. - } - # try to catch an error when inputting an invalid parameter # so the 'call.' can be set to FALSE tryCatch(x[1L] %in% AMR::antibiotics[1, property], diff --git a/R/mo.R b/R/mo.R index 4620a33f..8554964e 100755 --- a/R/mo.R +++ b/R/mo.R @@ -98,14 +98,6 @@ #' Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is *Aspergillus*, *Bacteroides*, *Candida*, *Capnocytophaga*, *Chryseobacterium*, *Cryptococcus*, *Elisabethkingia*, *Flavobacterium*, *Fusobacterium*, *Giardia*, *Leptotrichia*, *Mycoplasma*, *Prevotella*, *Rhodotorula*, *Treponema*, *Trichophyton* or *Ureaplasma*. #' #' Group 3 (least prevalent microorganisms) consists of all other microorganisms. -#' -#' ## Self-learning algorithm -#' -#' The [as.mo()] function gains experience from previously determined microorganism IDs and learns from it. This drastically improves both speed and reliability. Use [clear_mo_history()] to reset the algorithms. Only experience from your current `AMR` package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. -#' -#' Usually, any guess after the first try runs 80-95% faster than the first try. -#' -#' This resets with every update of this `AMR` package since results are saved to your local package library folder. #' @inheritSection catalogue_of_life Catalogue of Life # (source as a section here, so it can be inherited by other man pages:) #' @section Source: @@ -189,11 +181,7 @@ as.mo <- function(x, x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ uncertainty_level <- translate_allow_uncertain(allow_uncertain) - mo_hist <- get_mo_history(x, - uncertainty_level, - force = isTRUE(list(...)$force_mo_history), - disable = isTRUE(list(...)$disable_mo_history)) - + if (mo_source_isvalid(reference_df) & isFALSE(Becker) & isFALSE(Lancefield) @@ -225,12 +213,6 @@ as.mo <- function(x, & isFALSE(Lancefield)) { y <- x - } else if (!any(is.na(mo_hist)) - & isFALSE(Becker) - & isFALSE(Lancefield)) { - # check previously found results - y <- mo_hist - } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary y <- mo_validate(x = x, property = "mo", @@ -249,7 +231,7 @@ to_class_mo <- function(x) { #' @rdname as.mo #' @export is.mo <- function(x) { - identical(class(x), class(to_class_mo(x))) + inherits(x, "mo") } #' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter distinct @@ -259,8 +241,6 @@ is.mo <- function(x) { # param property a column name of AMR::microorganisms # param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too # param dyslexia_mode logical - also check for characters that resemble others -# param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions) -# param disable_mo_history logical - whether set_mo_history and get_mo_history should be ignored # param debug logical - show different lookup texts while searching # param reference_data_to_use data.frame - the data set to check for exec_as.mo <- function(x, @@ -271,8 +251,6 @@ exec_as.mo <- function(x, property = "mo", initial_search = TRUE, dyslexia_mode = FALSE, - force_mo_history = FALSE, - disable_mo_history = getOption("AMR_disable_mo_history", FALSE), debug = FALSE, reference_data_to_use = microorganismsDT) { @@ -408,19 +386,6 @@ exec_as.mo <- function(x, } x <- y - } else if (all(toupper(x) %in% read_mo_history(uncertainty_level, - force = force_mo_history, - disable = disable_mo_history)$x)) { - - # previously found code - x <- data.frame(mo = get_mo_history(x, - uncertainty_level, - force = force_mo_history, - disable = disable_mo_history), - stringsAsFactors = FALSE) %>% - left_join(AMR::microorganisms, by = "mo") %>% - pull(property) - } else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") @@ -432,8 +397,6 @@ exec_as.mo <- function(x, # commonly used MO codes y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ] - # save them to history - set_mo_history(x, y$mo, 0, force = force_mo_history, disable = disable_mo_history) x <- reference_data_to_use[data.table(mo = y[["mo"]]), on = "mo", @@ -447,7 +410,6 @@ exec_as.mo <- function(x, y <- reference_data_to_use[data.table(mo = y), on = "mo", ..property][[1]] - # don't save to history, as all items are already in microorganisms.translation x <- y } else if (!all(x %in% AMR::microorganisms[, property])) { @@ -592,19 +554,6 @@ exec_as.mo <- function(x, } } - mo_hist <- get_mo_history(x_backup[i], uncertainty_level, force = force_mo_history, disable = disable_mo_history) - if (initial_search == TRUE & !any(is.na(mo_hist))) { - # previously found code - found <- data.frame(mo = mo_hist, - stringsAsFactors = FALSE) %>% - left_join(reference_data_to_use, by = "mo") %>% - pull(property) - if (length(found) > 0) { - x[i] <- found[1L] - next - } - } - if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)") { x[i] <- "UNKNOWN" next @@ -625,7 +574,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - # don't save to history, as all items are already in microorganisms.translation next } } @@ -636,7 +584,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - # don't save to history, as all items are already in microorganisms.codes next } } @@ -646,9 +593,6 @@ exec_as.mo <- function(x, # most probable: is exact match in fullname if (length(found) > 0) { x[i] <- found[1L] - if (initial_search == TRUE) { - # don't save valid fullnames to history (i.e. values that are in microorganisms$fullname) - } next } @@ -664,7 +608,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - # don't save to history, as all items are already in microorganisms next } } @@ -675,9 +618,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -686,9 +626,6 @@ exec_as.mo <- function(x, # is a valid Catalogue of Life ID if (NROW(found) > 0) { x[i] <- found[1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -698,9 +635,6 @@ exec_as.mo <- function(x, if (length(mo_found) > 0) { x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } } @@ -729,9 +663,6 @@ exec_as.mo <- function(x, # empty and nonsense values, ignore without warning x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -743,7 +674,6 @@ exec_as.mo <- function(x, ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } next } @@ -760,18 +690,12 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") { x[i] <- microorganismsDT[mo == "B_STPHY_AURS", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") | x_backup_without_spp[i] %like_case% " (mrse|msse) ") { x[i] <- microorganismsDT[mo == "B_STPHY_EPDR", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (toupper(x_backup_without_spp[i]) == "VRE" @@ -779,9 +703,6 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { x[i] <- microorganismsDT[mo == "B_ENTRC", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } # support for: @@ -801,9 +722,6 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { x[i] <- microorganismsDT[mo == "B_ESCHR_COLI", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (toupper(x_backup_without_spp[i]) == "MRPA" @@ -811,18 +729,12 @@ exec_as.mo <- function(x, # multi resistant P. aeruginosa x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (toupper(x_backup_without_spp[i]) == "CRSM") { # co-trim resistant S. maltophilia x[i] <- microorganismsDT[mo == "B_STNTR_MLTP", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") @@ -830,45 +742,30 @@ exec_as.mo <- function(x, # peni I, peni R, vanco I, vanco R: S. pneumoniae x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { # Streptococci in different languages, like "estreptococos grupo B" x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { # Streptococci in different languages, like "Group A Streptococci" x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "haemoly.*strept") { # Haemolytic streptococci in different languages x[i] <- microorganismsDT[mo == "B_STRPT_HAEM", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- @@ -878,9 +775,6 @@ exec_as.mo <- function(x, # coerce S. coagulase negative x[i] <- microorganismsDT[mo == "B_STPHY_CONS", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "[ck]oagulas[ea] positie?[vf]" @@ -889,9 +783,6 @@ exec_as.mo <- function(x, # coerce S. coagulase positive x[i] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } # streptococcal groups: milleri and viridans @@ -901,9 +792,6 @@ exec_as.mo <- function(x, # Milleri Group Streptococcus (MGS) x[i] <- microorganismsDT[mo == "B_STRPT_MILL", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_trimmed[i] %like_case% "strepto.* viridans" @@ -912,9 +800,6 @@ exec_as.mo <- function(x, # Viridans Group Streptococcus (VGS) x[i] <- microorganismsDT[mo == "B_STRPT_VIRI", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "gram[ -]?neg.*" @@ -923,9 +808,6 @@ exec_as.mo <- function(x, # coerce Gram negatives x[i] <- microorganismsDT[mo == "B_GRAMN", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "gram[ -]?pos.*" @@ -934,18 +816,12 @@ exec_as.mo <- function(x, # coerce Gram positives x[i] <- microorganismsDT[mo == "B_GRAMP", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { # coerce Gram positives x[i] <- microorganismsDT[mo == "B_MYCBC", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -954,9 +830,6 @@ exec_as.mo <- function(x, # Salmonella Group A to Z, just return S. species for now x[i] <- microorganismsDT[mo == "B_SLMNL", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE) & !x_backup[i] %like% "t[iy](ph|f)[iy]") { @@ -964,9 +837,6 @@ exec_as.mo <- function(x, # except for S. typhi, S. paratyphi, S. typhimurium x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } uncertainties <- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = 1, input = x_backup[i], @@ -980,27 +850,18 @@ exec_as.mo <- function(x, # coerce Neisseria meningitidis x[i] <- microorganismsDT[mo == "B_NESSR_MNNG", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if ("gonococcus" %like_case% x_trimmed[i]) { # coerce Neisseria gonorrhoeae x[i] <- microorganismsDT[mo == "B_NESSR_GNRR", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } if ("pneumococcus" %like_case% x_trimmed[i]) { # coerce Streptococcus penumoniae x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L] - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } } @@ -1026,9 +887,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } return(x[i]) } if (nchar(g.x_backup_without_spp) >= 6) { @@ -1036,9 +894,6 @@ exec_as.mo <- function(x, ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } return(x[i]) } } @@ -1051,7 +906,6 @@ exec_as.mo <- function(x, ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, a.x_backup) - set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } return(x[i]) } @@ -1145,7 +999,6 @@ exec_as.mo <- function(x, ref_old = found[1, ref], ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], mo = microorganismsDT[col_id == found[1, col_id_new], mo]) - # no set history on renames return(x[i]) } @@ -1196,7 +1049,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = microorganismsDT[col_id == found[1, col_id_new], mo])) - # no set history on renames return(x) } @@ -1222,9 +1074,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 1, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1251,9 +1100,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(x, property), 2, force = force_mo_history, disable = disable_mo_history) - } return(x) } } @@ -1282,9 +1128,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } @@ -1310,9 +1153,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } @@ -1345,9 +1185,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1378,9 +1215,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1399,9 +1233,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") { @@ -1413,9 +1244,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } # (9) try to strip off one element from start and check the remains (only allow >= 2-part name outcome) ---- @@ -1445,9 +1273,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1484,9 +1309,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1516,9 +1338,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1541,9 +1360,6 @@ exec_as.mo <- function(x, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, result_mo = found_result[1L])) - if (initial_search == TRUE) { - set_mo_history(a.x_backup, get_mo_code(found[1L], property), 3, force = force_mo_history, disable = disable_mo_history) - } return(found[1L]) } } @@ -1564,7 +1380,6 @@ exec_as.mo <- function(x, g.x_backup_without_spp = g.x_backup_without_spp, uncertain.reference_data_to_use = microorganismsDT[prevalence %in% c(1, 2)]) if (!empty_result(x[i])) { - # no set_mo_history here - it is already set in uncertain_fn() return(x[i]) } } else if (nrow(data_to_check) == nrow(microorganismsDT[prevalence == 3])) { @@ -1576,7 +1391,6 @@ exec_as.mo <- function(x, g.x_backup_without_spp = g.x_backup_without_spp, uncertain.reference_data_to_use = microorganismsDT[prevalence == 3]) if (!empty_result(x[i])) { - # no set_mo_history here - it is already set in uncertain_fn() return(x[i]) } } @@ -1598,9 +1412,6 @@ exec_as.mo <- function(x, h.x_species = x_species[i], i.x_trimmed_species = x_trimmed_species[i]) if (!empty_result(x[i])) { - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -1618,9 +1429,6 @@ exec_as.mo <- function(x, h.x_species = x_species[i], i.x_trimmed_species = x_trimmed_species[i]) if (!empty_result(x[i])) { - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -1637,9 +1445,6 @@ exec_as.mo <- function(x, h.x_species = x_species[i], i.x_trimmed_species = x_trimmed_species[i]) if (!empty_result(x[i])) { - if (initial_search == TRUE) { - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) - } next } @@ -1648,7 +1453,6 @@ exec_as.mo <- function(x, ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) - set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } } } @@ -2071,22 +1875,6 @@ unregex <- function(x) { gsub("[^a-zA-Z0-9 -]", "", x) } -get_mo_code <- function(x, property) { - if (property == "mo") { - unique(x) - } else if (property == "snomed") { - found <- unlist(lapply(microorganismsDT$snomed, - function(s) if (any(x %in% s, na.rm = TRUE)) { - TRUE - } else { - FALSE - })) - microorganismsDT$mo[found == TRUE] - } else { - microorganismsDT[get(property) == x, "mo"][[1]] - } -} - translate_allow_uncertain <- function(allow_uncertain) { if (isTRUE(allow_uncertain)) { # default to uncertainty level 2 diff --git a/R/mo_history.R b/R/mo_history.R deleted file mode 100644 index 68a39533..00000000 --- a/R/mo_history.R +++ /dev/null @@ -1,200 +0,0 @@ -# ==================================================================== # -# TITLE # -# Antimicrobial Resistance (AMR) Analysis # -# # -# SOURCE # -# https://gitlab.com/msberends/AMR # -# # -# LICENCE # -# (c) 2018-2020 Berends MS, Luz CF et al. # -# # -# This R package is free software; you can freely use and distribute # -# it for both personal and commercial purposes under the terms of the # -# GNU General Public License version 2.0 (GNU GPL-2), as published by # -# the Free Software Foundation. # -# # -# We created this package for both routine data analysis and academic # -# research and it was publicly released in the hope that it will be # -# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # -# Visit our website for more info: https://msberends.gitlab.io/AMR. # -# ==================================================================== # - -# print successful as.mo coercions to a options entry -#' @inheritSection lifecycle Stable lifecycle -#' @importFrom dplyr %>% distinct filter -#' @importFrom utils write.csv -set_mo_history <- function(x, mo, uncertainty_level, force = FALSE, disable = FALSE) { - if (isTRUE(disable)) { - return(base::invisible()) - } - - # don't save codes that are in a code data set already - mo <- mo[!x %in% microorganisms.codes$code & !x %in% microorganisms.translation$mo_old] - x <- x[!x %in% microorganisms.codes$code & !x %in% microorganisms.translation$mo_old] - - warning_new_write <- FALSE - - if (base::interactive() | force == TRUE) { - mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force) - df <- data.frame(x, mo, stringsAsFactors = FALSE) %>% - distinct(x, .keep_all = TRUE) %>% - filter(!is.na(x) & !is.na(mo)) - if (nrow(df) == 0) { - return(base::invisible()) - } - x <- toupper(df$x) - mo <- df$mo - for (i in seq_len(length(x))) { - # save package version too, as both the as.mo() algorithm and the reference data set may change - if (NROW(mo_hist[base::which(mo_hist$x == x[i] & - mo_hist$uncertainty_level >= uncertainty_level & - mo_hist$package_version == utils::packageVersion("AMR")), ]) == 0) { - if (is.null(mo_hist) & interactive()) { - warning_new_write <- TRUE - } - tryCatch(write.csv(rbind(mo_hist, - data.frame( - x = x[i], - mo = mo[i], - uncertainty_level = uncertainty_level, - package_version = base::as.character(utils::packageVersion("AMR")), - stringsAsFactors = FALSE)), - row.names = FALSE, - file = mo_history_file()), - error = function(e) { - warning_new_write <- FALSE; base::invisible() - }) - } - } - } - if (warning_new_write == TRUE) { - message(blue(paste0("NOTE: results are saved to ", mo_history_file(), "."))) - } - return(base::invisible()) -} - -get_mo_history <- function(x, uncertainty_level, force = FALSE, disable = FALSE) { - if (isTRUE(disable)) { - return(to_class_mo(NA)) - } - - history <- read_mo_history(uncertainty_level = uncertainty_level, force = force) - if (base::is.null(history)) { - result <- NA - } else { - result <- data.frame(x = as.character(toupper(x)), stringsAsFactors = FALSE) %>% - left_join(history, by = "x") %>% - pull(mo) - } - to_class_mo(result) -} - -#' @importFrom dplyr %>% filter distinct -#' @importFrom utils read.csv -read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = FALSE, disable = FALSE) { - if (isTRUE(disable)) { - return(NULL) - } - - if ((!base::interactive() & force == FALSE)) { - return(NULL) - } - uncertainty_level_param <- uncertainty_level - - history <- tryCatch(read.csv(mo_history_file(), stringsAsFactors = FALSE), - warning = function(w) invisible(), - error = function(e) NULL) - if (is.null(history)) { - return(NULL) - } - # Below: filter on current package version. - # Even current fullnames may be replaced by new taxonomic names, so new versions of - # the Catalogue of Life must not lead to data corruption. - - if (unfiltered == FALSE) { - history <- history %>% - filter(package_version == as.character(utils::packageVersion("AMR")), - # only take unknowns if uncertainty_level_param is higher - ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) | - (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>% - arrange(desc(uncertainty_level)) %>% - distinct(x, mo, .keep_all = TRUE) - } - - if (nrow(history) == 0) { - NULL - } else { - history - } -} - -#' @rdname as.mo -#' @importFrom crayon red -#' @importFrom utils menu -#' @export -clear_mo_history <- function(...) { - if (!is.null(read_mo_history())) { - if (interactive() & !isTRUE(list(...)$force)) { - q <- menu(title = paste("This will clear all", - format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","), - "previously determined microbial IDs. Are you sure?"), - choices = c("Yes", "No"), - graphics = FALSE) - if (q != 1) { - return(invisible()) - } - } - - success <- create_blank_mo_history() - if (!isFALSE(success)) { - cat(red(paste("File", mo_history_file(), "cleared."))) - } - } -} - -#' @importFrom utils write.csv -create_blank_mo_history <- function() { - tryCatch( - write.csv(x = data.frame(x = character(0), - mo = character(0), - uncertainty_level = integer(0), - package_version = character(0), - stringsAsFactors = FALSE), - row.names = FALSE, - file = mo_history_file()), - warning = function(w) invisible(), - error = function(e) TRUE) -} - - -# Borrowed all below code from the extrafont package, -# https://github.com/wch/extrafont/blob/254c3f99b02f11adb59affbda699a92aec8624f5/R/utils.r -inst_path <- function() { - envname <- environmentName(parent.env(environment())) - - # If installed in package, envname == "AMR" - # If loaded with load_all, envname == "package:AMR" - # (This is kind of strange) - if (envname == "AMR") { - system.file(package = "AMR") - } else { - srcfile <- attr(attr(inst_path, "srcref"), "srcfile") - file.path(dirname(dirname(srcfile$filename)), "inst") - } -} - - -# Get the path where extrafontdb is installed -db_path <- function() { - system.file(package = "AMR") -} - -# fonttable file -mo_history_file <- function() { - file.path(mo_history_path(), "mo_history.csv") -} - -# Path of fontmap directory -mo_history_path <- function() { - file.path(db_path(), "mo_history") -} diff --git a/cran-comments.md b/cran-comments.md index 7489d2ef..e9a2b76f 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,7 +1,5 @@ -# Version 0.9.0 +# Version 0.10.0 * For this specific version, nothing to mention. * Since version 0.3.0, CHECK returns a NOTE for having a data directory over 3 MB. This is needed to offer users reference data for the complete taxonomy of microorganisms - one of the most important features of this package. - -* Since version 0.8.0, this package writes lines to `[library path]/AMR/mo_history/mo_history.csv` when using the `as.mo()` function, in the exact same way (and borrowed from) the `extrafont` package on CRAN (version 0.17) writes to the user library path. Users are notified about this with a `message()`, and staged install on R >= 3.6.0 still works. The CSV file is never newly created or deleted by this package, it only changes this file to improve speed and reliability of the `as.mo()` function. See the source code of functions `set_mo_history()` and `clear_mo_history()` in file `R/mo_history.R`. diff --git a/docs/404.html b/docs/404.html index a7d64199..4d734bdd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index dfa51f6b..e4008131 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 00000000..fec453a0 Binary files /dev/null and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 00000000..bd398b79 Binary files /dev/null and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index ee082563..d535becd 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/docs/authors.html b/docs/authors.html index d70eea97..f446f6c7 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/docs/index.html b/docs/index.html index 9ce218eb..27d64047 100644 --- a/docs/index.html +++ b/docs/index.html @@ -45,7 +45,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index af1751af..85ab0545 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -85,7 +85,7 @@ AMR (for R) - 0.9.0.9016 + 0.9.0.9018 @@ -252,9 +252,7 @@ mo_uncertainties() -mo_renamed() - -clear_mo_history(...) +mo_renamed()

Arguments

@@ -353,13 +351,6 @@

Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacteria, Firmicutes, Actinobacteria or Sarcomastigophora, or where the taxonomic genus is Aspergillus, Bacteroides, Candida, Capnocytophaga, Chryseobacterium, Cryptococcus, Elisabethkingia, Flavobacterium, Fusobacterium, Giardia, Leptotrichia, Mycoplasma, Prevotella, Rhodotorula, Treponema, Trichophyton or Ureaplasma.

Group 3 (least prevalent microorganisms) consists of all other microorganisms.

-

Self-learning algorithm

- - -

The as.mo() function gains experience from previously determined microorganism IDs and learns from it. This drastically improves both speed and reliability. Use clear_mo_history() to reset the algorithms. Only experience from your current AMR package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge.

-

Usually, any guess after the first try runs 80-95% faster than the first try.

-

This resets with every update of this AMR package since results are saved to your local package library folder.

-

Source

diff --git a/docs/reference/example_isolates.html b/docs/reference/example_isolates.html index 353404a4..81b0b485 100644 --- a/docs/reference/example_isolates.html +++ b/docs/reference/example_isolates.html @@ -85,7 +85,7 @@ AMR (for R) - 0.9.0.9016 + 0.9.0.9018 diff --git a/docs/reference/index.html b/docs/reference/index.html index fd7eeaff..a6baeb3a 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 @@ -266,7 +266,7 @@ diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index a9676a3a..e75e4645 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -85,7 +85,7 @@ AMR (for R) - 0.9.0.9016 + 0.9.0.9018 diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 60caf1e1..2207c592 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -85,7 +85,7 @@ AMR (for R) - 0.9.0.9016 + 0.9.0.9018 diff --git a/docs/reference/rsi_translation.html b/docs/reference/rsi_translation.html index 2ab917b0..7c150553 100644 --- a/docs/reference/rsi_translation.html +++ b/docs/reference/rsi_translation.html @@ -85,7 +85,7 @@ AMR (for R) - 0.9.0.9017 + 0.9.0.9018 diff --git a/inst/mo_history/mo_history.csv b/inst/mo_history/mo_history.csv deleted file mode 100644 index a4d9a82e..00000000 --- a/inst/mo_history/mo_history.csv +++ /dev/null @@ -1 +0,0 @@ -"x","mo","uncertainty_level","package_version" diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 76892729..e108a125 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mo.R, R/mo_history.R +% Please edit documentation in R/mo.R \name{as.mo} \alias{as.mo} \alias{mo} @@ -7,7 +7,6 @@ \alias{mo_failures} \alias{mo_uncertainties} \alias{mo_renamed} -\alias{clear_mo_history} \title{Transform to microorganism ID} \usage{ as.mo( @@ -26,8 +25,6 @@ mo_failures() mo_uncertainties() mo_renamed() - -clear_mo_history(...) } \arguments{ \item{x}{a character vector or a \code{\link{data.frame}} with one or two columns} @@ -122,15 +119,6 @@ Group 2 consists of all microorganisms where the taxonomic phylum is Proteobacte Group 3 (least prevalent microorganisms) consists of all other microorganisms. } - -\subsection{Self-learning algorithm}{ - -The \code{\link[=as.mo]{as.mo()}} function gains experience from previously determined microorganism IDs and learns from it. This drastically improves both speed and reliability. Use \code{\link[=clear_mo_history]{clear_mo_history()}} to reset the algorithms. Only experience from your current \code{AMR} package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge. - -Usually, any guess after the first try runs 80-95\% faster than the first try. - -This resets with every update of this \code{AMR} package since results are saved to your local package library folder. -} } \section{Source}{ diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index 778577e0..6bb75363 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -24,9 +24,7 @@ context("mo.R") test_that("as.mo works", { skip_on_cran() - - clear_mo_history(force = TRUE) - + library(dplyr) MOs <- AMR::microorganisms %>% filter(!is.na(mo), nchar(mo) > 3) expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo))) diff --git a/vignettes/benchmarks.Rmd b/vignettes/benchmarks.Rmd index e67cd5bd..aeea8d79 100755 --- a/vignettes/benchmarks.Rmd +++ b/vignettes/benchmarks.Rmd @@ -22,7 +22,6 @@ knitr::opts_chunk$set( fig.height = 4.5, dpi = 75 ) -options(AMR_disable_mo_history = FALSE) ``` One of the most important features of this package is the complete microbial taxonomic database, supplied by the [Catalogue of Life](http://catalogueoflife.org). We created a function `as.mo()` that transforms any user input value to a valid microbial ID by using intelligent rules combined with the taxonomic tree of Catalogue of Life. @@ -69,9 +68,6 @@ The actual result is the same every time: it returns its microorganism code ``r But the calculation time differs a lot: -```{r, echo = FALSE} -clear_mo_history() -``` ```{r, warning=FALSE} S.aureus <- microbenchmark( as.mo("sau"), # WHONET code @@ -95,13 +91,10 @@ print(S.aureus, unit = "ms", signif = 2) ggplot.bm(S.aureus) ``` -In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred. +In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. -To achieve this speed, the `as.mo` function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of *Methanosarcina semesiae* (`B_MTHNSR_SEMS`), a bug probably never found before in humans: +To achieve this speed, the `as.mo` function also takes into account the prevalence of human pathogenic microorganisms. The downside of this is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of *Methanosarcina semesiae* (`B_MTHNSR_SEMS`), a bug probably never found before in humans: -```{r, echo = FALSE} -clear_mo_history() -``` ```{r, warning=FALSE} M.semesiae <- microbenchmark(as.mo("metsem"), as.mo("METSEM"), @@ -112,33 +105,11 @@ M.semesiae <- microbenchmark(as.mo("metsem"), print(M.semesiae, unit = "ms", signif = 4) ``` -That takes `r round(mean(M.semesiae$time, na.rm = TRUE) / mean(S.aureus$time, na.rm = TRUE), 1)` times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like *Methanosarcina semesiae*) are always very fast and only take some thousands of seconds to coerce - they are the most probable input from most data sets. +That takes `r round(mean(M.semesiae$time, na.rm = TRUE) / mean(S.aureus$time, na.rm = TRUE), 1)` times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like *Methanosarcina semesiae*) are always very fast and only take some thousands of seconds to coerce - they are the most probable input from most data sets. In the figure below, we compare *Escherichia coli* (which is very common) with *Prevotella brevis* (which is moderately common) and with *Methanosarcina semesiae* (which is uncommon): ```{r, echo = FALSE, fig.width=12} -clear_mo_history() -par(mar = c(5, 16, 4, 2)) -boxplot(microbenchmark( - 'as.mo("Methanosarcina semesiae")' = as.mo("Methanosarcina semesiae", force_mo_history = TRUE), - 'as.mo("Prevotella brevis")' = as.mo("Prevotella brevis", force_mo_history = TRUE), - 'as.mo("Escherichia coli")' = as.mo("Escherichia coli", force_mo_history = TRUE), - 'as.mo("M. semesiae")' = as.mo("M. semesiae", force_mo_history = TRUE), - 'as.mo("P. brevis")' = as.mo("P. brevis", force_mo_history = TRUE), - 'as.mo("E. coli")' = as.mo("E. coli", force_mo_history = TRUE), - times = 10), - horizontal = TRUE, las = 1, unit = "s", log = TRUE, - xlab = "", ylab = "Time in seconds (log)", - main = "Benchmarks per prevalence") -``` - -The highest outliers are the first times. All next determinations were done in only thousands of seconds, because the `as.mo()` function **learns from its own output to speed up determinations for next times**. - -In below figure, this effect was disabled to show the difference with the boxplot above: - -```{r, echo = FALSE, fig.width=12} -clear_mo_history() -options(AMR_disable_mo_history = TRUE) par(mar = c(5, 16, 4, 2)) boxplot(microbenchmark( 'as.mo("Methanosarcina semesiae")' = as.mo("Methanosarcina semesiae"), @@ -151,7 +122,6 @@ boxplot(microbenchmark( horizontal = TRUE, las = 1, unit = "s", log = TRUE, xlab = "", ylab = "Time in seconds (log)", main = "Benchmarks per prevalence") -options(AMR_disable_mo_history = FALSE) ``` Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: **repetitive results** and **already precalculated results**. @@ -220,7 +190,7 @@ run_it <- microbenchmark(A = mo_species("aureus"), print(run_it, unit = "ms", signif = 3) ``` -Of course, when running `mo_phylum("Firmicutes")` the function has zero knowledge about the actual microorganism, namely *S. aureus*. But since the result would be `"Firmicutes"` too, there is no point in calculating the result. And because this package 'knows' all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately. +Of course, when running `mo_phylum("Firmicutes")` the function has zero knowledge about the actual microorganism, namely *S. aureus*. But since the result would be `"Firmicutes"` anyway, there is no point in calculating the result. And because this package 'knows' all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately. ### Results in other languages
-

as.mo() is.mo() mo_failures() mo_uncertainties() mo_renamed() clear_mo_history()

+

as.mo() is.mo() mo_failures() mo_uncertainties() mo_renamed()

Transform to microorganism ID