diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7695a26a..7ffd1306 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,7 +51,7 @@ before_script: R-release: stage: build - allow_failure: true + allow_failure: false script: - Rscript -e 'sessionInfo()' # install missing and outdated packages diff --git a/DESCRIPTION b/DESCRIPTION index a5c20726..fe9d7878 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.6.1.9002 -Date: 2019-04-06 +Date: 2019-04-09 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 4ec62f58..8efd6b60 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,7 +71,6 @@ export(atc_umcg) export(availability) export(brmo) export(catalogue_of_life_version) -export(clean_mo_history) export(count_I) export(count_IR) export(count_R) @@ -293,4 +292,3 @@ importFrom(stats,sd) importFrom(utils,browseURL) importFrom(utils,browseVignettes) importFrom(utils,installed.packages) -importFrom(utils,menu) diff --git a/NEWS.md b/NEWS.md index 8eea15c4..5126e70e 100755 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Added ~5,000 more old taxonomic names to the `microorganisms.old` data set, which leads to better results finding when using the `as.mo()` function * Frequency tables of microbial IDs speed improvement * Removed all hardcoded EUCAST rules and replaced them with a new reference file: `./inst/eucast/eucast.tsv`. +* Added ceftazidim intrinsic resistance to *Streptococci* +* Changed default settings for `age_groups()`, to let groups of fives and tens end with 100+ instead of 120+. 
#### Other * Prevented [staged install](https://developer.r-project.org/Blog/public/2019/02/14/staged-install/index.html) in R 3.6.0 and later by adding `StagedInstall: false` to the DESCRIPTION file diff --git a/R/age.R b/R/age.R index 0eca8995..51f65865 100755 --- a/R/age.R +++ b/R/age.R @@ -72,9 +72,9 @@ age <- function(x, reference = Sys.Date()) { #' \item{A character:} #' \itemize{ #' \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.} -#' \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. This will split on 0-64, 65-74, 75-84, 85-94 and 95+.} -#' \item{\code{"fives"}, equivalent of: \code{1:24 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120.} -#' \item{\code{"tens"}, equivalent of: \code{1:12 * 10}. This will split on 0-9, 10-19, 20-29 and so forth, until 120.} +#' \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85)}. This will split on 0-64, 65-74, 75-84, 85+.} +#' \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.} +#' \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29, ... 
80-89, 90-99, 100+.} #' } #' } #' @keywords age_group age @@ -92,11 +92,11 @@ age <- function(x, reference = Sys.Date()) { #' age_groups(ages, c(20, 50)) #' #' # split into groups of ten years -#' age_groups(ages, 1:12 * 10) +#' age_groups(ages, 1:10 * 10) #' age_groups(ages, split_at = "tens") #' #' # split into groups of five years -#' age_groups(ages, 1:24 * 5) +#' age_groups(ages, 1:20 * 5) #' age_groups(ages, split_at = "fives") #' #' # split specifically for children @@ -117,14 +117,14 @@ age <- function(x, reference = Sys.Date()) { age_groups <- function(x, split_at = c(12, 25, 55, 75)) { if (is.character(split_at)) { split_at <- split_at[1L] - if (split_at %like% "^child") { + if (split_at %like% "^(child|kid)") { split_at <- c(0, 1, 2, 4, 6, 13, 18) } else if (split_at %like% "^(elder|senior)") { - split_at <- c(65, 75, 85, 95) + split_at <- c(65, 75, 85) } else if (split_at %like% "^five") { - split_at <- 1:24 * 5 + split_at <- 1:20 * 5 } else if (split_at %like% "^ten") { - split_at <- 1:12 * 10 + split_at <- 1:10 * 10 } } split_at <- as.integer(split_at) diff --git a/R/mo_history.R b/R/mo_history.R index 0274e5c5..c0c50de7 100644 --- a/R/mo_history.R +++ b/R/mo_history.R @@ -25,50 +25,50 @@ set_mo_history <- function(x, mo, uncertainty_level, force = FALSE) { # disable function return(base::invisible()) - if (base::interactive() | force == TRUE) { - mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force) - df <- data.frame(x, mo, stringsAsFactors = FALSE) %>% - distinct(x, .keep_all = TRUE) %>% - filter(!is.na(x) & !is.na(mo)) - if (nrow(df) == 0) { - return(base::invisible()) - } - x <- toupper(df$x) - mo <- df$mo - for (i in 1:length(x)) { - # save package version too, as both the as.mo() algorithm and the reference data set may change - if (NROW(mo_hist[base::which(mo_hist$x == x[i] & - mo_hist$uncertainty_level >= uncertainty_level & - mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) { - tryCatch( - assign(x = 
"mo_history", - value = rbind(mo_hist, - data.frame( - x = x[i], - mo = mo[i], - uncertainty_level = uncertainty_level, - package_v = base::as.character(utils::packageVersion("AMR")), - stringsAsFactors = FALSE)), - envir = asNamespace("AMR")), - error = function(e) invisible()) - } - } - } - return(base::invisible()) + # if (base::interactive() | force == TRUE) { + # mo_hist <- read_mo_history(uncertainty_level = uncertainty_level, force = force) + # df <- data.frame(x, mo, stringsAsFactors = FALSE) %>% + # distinct(x, .keep_all = TRUE) %>% + # filter(!is.na(x) & !is.na(mo)) + # if (nrow(df) == 0) { + # return(base::invisible()) + # } + # x <- toupper(df$x) + # mo <- df$mo + # for (i in 1:length(x)) { + # # save package version too, as both the as.mo() algorithm and the reference data set may change + # if (NROW(mo_hist[base::which(mo_hist$x == x[i] & + # mo_hist$uncertainty_level >= uncertainty_level & + # mo_hist$package_v == utils::packageVersion("AMR")),]) == 0) { + # tryCatch( + # assign(x = "mo_history", + # value = rbind(mo_hist, + # data.frame( + # x = x[i], + # mo = mo[i], + # uncertainty_level = uncertainty_level, + # package_v = base::as.character(utils::packageVersion("AMR")), + # stringsAsFactors = FALSE)), + # envir = asNamespace("AMR")), + # error = function(e) invisible()) + # } + # } + # } + # return(base::invisible()) } get_mo_history <- function(x, uncertainty_level, force = FALSE) { # disable function return(NA) - history <- read_mo_history(uncertainty_level = uncertainty_level, force = force) - if (base::is.null(history)) { - NA - } else { - data.frame(x = toupper(x), stringsAsFactors = FALSE) %>% - left_join(history, by = "x") %>% - pull(mo) - } + # history <- read_mo_history(uncertainty_level = uncertainty_level, force = force) + # if (base::is.null(history)) { + # NA + # } else { + # data.frame(x = toupper(x), stringsAsFactors = FALSE) %>% + # left_join(history, by = "x") %>% + # pull(mo) + # } } #' @importFrom dplyr %>% filter distinct @@ 
-76,59 +76,59 @@ read_mo_history <- function(uncertainty_level = 2, force = FALSE, unfiltered = F # disable function return(NULL) - if ((!base::interactive() & force == FALSE)) { - return(NULL) - } - uncertainty_level_param <- uncertainty_level - - history <- tryCatch(get("mo_history", envir = asNamespace("AMR")), - error = function(e) NULL) - if (is.null(history)) { - return(NULL) - } - # Below: filter on current package version. - # Even current fullnames may be replaced by new taxonomic names, so new versions of - # the Catalogue of Life must not lead to data corruption. - - if (unfiltered == FALSE) { - history <- history %>% - filter(package_v == as.character(utils::packageVersion("AMR")), - # only take unknowns if uncertainty_level_param is higher - ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) | - (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>% - arrange(desc(uncertainty_level)) %>% - distinct(x, mo, .keep_all = TRUE) - } - - if (nrow(history) == 0) { - NULL - } else { - history - } + # if ((!base::interactive() & force == FALSE)) { + # return(NULL) + # } + # uncertainty_level_param <- uncertainty_level + # + # history <- tryCatch(get("mo_history", envir = asNamespace("AMR")), + # error = function(e) NULL) + # if (is.null(history)) { + # return(NULL) + # } + # # Below: filter on current package version. + # # Even current fullnames may be replaced by new taxonomic names, so new versions of + # # the Catalogue of Life must not lead to data corruption. 
+ # + # if (unfiltered == FALSE) { + # history <- history %>% + # filter(package_v == as.character(utils::packageVersion("AMR")), + # # only take unknowns if uncertainty_level_param is higher + # ((mo == "UNKNOWN" & uncertainty_level_param == uncertainty_level) | + # (mo != "UNKNOWN" & uncertainty_level_param >= uncertainty_level))) %>% + # arrange(desc(uncertainty_level)) %>% + # distinct(x, mo, .keep_all = TRUE) + # } + # + # if (nrow(history) == 0) { + # NULL + # } else { + # history + # } } -#' @rdname as.mo -#' @importFrom crayon red -#' @importFrom utils menu -#' @export +# @rdname as.mo +# @importFrom crayon red +# @importFrom utils menu +# @export clean_mo_history <- function(...) { - if (!is.null(read_mo_history())) { - if (interactive() & !isTRUE(list(...)$force)) { - q <- menu(title = paste("This will remove all", - format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","), - "microbial IDs determined previously in this session. Are you sure?"), - choices = c("Yes", "No"), - graphics = FALSE) - if (q != 1) { - return(invisible()) - } - } - tryCatch( - assign(x = "mo_history", - value = NULL, - envir = asNamespace("AMR")), - error = function(e) invisible()) - cat(red("History removed.")) - } + # if (!is.null(read_mo_history())) { + # if (interactive() & !isTRUE(list(...)$force)) { + # q <- menu(title = paste("This will remove all", + # format(nrow(read_mo_history(999, unfiltered = TRUE)), big.mark = ","), + # "microbial IDs determined previously in this session. 
Are you sure?"), + # choices = c("Yes", "No"), + # graphics = FALSE) + # if (q != 1) { + # return(invisible()) + # } + # } + # tryCatch( + # assign(x = "mo_history", + # value = NULL, + # envir = asNamespace("AMR")), + # error = function(e) invisible()) + # cat(red("History removed.")) + # } } diff --git a/R/mo_property.R b/R/mo_property.R index 1cf4e6e3..3ee619b2 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -247,10 +247,13 @@ mo_phylum <- function(x, language = get_locale(), ...) { #' @rdname mo_property #' @export mo_kingdom <- function(x, language = get_locale(), ...) { + if (all(x %in% AMR::microorganisms$kingdom)) { + return(x) + } + x <- as.mo(x, language = "en", ...) kngdm <- mo_validate(x = x, property = "kingdom", ...) if (language != "en") { - unknowns <- as.mo(x, ...) == "UNKOWN" - kngdm[unknowns] <- mo_translate(kngdm[unknowns], language = language) + kngdm[x == "UNKNOWN"] <- mo_translate(kngdm[x == "UNKNOWN"], language = language) } kngdm } @@ -264,7 +267,6 @@ mo_type <- function(x, language = get_locale(), ...) { #' @rdname mo_property #' @export mo_gramstain <- function(x, language = get_locale(), ...) { - x.bak <- x x.mo <- as.mo(x, language = "en", ...) x.phylum <- mo_phylum(x.mo, language = "en") x[x.phylum %in% c("Actinobacteria", diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index c8ea0422..e982224f 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -192,7 +192,7 @@

Benchmarks

Matthijs S. Berends

-

05 April 2019

+

09 April 2019

@@ -217,14 +217,14 @@ times = 10) print(S.aureus, unit = "ms", signif = 2) #> Unit: milliseconds -#> expr min lq mean median uq max neval -#> as.mo("sau") 18.0 18.0 27 18.0 20.0 62 10 -#> as.mo("stau") 48.0 48.0 61 49.0 91.0 92 10 -#> as.mo("staaur") 18.0 18.0 18 18.0 18.0 19 10 -#> as.mo("STAAUR") 18.0 18.0 18 18.0 18.0 20 10 -#> as.mo("S. aureus") 29.0 29.0 29 29.0 29.0 29 10 -#> as.mo("S. aureus") 28.0 29.0 48 29.0 73.0 130 10 -#> as.mo("Staphylococcus aureus") 8.1 8.1 13 8.2 8.6 52 10 +#> expr min lq mean median uq max neval +#> as.mo("sau") 18 18.0 27 18.0 18.0 65 10 +#> as.mo("stau") 48 48.0 56 48.0 48.0 130 10 +#> as.mo("staaur") 18 18.0 24 18.0 18.0 76 10 +#> as.mo("STAAUR") 18 18.0 27 18.0 19.0 63 10 +#> as.mo("S. aureus") 28 28.0 33 28.0 29.0 72 10 +#> as.mo("S. aureus") 28 29.0 38 29.0 31.0 76 10 +#> as.mo("Staphylococcus aureus") 8 8.1 12 8.1 8.2 52 10

In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined more slowly. See this example for the ID of Thermus islandicus (B_THERMS_ISL), a bug probably never found before in humans:

T.islandicus <- microbenchmark(as.mo("theisl"),
@@ -236,11 +236,11 @@
 print(T.islandicus, unit = "ms", signif = 2)
 #> Unit: milliseconds
 #>                         expr min  lq mean median  uq max neval
-#>              as.mo("theisl") 460 460  480    470 510 510    10
-#>              as.mo("THEISL") 460 470  490    490 510 540    10
-#>       as.mo("T. islandicus")  73  73   84     73  77 130    10
-#>      as.mo("T.  islandicus")  73  73   88     75 120 120    10
-#>  as.mo("Thermus islandicus")  73  73   80     73  74 130    10
+#> as.mo("theisl") 460 480 490 500 510 510 10 +#> as.mo("THEISL") 460 460 490 470 510 520 10 +#> as.mo("T. islandicus") 72 73 88 75 110 120 10 +#> as.mo("T. islandicus") 73 73 87 74 100 140 10 +#> as.mo("Thermus islandicus") 72 73 90 75 120 130 10

That takes 8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are considerably faster - these are the most probable input from most data sets.

In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):

par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
@@ -256,10 +256,7 @@
         xlab = "", ylab = "Time in seconds", ylim = c(0, 0.5),
         main = "Benchmarks per prevalence")

-

In reality, the as.mo() functions learns from its own output to speed up determinations for next times. In above figure, this effect was disabled to show the difference with the boxplot below - when you would use as.mo() yourself:

-

-

The highest outliers are the first times. All next determinations were done in only thousands of seconds. For now, learning only works per session. If R is closed or terminated, the algorithms reset. This will probably be resolved in a next version.

-

Still, uncommon microorganisms take a lot more time than common microorganisms, especially the first time. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

+

Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

Repetitive results

@@ -290,8 +287,8 @@ print(run_it, unit = "ms", signif = 3) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> mo_fullname(x) 689 730 762 752 778 938 10
-

So transforming 500,000 values (!!) of 50 unique values only takes 0.75 seconds (751 ms). You only lose time on your unique input values.

+#> mo_fullname(x) 774 777 811 783 829 947 10 +

So transforming 500,000 values (!!) of 50 unique values only takes 0.78 seconds (783 ms). You only lose time on your unique input values.

@@ -304,9 +301,9 @@ print(run_it, unit = "ms", signif = 3) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> A 12.200 12.300 12.500 12.400 12.600 13.40 10 -#> B 25.800 26.300 26.800 26.600 26.900 28.50 10 -#> C 0.477 0.724 0.779 0.825 0.848 1.07 10

+#> A 12.000 12.100 12.500 12.400 12.800 13.20 10 +#> B 25.800 26.200 26.900 26.600 27.800 28.00 10 +#> C 0.476 0.691 0.758 0.812 0.848 1.03 10

So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0008 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
@@ -320,14 +317,14 @@
 print(run_it, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>  expr   min    lq  mean median    uq   max neval
-#>     A 0.376 0.386 0.436  0.436 0.479 0.516    10
-#>     B 0.467 0.501 0.561  0.567 0.599 0.700    10
-#>     C 0.503 0.782 0.850  0.918 0.971 1.040    10
-#>     D 0.403 0.471 0.488  0.491 0.525 0.588    10
-#>     E 0.343 0.429 0.456  0.445 0.485 0.638    10
-#>     F 0.380 0.403 0.447  0.453 0.491 0.520    10
-#>     G 0.385 0.421 0.458  0.447 0.487 0.575    10
-#>     H 0.396 0.455 0.484  0.491 0.515 0.549    10
+#> A 0.364 0.403 0.452 0.455 0.496 0.575 10 +#> B 0.478 0.533 0.566 0.565 0.594 0.645 10 +#> C 0.512 0.726 0.851 0.893 1.010 1.150 10 +#> D 0.409 0.460 0.490 0.507 0.530 0.544 10 +#> E 0.375 0.380 0.454 0.476 0.488 0.524 10 +#> F 0.395 0.411 0.463 0.442 0.522 0.587 10 +#> G 0.381 0.404 0.454 0.451 0.495 0.546 10 +#> H 0.232 0.268 0.304 0.297 0.336 0.391 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -354,13 +351,13 @@ print(run_it, unit = "ms", signif = 4) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> en 18.96 19.05 19.17 19.10 19.28 19.50 10 -#> de 31.11 31.58 40.75 32.11 33.36 75.57 10 -#> nl 31.18 31.55 39.10 31.77 32.52 75.92 10 -#> es 30.76 31.46 40.46 31.62 33.07 75.85 10 -#> it 31.02 31.41 36.06 31.67 31.95 76.19 10 -#> fr 31.02 31.66 36.36 31.84 32.46 76.22 10 -#> pt 31.13 31.58 31.78 31.65 31.74 33.00 10
+#> en 18.37 19.07 19.98 19.22 19.41 27.24 10 +#> de 30.58 31.06 35.93 31.42 31.54 77.27 10 +#> nl 31.01 31.33 37.73 31.41 31.59 93.52 10 +#> es 31.38 31.45 36.30 31.81 32.53 76.12 10 +#> it 30.98 31.40 36.99 31.58 32.30 84.46 10 +#> fr 31.19 31.21 35.82 31.45 31.62 75.52 10 +#> pt 31.02 31.36 40.85 32.22 33.84 76.22 10

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index d84e92d1..683d4169 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/news/index.html b/docs/news/index.html index bd2e78a3..746e5533 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -243,9 +243,19 @@ + +
+

+Other

+
@@ -401,7 +411,7 @@ These functions use as.atc() Using as.mo(..., allow_uncertain = 3) could lead to very unreliable results.
  • Implemented the latest publication of Becker et al. (2019), for categorising coagulase-negative Staphylococci
  • -
  • All microbial IDs that found are now saved to a local file ~/.Rhistory_mo. Use the new function clean_mo_history() to delete this file, which resets the algorithms.
  • +
  • All microbial IDs that were found are now saved to a local file ~/.Rhistory_mo. Use the new function clean_mo_history() to delete this file, which resets the algorithms.
  • Incoercible results will now be considered ‘unknown’, MO code UNKNOWN. On foreign systems, properties of these will be translated to all languages already previously supported: German, Dutch, French, Italian, Spanish and Portuguese:

    mo_genus("qwerty", language = "es")
    @@ -488,9 +498,9 @@ Using as.mo(..., allow_uncertain = 3)if using different lengths of pattern and x in %like%, it will now return the call
  • -
    +

    -Other

    +Other
    • Updated licence text to emphasise GPL 2.0 and that this is an R package.
    @@ -613,9 +623,9 @@ Using as.mo(..., allow_uncertain = 3)

    Percentages will now be rounded more logically (e.g. in freq function)

    -
    +

    -Other

    +Other
    • New dependency on package crayon, to support formatted text in the console
    • Dependency tidyr is now mandatory (went to Import field) since portion_df and count_df rely on it
    • @@ -750,9 +760,9 @@ Using as.mo(..., allow_uncertain = 3)
    -
    +

    -Other

    +Other
    • More unit tests to ensure better integrity of functions
    @@ -879,9 +889,9 @@ Using as.mo(..., allow_uncertain = 3)Other small fixes
    -
    +

    -Other

    +Other
    • Added integration tests (check if everything works as expected) for all releases of R 3.1 and higher
    -
    +

    -Other

    +Other
    • Expanded README.md with more examples
    • Added ORCID of authors to DESCRIPTION file
    • diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 7d4a4cb5..6197aca6 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -80,7 +80,7 @@ AMR (for R) - 0.6.0 + 0.6.1.9002
    @@ -267,9 +267,9 @@ The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).

  • A character:

    • "children", equivalent of: c(0, 1, 2, 4, 6, 13, 18). This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.

    • -
    • "elderly" or "seniors", equivalent of: c(65, 75, 85, 95). This will split on 0-64, 65-74, 75-84, 85-94 and 95+.

    • -
    • "fives", equivalent of: 1:24 * 5. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120.

    • -
    • "tens", equivalent of: 1:12 * 10. This will split on 0-9, 10-19, 20-29 and so forth, until 120.

    • +
    • "elderly" or "seniors", equivalent of: c(65, 75, 85). This will split on 0-64, 65-74, 75-84, 85+.

    • +
    • "fives", equivalent of: 1:20 * 5. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.

    • +
    • "tens", equivalent of: 1:10 * 10. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.

  • @@ -294,11 +294,11 @@ age_groups(ages, c(20, 50)) # split into groups of ten years -age_groups(ages, 1:12 * 10) +age_groups(ages, 1:10 * 10) age_groups(ages, split_at = "tens") # split into groups of five years -age_groups(ages, 1:24 * 5) +age_groups(ages, 1:20 * 5) age_groups(ages, split_at = "fives") # split specifically for children diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 32ca1fcb..dd3bfa63 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -250,9 +250,7 @@ mo_uncertainties() -mo_renamed() - -clean_mo_history(...) +mo_renamed()

    Arguments

    @@ -310,10 +308,6 @@ A microbial ID from this package (class: mo) typically looks like t

    Values that cannot be coerced will be considered 'unknown' and will get the MO code UNKNOWN.

    Use the mo_property_* functions to get properties based on the returned code, see Examples.

    The algorithm uses data from the Catalogue of Life (see below) and from one other source (see ?microorganisms).

    -

    Self-learning algoritm
    -The as.mo() function gains experience from previously determined microbial IDs and learns from it. This drastically improves both speed and reliability. Use clean_mo_history() to reset the algorithms. Only experience from your current AMR package version is used. This is done because in the future the taxonomic tree (which is included in this package) may change for any organism and it consequently has to rebuild its knowledge.

    -

    Usually, any guess after the first try runs 80-95% faster than the first try.

    -

    For now, learning only works per session. If R is closed or terminated, the algorithms reset. This will probably be resolved in a next version.

    Intelligent rules
    This function uses intelligent rules to help getting fast and logical results. It tries to find matches in this order:

    • Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations

    • @@ -365,7 +359,7 @@ The intelligent rules takes into account microbial prevalence of pathogens in hu


      -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

      Read more on our website!

      diff --git a/docs/reference/catalogue_of_life.html b/docs/reference/catalogue_of_life.html index 61469b73..d5d35e06 100644 --- a/docs/reference/catalogue_of_life.html +++ b/docs/reference/catalogue_of_life.html @@ -246,7 +246,7 @@


      -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

      Included taxa

      diff --git a/docs/reference/catalogue_of_life_version.html b/docs/reference/catalogue_of_life_version.html index d4905038..0ecc05a4 100644 --- a/docs/reference/catalogue_of_life_version.html +++ b/docs/reference/catalogue_of_life_version.html @@ -80,7 +80,7 @@ AMR (for R) - 0.6.0 + 0.6.1.9002 @@ -256,7 +256,7 @@


      -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

      Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

      Read more on our website!

      diff --git a/docs/reference/index.html b/docs/reference/index.html index e818a76c..c3d3df8c 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -293,7 +293,7 @@
    diff --git a/docs/reference/microorganisms.codes.html b/docs/reference/microorganisms.codes.html index 5a8598c1..b0df9b2b 100644 --- a/docs/reference/microorganisms.codes.html +++ b/docs/reference/microorganisms.codes.html @@ -80,7 +80,7 @@ AMR (for R) - 0.6.0 + 0.6.1.9002 @@ -254,7 +254,7 @@


    -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

    Read more on our website!

    diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index a1e81a50..99b3e534 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -80,7 +80,7 @@ AMR (for R) - 0.6.0 + 0.6.1.9002 @@ -288,7 +288,7 @@


    -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

    Read more on our website!

    diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index f29bd10e..0549ab5c 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -260,7 +260,7 @@


    -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

    Read more on our website!

    diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 9fcf9e5d..e3c0a846 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -80,7 +80,7 @@ AMR (for R) - 0.6.0 + 0.6.1.9002 @@ -334,7 +334,7 @@


    -This package contains the complete taxonomic tree of almost all microorganisms (~60,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    +This package contains the complete taxonomic tree of almost all microorganisms (~65,000 species) from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). The Catalogue of Life is the most comprehensive and authoritative global index of species currently available.

    Click here for more information about the included taxa. The Catalogue of Life releases updates annually; check which version was included in this package with catalogue_of_life_version().

    Source

    diff --git a/inst/eucast/eucast_rules.tsv b/inst/eucast/eucast_rules.tsv index 651c3e5c..f68ea581 100644 --- a/inst/eucast/eucast_rules.tsv +++ b/inst/eucast/eucast_rules.tsv @@ -147,7 +147,7 @@ genus_species is Staphylococcus hominis cfta R Table 04: Intrinsic resistance genus_species is Staphylococcus haemolyticus cfta R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules genus_species is Staphylococcus intermedius cfta R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules genus_species is Staphylococcus pseudintermedius cfta R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules -genus is Streptococcus fusi, aminoglycosides R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules +genus is Streptococcus fusi, cfta, aminoglycosides R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules genus_species is Enterococcus faecalis fusi, cfta, cephalosporins_without_cfta, aminoglycosides, macrolides, clin, qida, trim, trsu R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules genus_species is Enterococcus gallinarum fusi, cfta, cephalosporins_without_cfta, aminoglycosides, macrolides, clin, qida, vanc, trim, trsu R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules genus_species is Enterococcus casseliflavus fusi, cfta, cephalosporins_without_cfta, aminoglycosides, macrolides, clin, qida, vanc, trim, trsu R Table 04: Intrinsic resistance in Gram-positive bacteria Expert Rules diff --git a/man/age_groups.Rd b/man/age_groups.Rd index 54767a3d..e6f0050c 100644 --- a/man/age_groups.Rd +++ b/man/age_groups.Rd @@ -25,9 +25,9 @@ To split ages, the input can be: \item{A character:} \itemize{ \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.} - \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. 
This will split on 0-64, 65-74, 75-84, 85-94 and 95+.} - \item{\code{"fives"}, equivalent of: \code{1:24 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120.} - \item{\code{"tens"}, equivalent of: \code{1:12 * 10}. This will split on 0-9, 10-19, 20-29 and so forth, until 120.} + \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85)}. This will split on 0-64, 65-74, 75-84, 85+.} + \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.} + \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.} } } } @@ -46,11 +46,11 @@ age_groups(ages, 50) age_groups(ages, c(20, 50)) # split into groups of ten years -age_groups(ages, 1:12 * 10) +age_groups(ages, 1:10 * 10) age_groups(ages, split_at = "tens") # split into groups of five years -age_groups(ages, 1:24 * 5) +age_groups(ages, 1:20 * 5) age_groups(ages, split_at = "fives") # split specifically for children diff --git a/man/as.mo.Rd b/man/as.mo.Rd index fa970206..a3824e1a 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mo.R, R/mo_history.R +% Please edit documentation in R/mo.R \name{as.mo} \alias{as.mo} \alias{mo} @@ -7,7 +7,6 @@ \alias{mo_failures} \alias{mo_uncertainties} \alias{mo_renamed} -\alias{clean_mo_history} \title{Transform to microorganism ID} \usage{ as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, @@ -20,8 +19,6 @@ mo_failures() mo_uncertainties() mo_renamed() - -clean_mo_history(...) } \arguments{ \item{x}{a character vector or a \code{data.frame} with one or two columns}
    -

    as.mo() is.mo() mo_failures() mo_uncertainties() mo_renamed() clean_mo_history()

    +

    as.mo() is.mo() mo_failures() mo_uncertainties() mo_renamed()

    Transform to microorganism ID