algorithm update

2025-08-24 23:42:18 +02:00 · 2019-02-21 18:55:52 +01:00
parent c6e57ca456
commit 68a9a35ed6
112 changed files with 586 additions and 886 deletions
--- a/R/amr.R
+++ b/R/amr.R
@@ -0,0 +1,67 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# SOURCE                                                               #
+# https://gitlab.com/msberends/AMR                                     #
+#                                                                      #
+# LICENCE                                                              #
+# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+#                                                                      #
+# This R package was created for academic research and was publicly    #
+# released in the hope that it will be useful, but it comes WITHOUT    #
+# ANY WARRANTY OR LIABILITY.                                           #
+# Visit our website for more info: https://msberends.gitab.io/AMR.     #
+# ==================================================================== #
+
+#' The \code{AMR} Package
+#'
+#' Welcome to the \code{AMR} package.
+#' @details
+#' \code{AMR} is a free and open-source R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. It supports any table format, including WHONET/EARS-Net data.
+#'
+#' We created this package for both academic research and routine analysis at the Faculty of Medical Sciences of the University of Groningen and the Medical Microbiology & Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is actively maintained and free software; you can freely use and distribute it for both personal and commercial (but not patent) purposes under the terms of the GNU General Public License version 2.0 (GPL-2), as published by the Free Software Foundation.
+#'
+#' This package can be used for:
+#' \itemize{
+#'   \item{Reference for microorganisms, since it contains almost all 60,000 microbial (sub)species from the Catalogue of Life}
+#'   \item{Calculating antimicrobial resistance}
+#'   \item{Calculating empirical susceptibility of both mono therapy and combination therapy}
+#'   \item{Predicting future antimicrobial resistance using regression models}
+#'   \item{Getting properties for any microorganism (like Gram stain, species, genus or family)}
+#'   \item{Getting properties for any antibiotic (like name, ATC code, defined daily dose or trade name)}
+#'   \item{Plotting antimicrobial resistance}
+#'   \item{Determining first isolates to be used for AMR analysis}
+#'   \item{Applying EUCAST expert rules (not the translation from MIC to RSI values)}
+#'   \item{Determining multi-drug resistant organisms (MDRO)}
+#'   \item{Descriptive statistics: frequency tables, kurtosis and skewness}
+#' }
+#' @section Authors:
+#' Matthijs S. Berends[1,2] Christian F. Luz[1], Erwin E.A. Hassing[2],  Corinna Glasner[1],  Alex W. Friedrich[1],  Bhanu N.M. Sinha[1] \cr
+#'
+#' [1] Department of Medical Microbiology, University of Groningen, University Medical Center Groningen, Groningen, the Netherlands - \url{rug.nl} \url{umcg.nl} \cr
+#' [2] Certe Medical Diagnostics & Advice, Groningen, the Netherlands - \url{certe.nl}
+
+#' @section Read more on our website!:
+#' On our website \url{https://msberends.gitlab.io/AMR} you can find \href{https://msberends.gitlab.io/AMR/articles/AMR.html}{a comprehensive tutorial} about how to conduct AMR analysis, the \href{https://msberends.gitlab.io/AMR/reference}{complete documentation of all functions} (which reads a lot easier than here in R) and \href{https://msberends.gitlab.io/AMR/articles/WHONET.html}{an example analysis using WHONET data}.
+
+#' @section Contact us:
+#' For suggestions, comments or questions, please contact us at:
+#'
+#' Matthijs S. Berends \cr
+#' m.s.berends [at] umcg [dot] nl \cr
+#' Department of Medical Microbiology, University of Groningen \cr
+#' University Medical Center Groningen \cr
+#' Post Office Box 30001 \cr
+#' 9700 RB Groningen
+#'
+#' If you have found a bug, please file a new issue at: \cr
+#' \url{https://gitlab.com/msberends/AMR/issues}
+#' @name AMR
+#' @rdname AMR
+NULL
--- a/R/atc_online.R
+++ b/R/atc_online.R
@@ -56,6 +56,7 @@
 #' @export
 #' @rdname atc_online
 #' @importFrom dplyr %>% progress_estimated
+#' @inheritSection AMR Read more on our website!
 #' @source \url{https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/}
 #' @examples
 #' \donttest{
--- a/R/availability.R
+++ b/R/availability.R
@@ -24,6 +24,7 @@
 #' Easy check for availability of columns in a data set. This makes it easy to get an idea of which antibiotic combination can be used for calculation with e.g. \code{\link{portion_IR}}.
 #' @param tbl a \code{data.frame} or \code{list}
 #' @return \code{data.frame} with column names of \code{tbl} as row names and columns: \code{percent_IR}, \code{count}, \code{percent}, \code{visual_availability}.
+#' @inheritSection AMR Read more on our website!
 #' @export
 #' @examples
 #' availability(septic_patients)
--- a/R/data.R
+++ b/R/data.R
@@ -170,6 +170,7 @@ catalogue_of_life <- list(
 #' Version info of included Catalogue of Life
 #' @seealso \code{\link{microorganisms}}
 #' @inheritSection catalogue_of_life Catalogue of Life
+#' @inheritSection AMR Read more on our website!
 #' @export
 catalogue_of_life_version <- function() {
  list(version = catalogue_of_life$version,
--- a/R/mo.R
+++ b/R/mo.R
@@ -304,11 +304,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
    # add start en stop regex
    x <- paste0('^', x, '$')
    x_withspaces_start_only <- paste0('^', x_withspaces)
+    x_withspaces_end_only <- paste0(x_withspaces, '$')
    x_withspaces_start_end <- paste0('^', x_withspaces, '$')

    # cat(paste0('x                       "', x, '"\n'))
    # cat(paste0('x_species               "', x_species, '"\n'))
    # cat(paste0('x_withspaces_start_only "', x_withspaces_start_only, '"\n'))
+    # cat(paste0('x_withspaces_end_only   "', x_withspaces_end_only, '"\n'))
    # cat(paste0('x_withspaces_start_end  "', x_withspaces_start_end, '"\n'))
    # cat(paste0('x_backup                "', x_backup, '"\n'))
    # cat(paste0('x_trimmed               "', x_trimmed, '"\n'))
@@ -494,194 +496,113 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
        }
      }

-      # FIRST TRY SUPERPREVALENT IN HUMAN INFECTIONS ----
-      found <- microorganisms.superprevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
-      # most probable: is exact match in fullname
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.superprevDT[mo == toupper(x_backup[i]), ..property][[1]]
-      # is a valid mo
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.superprevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
+      check_per_prevalence <- function(data_to_check,
+                                       a.x_backup,
+                                       b.x_trimmed,
+                                       c.x_trimmed_without_group,
+                                       d.x_withspaces_start_end,
+                                       e.x_withspaces_start_only,
+                                       f.x_withspaces_end_only) {

-      # try any match keeping spaces ----
-      found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_end[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try any match keeping spaces, not ending with $ ----
-      found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try any match diregarding spaces ----
-      found <- microorganisms.superprevDT[fullname %like% x[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try splitting of characters in the middle and then find ID ----
-      # only when text length is 6 or lower
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
-      if (nchar(x_trimmed[i]) <= 6) {
-        x_length <- nchar(x_trimmed[i])
-        x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
-                       '.* ',
-                       x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
-        found <- microorganisms.superprevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
+        found <- data_to_check[tolower(fullname) %in% tolower(c(a.x_backup, b.x_trimmed)), ..property][[1]]
+        # most probable: is exact match in fullname
        if (length(found) > 0) {
-          x[i] <- found[1L]
-          next
+          return(found[1L])
        }
-      }

-      # try fullname without start and stop regex, to also find subspecies ----
-      # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- microorganisms.superprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # TRY PREVALENT IN HUMAN INFECTIONS ----
-      found <- microorganisms.prevDT[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), ..property][[1]]
-      # most probable: is exact match in fullname
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.prevDT[mo == toupper(x_backup[i]), ..property][[1]]
-      # is a valid mo
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.prevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try any match keeping spaces ----
-      found <- microorganisms.prevDT[fullname %like% x_withspaces_start_end[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try any match keeping spaces, not ending with $ ----
-      found <- microorganisms.prevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try any match diregarding spaces ----
-      found <- microorganisms.prevDT[fullname %like% x[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-
-      # try splitting of characters in the middle and then find ID ----
-      # only when text length is 6 or lower
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
-      if (nchar(x_trimmed[i]) <= 6) {
-        x_length <- nchar(x_trimmed[i])
-        x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
-                       '.* ',
-                       x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
-        found <- microorganisms.prevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
+        found <- data_to_check[mo == toupper(a.x_backup), ..property][[1]]
+        # is a valid mo
        if (length(found) > 0) {
-          x[i] <- found[1L]
-          next
+          return(found[1L])
        }
+        found <- data_to_check[tolower(fullname) == tolower(c.x_trimmed_without_group), ..property][[1]]
+        if (length(found) > 0) {
+          return(found[1L])
+        }
+
+        # try any match keeping spaces ----
+        found <- data_to_check[fullname %like% d.x_withspaces_start_end, ..property][[1]]
+        if (length(found) > 0 & nchar(b.x_trimmed) >= 6) {
+          return(found[1L])
+        }
+
+        # try any match keeping spaces, not ending with $ ----
+        found <- data_to_check[fullname %like% paste0(trimws(e.x_withspaces_start_only), " "), ..property][[1]]
+        if (length(found) > 0) {
+          return(found[1L])
+        }
+        found <- data_to_check[fullname %like% e.x_withspaces_start_only, ..property][[1]]
+        if (length(found) > 0 & nchar(b.x_trimmed) >= 6) {
+          return(found[1L])
+        }
+
+        # try any match keeping spaces, not start with ^ ----
+        found <- data_to_check[fullname %like% paste0(" ", trimws(f.x_withspaces_end_only)), ..property][[1]]
+        if (length(found) > 0) {
+          return(found[1L])
+        }
+        found <- data_to_check[fullname %like% f.x_withspaces_end_only, ..property][[1]]
+        if (length(found) > 0 & nchar(b.x_trimmed) >= 6) {
+          return(found[1L])
+        }
+
+        # try splitting of characters in the middle and then find ID ----
+        # only when text length is 6 or lower
+        # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
+        if (nchar(b.x_trimmed) <= 6) {
+          x_length <- nchar(b.x_trimmed)
+          x_split <- paste0("^",
+                            b.x_trimmed %>% substr(1, x_length / 2),
+                            '.* ',
+                            b.x_trimmed %>% substr((x_length / 2) + 1, x_length))
+          found <- data_to_check[fullname %like% x_split, ..property][[1]]
+          if (length(found) > 0) {
+            return(found[1L])
+          }
+        }
+
+        # try fullname without start and without nchar limit of >= 6 ----
+        # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
+        found <- data_to_check[fullname %like% e.x_withspaces_start_only, ..property][[1]]
+        if (length(found) > 0) {
+          return(found[1L])
+        }
+
+        # didn't found any
+        return(NA_character_)
      }

-      # try fullname without start and stop regex, to also find subspecies ----
-      # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- microorganisms.prevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
+      # FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
+      x[i] <- check_per_prevalence(data_to_check = microorganismsDT[prevalence == 1],
+                                   a.x_backup = x_backup[i],
+                                   b.x_trimmed = x_trimmed[i],
+                                   c.x_trimmed_without_group = x_trimmed_without_group[i],
+                                   d.x_withspaces_start_end = x_withspaces_start_end[i],
+                                   e.x_withspaces_start_only = x_withspaces_start_only[i],
+                                   f.x_withspaces_end_only = x_withspaces_end_only[i])
+      if (!is.na(x[i])) {
+        next
+      }
+      # THEN TRY PREVALENT IN HUMAN INFECTIONS ----
+      x[i] <- check_per_prevalence(data_to_check = microorganismsDT[prevalence == 2],
+                                   a.x_backup = x_backup[i],
+                                   b.x_trimmed = x_trimmed[i],
+                                   c.x_trimmed_without_group = x_trimmed_without_group[i],
+                                   d.x_withspaces_start_end = x_withspaces_start_end[i],
+                                   e.x_withspaces_start_only = x_withspaces_start_only[i],
+                                   f.x_withspaces_end_only = x_withspaces_end_only[i])
+      if (!is.na(x[i])) {
        next
      }
-
      # THEN UNPREVALENT IN HUMAN INFECTIONS ----
-      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_backup[i]), ..property][[1]]
-      # most probable: is exact match in fullname
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_trimmed[i]), ..property][[1]]
-      # most probable: is exact match in fullname
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.unprevDT[mo == toupper(x_backup[i]), ..property][[1]]
-      # is a valid mo
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      # try any match keeping spaces ----
-      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start_end[i], ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      # try any match keeping spaces, not ending with $ ----
-      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
-        next
-      }
-      # try any match diregarding spaces ----
-      found <- microorganisms.unprevDT[fullname %like% x[i], ..property][[1]]
-      if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
-        x[i] <- found[1L]
-        next
-      }
-      # try splitting of characters in the middle and then find ID ----
-      # only when text length is 6 or lower
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
-      if (nchar(x_trimmed[i]) <= 6) {
-        x_length <- nchar(x_trimmed[i])
-        x[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2),
-                       '.* ',
-                       x_trimmed[i] %>% substr((x_length / 2) + 1, x_length))
-        found <- microorganisms.unprevDT[fullname %like% paste0('^', x[i]), ..property][[1]]
-        if (length(found) > 0) {
-          x[i] <- found[1L]
-          next
-        }
-      }
-
-      # try fullname without start and stop regex, to also find subspecies ----
-      # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
-      found <- microorganisms.unprevDT[fullname %like% x_withspaces_start_only[i], ..property][[1]]
-      if (length(found) > 0) {
-        x[i] <- found[1L]
+      x[i] <- check_per_prevalence(data_to_check = microorganismsDT[prevalence == 3],
+                                   a.x_backup = x_backup[i],
+                                   b.x_trimmed = x_trimmed[i],
+                                   c.x_trimmed_without_group = x_trimmed_without_group[i],
+                                   d.x_withspaces_start_end = x_withspaces_start_end[i],
+                                   e.x_withspaces_start_only = x_withspaces_start_only[i],
+                                   f.x_withspaces_end_only = x_withspaces_end_only[i])
+      if (!is.na(x[i])) {
        next
      }

--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -125,7 +125,7 @@
 #'             language = "nl")              # "Streptococcus groep A"
 #'
 #'
-#' # Get a list with the complete taxonomy (subkingdom to subspecies)
+#' # Get a list with the complete taxonomy (kingdom to subspecies)
 #' mo_taxonomy("E. coli")
 mo_fullname <- function(x, language = get_locale(), ...) {
  x <- mo_validate(x = x, property = "fullname", ...)
--- a/R/zzz.R
+++ b/R/zzz.R