New mo algorithm, prepare for 2.0

2025-07-12 18:21:49 +02:00 · 2022-10-05 09:12:22 +02:00
parent 63fe160322
commit cd2acc4a29
182 changed files with 4054 additions and 90905 deletions
--- a/R/ab_from_text.R
+++ b/R/ab_from_text.R
@@ -1,12 +1,16 @@
 # ==================================================================== #
 # TITLE                                                                #
-# Antimicrobial Resistance (AMR) Data Analysis for R                   #
+# AMR: An R Package for Working with Antimicrobial Resistance Data     #
 #                                                                      #
 # SOURCE                                                               #
 # https://github.com/msberends/AMR                                     #
 #                                                                      #
-# LICENCE                                                              #
-# (c) 2018-2022 Berends MS, Luz CF et al.                              #
+# CITE AS                                                              #
+# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
+# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
+# Data. Journal of Statistical Software, 104(3), 1-31.                 #
+# doi:10.18637/jss.v104.i03                                            #
+#                                                                      #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
 # Diagnostics & Advice, and University Medical Center Groningen.       #
@@ -110,7 +114,7 @@ ab_from_text <- function(text,
  meet_criteria(thorough_search, allow_class = "logical", has_length = 1, allow_NULL = TRUE)
  meet_criteria(info, allow_class = "logical", has_length = 1)

-  type <- tolower(trimws(type))
+  type <- tolower(trimws2(type))

  text <- tolower(as.character(text))
  text_split_all <- strsplit(text, "[ ;.,:\\|]")
@@ -120,21 +124,21 @@ ab_from_text <- function(text,
  if (type %like% "(drug|ab|anti)") {
    translate_ab <- get_translate_ab(translate_ab)

-    if (isTRUE(thorough_search) |
-      (isTRUE(is.null(thorough_search)) & max(vapply(FUN.VALUE = double(1), text_split_all, length), na.rm = TRUE) <= 3)) {
+    if (isTRUE(thorough_search) ||
+      (isTRUE(is.null(thorough_search)) && max(vapply(FUN.VALUE = double(1), text_split_all, length), na.rm = TRUE) <= 3)) {
      text_split_all <- text_split_all[nchar(text_split_all) >= 4 & grepl("[a-z]+", text_split_all)]
      result <- lapply(text_split_all, function(text_split) {
        progress$tick()
        suppressWarnings(
-          out <- as.ab(text_split, ...)
+          as.ab(text_split, ...)
        )
      })
    } else {
      # no thorough search
-      abbr <- unlist(antibiotics$abbreviations)
+      abbr <- unlist(AMR::antibiotics$abbreviations)
      abbr <- abbr[nchar(abbr) >= 4]
-      names_atc <- substr(c(antibiotics$name, antibiotics$atc), 1, 5)
-      synonyms <- unlist(antibiotics$synonyms)
+      names_atc <- substr(c(AMR::antibiotics$name, AMR::antibiotics$atc), 1, 5)
+      synonyms <- unlist(AMR::antibiotics$synonyms)
      synonyms <- synonyms[nchar(synonyms) >= 4]
      # regular expression must not be too long, so split synonyms in two:
      synonyms_part1 <- synonyms[seq_len(0.5 * length(synonyms))]
@@ -149,7 +153,7 @@ ab_from_text <- function(text,
      result <- lapply(text_split_all, function(text_split) {
        progress$tick()
        suppressWarnings(
-          out <- as.ab(
+          as.ab(
            unique(c(
              text_split[text_split %like_case% to_regex(abbr)],
              text_split[text_split %like_case% to_regex(names_atc)],
@@ -176,7 +180,7 @@ ab_from_text <- function(text,
      }
    })
  } else if (type %like% "dos") {
-    text_split_all <- strsplit(text, " ")
+    text_split_all <- strsplit(text, " ", fixed = TRUE)
    result <- lapply(text_split_all, function(text_split) {
      text_split <- text_split[text_split %like% "^[0-9]{2,}(/[0-9]+)?[a-z]*$"]
      # keep only the part left of "/", e.g. 500 in "500/125"