(v0.7.1.9038) mo algorithm inprovements

2025-06-08 14:33:59 +02:00 · 2019-08-12 19:07:15 +02:00 · 2019-08-12 19:07:15 +02:00 · 5f2733349e
commit 5f2733349e
parent c44c5e3913
10 changed files with 226 additions and 130 deletions
--- a/2
+++ b/2
@ -1,5 +1,5 @@
 Package: AMR
-Version: 0.7.1.9037
+Version: 0.7.1.9038
 Date: 2019-08-12
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
--- a/NEWS.md
+++ b/NEWS.md
@ -1,4 +1,4 @@
-# AMR 0.7.1.9037
+# AMR 0.7.1.9038

 ### Breaking
 * Function `freq()` has moved to a new package, [`clean`](https://github.com/msberends/clean) ([CRAN link](https://cran.r-project.org/package=clean)). Creating frequency tables is actually not the scope of this package (never was) and this function has matured  a lot over the last two years. Therefore, a new package was created for data cleaning and checking and it perfectly fits the `freq()` function. The [`clean`](https://github.com/msberends/clean) package is available on CRAN and will be installed automatically when updating the `AMR` package, that now imports it. In a later stage, the `skewness()` and `kurtosis()` functions will be moved to the `clean` package too.
--- a/R/mo.R
+++ b/R/mo.R
@ -193,19 +193,19 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
    require("AMR")
    # check onLoad() in R/zzz.R: data tables are created there.
  }
-
+  
  # WHONET: xxx = no growth
  x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_
-
+  
  uncertainty_level <- translate_allow_uncertain(allow_uncertain)
  # mo_hist <- get_mo_history(x, uncertainty_level, force = isTRUE(list(...)$force_mo_history))
-
+  
  if (mo_source_isvalid(reference_df)
      & isFALSE(Becker)
      & isFALSE(Lancefield)
      & !is.null(reference_df)
      & all(x %in% reference_df[,1][[1]])) {
-
+    
    # has valid own reference_df
    # (data.table not faster here)
    reference_df <- reference_df %>% filter(!is.na(mo))
@ -225,18 +225,18 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
        left_join(reference_df, by = "x") %>%
        pull("mo")
    )
-
+    
  } else if (all(x %in% AMR::microorganisms$mo)
             & isFALSE(Becker)
             & isFALSE(Lancefield)) {
    y <- x
-
+    
    # } else if (!any(is.na(mo_hist))
    #            & isFALSE(Becker)
    #            & isFALSE(Lancefield)) {
    #   # check previously found results
    #   y <- mo_hist
-
+    
  } else if (all(tolower(x) %in% microorganismsDT$fullname_lower)
             & isFALSE(Becker)
             & isFALSE(Lancefield)) {
@ -257,7 +257,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
    }
    # save them to history
    set_mo_history(x, y, 0, force = isTRUE(list(...)$force_mo_history))
-
+    
  } else {
    # will be checked for mo class in validation and uses exec_as.mo internally if necessary
    y <- mo_validate(x = x, property = "mo",
@ -266,8 +266,8 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
                     force_mo_history = isTRUE(list(...)$force_mo_history),
                     ...)
  }
-
-
+  
+  
  to_class_mo(y)
 }

@ -286,6 +286,7 @@ is.mo <- function(x) {
 #' @importFrom crayon magenta red blue silver italic
 # param property a column name of AMR::microorganisms
 # param initial_search logical - is FALSE when coming from uncertain tries, which uses exec_as.mo internally too
+# param dyslexia_mode logical - also check for characters that resemble others
 # param force_mo_history logical - whether found result must be saved with set_mo_history (default FALSE on non-interactive sessions)
 # param debug logical - show different lookup texts while searching
 exec_as.mo <- function(x,
@ -295,23 +296,24 @@ exec_as.mo <- function(x,
                       reference_df = get_mo_source(),
                       property = "mo",
                       initial_search = TRUE,
+                       dyslexia_mode = FALSE,
                       force_mo_history = FALSE,
                       debug = FALSE) {
-
+  
  if (!"AMR" %in% base::.packages()) {
    require("AMR")
    # check onLoad() in R/zzz.R: data tables are created there.
  }
-
+  
  # WHONET: xxx = no growth
  x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_
-
+  
  if (initial_search == TRUE) {
    options(mo_failures = NULL)
    options(mo_uncertainties = NULL)
    options(mo_renamed = NULL)
  }
-
+  
  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
    # paste these columns together
@ -325,20 +327,20 @@ exec_as.mo <- function(x,
      stop('`x` can be 2 columns at most', call. = FALSE)
    }
    x[is.null(x)] <- NA
-
+    
    # support tidyverse selection like: df %>% select(colA)
    if (!is.vector(x) & !is.null(dim(x))) {
      x <- pull(x, 1)
    }
  }
-
+  
  notes <- character(0)
  uncertainties <- data.frame(input = character(0),
                              fullname = character(0),
                              mo = character(0))
  failures <- character(0)
  uncertainty_level <- translate_allow_uncertain(allow_uncertain)
-
+  
  x_input <- x
  # already strip leading and trailing spaces
  x <- trimws(x, which = "both")
@ -350,7 +352,7 @@ exec_as.mo <- function(x,
         & !is.null(x)
         & !identical(x, "")
         & !identical(x, "xxx")]
-
+  
  # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
  if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
    leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
@ -372,7 +374,7 @@ exec_as.mo <- function(x,
        pull(new)
    }
  }
-
+  
  # defined df to check for
  if (!is.null(reference_df)) {
    if (!mo_source_isvalid(reference_df)) {
@ -391,7 +393,7 @@ exec_as.mo <- function(x,
      reference_df[] <- lapply(reference_df, as.character)
    )
  }
-
+  
  # all empty
  if (all(identical(trimws(x_input), "") | is.na(x_input) | length(x) == 0)) {
    if (property == "mo") {
@ -399,7 +401,7 @@ exec_as.mo <- function(x,
    } else {
      return(rep(NA_character_, length(x_input)))
    }
-
+    
  } else if (all(x %in% reference_df[, 1][[1]])) {
    # all in reference df
    colnames(reference_df)[1] <- "x"
@ -409,7 +411,7 @@ exec_as.mo <- function(x,
        left_join(AMR::microorganisms, by = "mo") %>%
        pull(property)
    )
-
+    
  } else if (all(x %in% AMR::microorganisms$mo)) {
    # existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL")
    y <- microorganismsDT[prevalence == 1][data.table(mo = x), on = "mo", ..property][[1]]
@ -424,7 +426,7 @@ exec_as.mo <- function(x,
                                                       ..property][[1]]
    }
    x <- y
-
+    
  } else if (all(x %in% read_mo_history(uncertainty_level,
                                        force = force_mo_history)$x)) {
    # previously found code
@ -432,7 +434,7 @@ exec_as.mo <- function(x,
                                                         uncertainty_level,
                                                         force = force_mo_history)),
                          on = "mo", ..property][[1]]
-
+    
  } else if (all(tolower(x) %in% microorganismsDT$fullname_lower)) {
    # we need special treatment for very prevalent full names, they are likely!
    # e.g. as.mo("Staphylococcus aureus")
@ -448,30 +450,30 @@ exec_as.mo <- function(x,
                                                       ..property][[1]]
    }
    x <- y
-
+    
  } else if (all(toupper(x) %in% AMR::microorganisms.codes$code)) {
    # commonly used MO codes
    y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ]
    # save them to history
    set_mo_history(x, y$mo, 0, force = force_mo_history)
-
+    
    x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
-
+    
  } else if (!all(x %in% AMR::microorganisms[, property])) {
-
+    
    strip_whitespace <- function(x) {
      # all whitespaces (tab, new lines, etc.) should be one space
      # and spaces before and after should be omitted
      trimws(gsub("[\\s]+", " ", x, perl = TRUE), which = "both")
    }
-
+    
    x <- strip_whitespace(x)
    x_backup <- x
-
+    
    # remove spp and species
    x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x_backup, ignore.case = TRUE)
    x <- strip_whitespace(x)
-
+    
    x_backup_without_spp <- x
    x_species <- paste(x, "species")
    # translate to English for supported languages of mo_property
@ -490,7 +492,7 @@ exec_as.mo <- function(x,
    # remove genus as first word
    x <- gsub("^Genus ", "", x)
    # allow characters that resemble others ----
-    if (initial_search == FALSE) {
+    if (dyslexia_mode == TRUE) {
      x <- tolower(x)
      x <- gsub("[iy]+", "[iy]+", x)
      x <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x)
@ -512,7 +514,7 @@ exec_as.mo <- function(x,
      x <- gsub("e\\+n(?![a-z[])", "(e+n|u+(c|k|q|qu|s|z|x|ks)+)", x, ignore.case = TRUE, perl = TRUE)
    }
    x <- strip_whitespace(x)
-
+    
    x_trimmed <- x
    x_trimmed_species <- paste(x_trimmed, "species")
    x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, ignore.case = TRUE)
@ -526,7 +528,7 @@ exec_as.mo <- function(x,
    x_withspaces_start_only <- paste0('^', x_withspaces)
    x_withspaces_end_only <- paste0(x_withspaces, '$')
    x_withspaces_start_end <- paste0('^', x_withspaces, '$')
-
+    
    if (isTRUE(debug)) {
      cat(paste0('x                       "', x, '"\n'))
      cat(paste0('x_species               "', x_species, '"\n'))
@ -539,13 +541,13 @@ exec_as.mo <- function(x,
      cat(paste0('x_trimmed_species       "', x_trimmed_species, '"\n'))
      cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
    }
-
+    
    progress <- progress_estimated(n = length(x), min_time = 3)
-
+    
    for (i in 1:length(x)) {
-
+      
      progress$tick()$print()
-
+      
      if (initial_search == TRUE) {
        found <- microorganismsDT[mo == get_mo_history(x_backup[i],
                                                       uncertainty_level,
@ -557,14 +559,14 @@ exec_as.mo <- function(x,
          next
        }
      }
-
+      
      found <- microorganismsDT[mo == toupper(x_backup[i]), ..property][[1]]
      # is a valid MO code
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
      }
-
+      
      found <- microorganismsDT[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), ..property][[1]]
      # most probable: is exact match in fullname
      if (length(found) > 0) {
@ -574,7 +576,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      found <- microorganismsDT[col_id == x_backup[i], ..property][[1]]
      # is a valid Catalogue of Life ID
      if (NROW(found) > 0) {
@ -584,14 +586,14 @@ exec_as.mo <- function(x,
        }
        next
      }
-
-
+      
+      
      # WHONET: xxx = no growth
      if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) {
        x[i] <- NA_character_
        next
      }
-
+      
      if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
        # empty and nonsense values, ignore without warning
        x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -600,7 +602,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      # check for very small input, but ignore the O antigens of E. coli
      if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3
          & !x_backup_without_spp[i] %like% "O?(26|103|104|104|111|121|145|157)") {
@ -629,7 +631,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      if (x_backup_without_spp[i] %like% "virus") {
        # there is no fullname like virus, so don't try to coerce it
        x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -639,7 +641,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      # translate known trivial abbreviations to genus + species ----
      if (!is.na(x_trimmed[i])) {
        if (toupper(x_backup_without_spp[i]) %in% c('MRSA', 'MSSA', 'VISA', 'VRSA')) {
@ -830,7 +832,7 @@ exec_as.mo <- function(x,
          next
        }
      }
-
+      
      # FIRST TRY FULLNAMES AND CODES ----
      # if only genus is available, return only genus
      if (all(!c(x[i], x_trimmed[i]) %like% " ")) {
@ -854,7 +856,7 @@ exec_as.mo <- function(x,
        }
        # rest of genus only is in allow_uncertain part.
      }
-
+      
      # TRY OTHER SOURCES ----
      # WHONET and other common LIS codes
      if (toupper(x_backup[i]) %in% AMR::microorganisms.codes[, 1]) {
@ -879,7 +881,7 @@ exec_as.mo <- function(x,
          }
        }
      }
-
+      
      # allow no codes less than 4 characters long, was already checked for WHONET above
      if (nchar(x_backup_without_spp[i]) < 4) {
        x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
@ -889,7 +891,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      check_per_prevalence <- function(data_to_check,
                                       a.x_backup,
                                       b.x_trimmed,
@ -898,19 +900,19 @@ exec_as.mo <- function(x,
                                       e.x_withspaces_start_only,
                                       f.x_withspaces_end_only,
                                       g.x_backup_without_spp) {
-
+        
        # try probable: trimmed version of fullname ----
        found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), ..property][[1]]
        if (length(found) > 0) {
          return(found[1L])
        }
-
+        
        # try any match keeping spaces ----
        found <- data_to_check[fullname %like% d.x_withspaces_start_end, ..property][[1]]
        if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
          return(found[1L])
        }
-
+        
        # try any match keeping spaces, not ending with $ ----
        found <- data_to_check[fullname %like% paste0(trimws(e.x_withspaces_start_only), " "), ..property][[1]]
        if (length(found) > 0) {
@ -920,21 +922,21 @@ exec_as.mo <- function(x,
        if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
          return(found[1L])
        }
-
+        
        # try any match keeping spaces, not start with ^ ----
        found <- data_to_check[fullname %like% paste0(" ", trimws(f.x_withspaces_end_only)), ..property][[1]]
        if (length(found) > 0) {
          return(found[1L])
        }
-
+        
        # try a trimmed version
        found <- data_to_check[fullname_lower %like% b.x_trimmed
                               | fullname_lower %like% c.x_trimmed_without_group, ..property][[1]]
        if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
          return(found[1L])
        }
-
-
+        
+        
        # try splitting of characters in the middle and then find ID ----
        # only when text length is 6 or lower
        # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
@ -949,18 +951,18 @@ exec_as.mo <- function(x,
            return(found[1L])
          }
        }
-
+        
        # try fullname without start and without nchar limit of >= 6 ----
        # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH
        found <- data_to_check[fullname %like% e.x_withspaces_start_only, ..property][[1]]
        if (length(found) > 0) {
          return(found[1L])
        }
-
+        
        # didn't found any
        return(NA_character_)
      }
-
+      
      # FIRST TRY VERY PREVALENT IN HUMAN INFECTIONS ----
      x[i] <- check_per_prevalence(data_to_check = microorganismsDT[prevalence == 1],
                                   a.x_backup = x_backup[i],
@ -1006,9 +1008,9 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      # MISCELLANEOUS ----
-
+      
      # look for old taxonomic names ----
      found <- microorganisms.oldDT[fullname_lower == tolower(x_backup[i])
                                    | fullname %like% x_withspaces_start_end[i],]
@ -1032,7 +1034,7 @@ exec_as.mo <- function(x,
        }
        next
      }
-
+      
      # check for uncertain results ----
      uncertain_fn <- function(a.x_backup,
                               b.x_trimmed,
@ -1040,17 +1042,22 @@ exec_as.mo <- function(x,
                               d.x_withspaces_start_only,
                               f.x_withspaces_end_only,
                               g.x_backup_without_spp) {
-
+        
        if (uncertainty_level == 0) {
          # do not allow uncertainties
          return(NA_character_)
        }
-
+        
        if (uncertainty_level >= 1) {
+          now_checks_for_uncertainty_level <- 1
+          
          # (1) look again for old taxonomic names, now for G. species ----
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 1] (1) look again for old taxonomic names, now for G. species\n")
          }
+          if (isTRUE(debug)) {
+            message("Running '", c.x_withspaces_start_end, "' and '", d.x_withspaces_start_only, "'")
+          }
          found <- microorganisms.oldDT[fullname %like% c.x_withspaces_start_end
                                        | fullname %like% d.x_withspaces_start_only]
          if (NROW(found) > 0 & nchar(g.x_backup_without_spp) >= 6) {
@ -1068,7 +1075,7 @@ exec_as.mo <- function(x,
                        ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
                        mo = microorganismsDT[col_id == found[1, col_id_new], mo])
            uncertainties <<- rbind(uncertainties,
-                                    data.frame(uncertainty = 1,
+                                    data.frame(uncertainty = now_checks_for_uncertainty_level,
                                               input = a.x_backup,
                                               fullname = found[1, fullname],
                                               mo = paste("CoL", found[1, col_id])))
@ -1077,18 +1084,26 @@ exec_as.mo <- function(x,
            }
            return(x)
          }
-
+          
          # (2) Try with misspelled input ----
-          # just rerun with initial_search = FALSE will used the extensive regex part above
+          # just rerun with dyslexia_mode = TRUE will used the extensive regex part above
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 1] (2) Try with misspelled input\n")
          }
-          found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+          if (isTRUE(debug)) {
+            message("Running '", a.x_backup, "'")
+          }
+          # first try without dyslexia mode
+          found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+          if (empty_result(found)) {
+            # then with dyslexia mode
+            found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+          }
          if (!empty_result(found)) {
            found_result <- found
            found <- microorganismsDT[mo == found, ..property][[1]]
            uncertainties <<- rbind(uncertainties,
-                                    data.frame(uncertainty = 1,
+                                    data.frame(uncertainty = now_checks_for_uncertainty_level,
                                               input = a.x_backup,
                                               fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                               mo = found_result[1L]))
@ -1098,21 +1113,25 @@ exec_as.mo <- function(x,
            return(found[1L])
          }
        }
-
+        
        if (uncertainty_level >= 2) {
-
+          now_checks_for_uncertainty_level <- 2
+          
          # (3) look for genus only, part of name ----
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 2] (3) look for genus only, part of name\n")
          }
          if (nchar(g.x_backup_without_spp) > 4 & !b.x_trimmed %like% " ") {
            if (!grepl("^[A-Z][a-z]+", b.x_trimmed, ignore.case = FALSE)) {
+              if (isTRUE(debug)) {
+                message("Running '", paste(b.x_trimmed, "species"), "'")
+              }
              # not when input is like Genustext, because then Neospora would lead to Actinokineospora
              found <- microorganismsDT[fullname_lower %like% paste(b.x_trimmed, "species"), ..property][[1]]
              if (length(found) > 0) {
                x[i] <- found[1L]
                uncertainties <<- rbind(uncertainties,
-                                        data.frame(uncertainty = 2,
+                                        data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                   input = a.x_backup,
                                                   fullname = microorganismsDT[mo == found[1L], fullname][[1]],
                                                   mo = found[1L]))
@ -1123,19 +1142,27 @@ exec_as.mo <- function(x,
              }
            }
          }
-
+          
          # (4) strip values between brackets ----
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 2] (4) strip values between brackets\n")
          }
          a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup)
          a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped))
-          found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+          if (isTRUE(debug)) {
+            message("Running '", a.x_backup_stripped, "'")
+          }
+          # first try without dyslexia mode
+          found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+          if (empty_result(found)) {
+            # then with dyslexia mode
+            found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+          }
          if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) {
            found_result <- found
            found <- microorganismsDT[mo == found, ..property][[1]]
            uncertainties <<- rbind(uncertainties,
-                                    data.frame(uncertainty = 2,
+                                    data.frame(uncertainty = now_checks_for_uncertainty_level,
                                               input = a.x_backup,
                                               fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                               mo = found_result[1L]))
@ -1144,7 +1171,7 @@ exec_as.mo <- function(x,
            }
            return(found[1L])
          }
-
+          
          # (5a) try to strip off half an element from end and check the remains ----
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 2] (5a) try to strip off half an element from end and check the remains\n")
@ -1156,13 +1183,21 @@ exec_as.mo <- function(x,
              lastword_half <- substr(lastword, 1, as.integer(nchar(lastword) / 2))
              # remove last half of the second term
              x_strip_collapsed <- paste(c(x_strip[1:(length(x_strip) - i)], lastword_half), collapse = " ")
-              if (nchar(x_strip_collapsed) >= 4) {
-                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (nchar(x_strip_collapsed) >= 4 & nchar(lastword_half) > 2) {
+                if (isTRUE(debug)) {
+                  message("Running '", x_strip_collapsed, "'")
+                }
+                # first try without dyslexia mode
+                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+                if (empty_result(found)) {
+                  # then with dyslexia mode
+                  found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+                }
                if (!empty_result(found)) {
                  found_result <- found
                  found <- microorganismsDT[mo == found, ..property][[1]]
                  uncertainties <<- rbind(uncertainties,
-                                          data.frame(uncertainty = 2,
+                                          data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                     input = a.x_backup,
                                                     fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                                     mo = found_result[1L]))
@ -1181,13 +1216,21 @@ exec_as.mo <- function(x,
          if (length(x_strip) > 1) {
            for (i in 1:(length(x_strip) - 1)) {
              x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
-              if (nchar(x_strip_collapsed) >= 4) {
-                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (nchar(x_strip_collapsed) >= 6) {
+                if (isTRUE(debug)) {
+                  message("Running '", x_strip_collapsed, "'")
+                }
+                # first try without dyslexia mode
+                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+                if (empty_result(found)) {
+                  # then with dyslexia mode
+                  found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+                }
                if (!empty_result(found)) {
                  found_result <- found
                  found <- microorganismsDT[mo == found, ..property][[1]]
                  uncertainties <<- rbind(uncertainties,
-                                          data.frame(uncertainty = 2,
+                                          data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                     input = a.x_backup,
                                                     fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                                     mo = found_result[1L]))
@ -1208,7 +1251,7 @@ exec_as.mo <- function(x,
            found_result <- found
            found <- microorganismsDT[mo == found, ..property][[1]]
            uncertainties <<- rbind(uncertainties,
-                                    data.frame(uncertainty = 2,
+                                    data.frame(uncertainty = now_checks_for_uncertainty_level,
                                               input = a.x_backup,
                                               fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                               mo = found_result[1L]))
@ -1222,7 +1265,7 @@ exec_as.mo <- function(x,
            found_result <- found
            found <- microorganismsDT[mo == found, ..property][[1]]
            uncertainties <<- rbind(uncertainties,
-                                    data.frame(uncertainty = 2,
+                                    data.frame(uncertainty = now_checks_for_uncertainty_level,
                                               input = a.x_backup,
                                               fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                               mo = found_result[1L]))
@ -1239,14 +1282,22 @@ exec_as.mo <- function(x,
          if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
            for (i in 2:(length(x_strip))) {
              x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
-              found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (isTRUE(debug)) {
+                message("Running '", x_strip_collapsed, "'")
+              }
+              # first try without dyslexia mode
+              found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (empty_result(found)) {
+                # then with dyslexia mode
+                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+              }
              if (!empty_result(found)) {
                found_result <- found
                found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
                # uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3)
                if (x_strip_collapsed %like% " ") {
                  uncertainties <<- rbind(uncertainties,
-                                          data.frame(uncertainty = 2,
+                                          data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                     input = a.x_backup,
                                                     fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                                     mo = found_result[1L]))
@ -1259,22 +1310,32 @@ exec_as.mo <- function(x,
            }
          }
        }
-
+        
        if (uncertainty_level >= 3) {
-          # (7) try to strip off one element from start and check the remains ----
+          now_checks_for_uncertainty_level <- 3
+          
+          # (7a) try to strip off one element from start and check the remains (any text size) ----
          if (isTRUE(debug)) {
-            cat("\n[UNCERTAINLY LEVEL 3] (7) try to strip off one element from start and check the remains\n")
+            cat("\n[UNCERTAINLY LEVEL 3] (7a) try to strip off one element from start and check the remains (any text size)\n")
          }
          x_strip <- a.x_backup %>% strsplit(" ") %>% unlist()
          if (length(x_strip) > 1 & nchar(g.x_backup_without_spp) >= 6) {
            for (i in 2:(length(x_strip))) {
              x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
-              found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (isTRUE(debug)) {
+                message("Running '", x_strip_collapsed, "'")
+              }
+              # first try without dyslexia mode
+              found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (empty_result(found)) {
+                # then with dyslexia mode
+                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+              }
              if (!empty_result(found)) {
                found_result <- found
                found <- microorganismsDT[mo == found, ..property][[1]]
                uncertainties <<- rbind(uncertainties,
-                                        data.frame(uncertainty = 3,
+                                        data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                   input = a.x_backup,
                                                   fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                                   mo = found_result[1L]))
@ -1285,18 +1346,53 @@ exec_as.mo <- function(x,
              }
            }
          }
-
+          # (7b) try to strip off one element from end and check the remains (any text size) ----
+          # (this is in fact 5b but without nchar limit of >=6)
+          if (isTRUE(debug)) {
+            cat("\n[UNCERTAINLY LEVEL 3] (7b) try to strip off one element from end and check the remains (any text size)\n")
+          }
+          if (length(x_strip) > 1) {
+            for (i in 1:(length(x_strip) - 1)) {
+              x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
+              if (isTRUE(debug)) {
+                message("Running '", x_strip_collapsed, "'")
+              }
+              # first try without dyslexia mode
+              found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug)))
+              if (empty_result(found)) {
+                # then with dyslexia mode
+                found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug)))
+              }
+              if (!empty_result(found)) {
+                found_result <- found
+                found <- microorganismsDT[mo == found, ..property][[1]]
+                uncertainties <<- rbind(uncertainties,
+                                        data.frame(uncertainty = now_checks_for_uncertainty_level,
+                                                   input = a.x_backup,
+                                                   fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+                                                   mo = found_result[1L]))
+                if (initial_search == TRUE) {
+                  set_mo_history(a.x_backup, get_mo_code(found[1L], property), 2, force = force_mo_history)
+                }
+                return(found[1L])
+              }
+            }
+          }
+          
          # (8) part of a name (very unlikely match) ----
          if (isTRUE(debug)) {
            cat("\n[UNCERTAINLY LEVEL 3] (8) part of a name (very unlikely match)\n")
          }
+          if (isTRUE(debug)) {
+            message("Running '", f.x_withspaces_end_only, "'")
+          }
          found <- microorganismsDT[fullname %like% f.x_withspaces_end_only]
          if (nrow(found) > 0) {
            found_result <- found[["mo"]]
            if (!empty_result(found_result) & nchar(g.x_backup_without_spp) >= 6) {
              found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
              uncertainties <<- rbind(uncertainties,
-                                      data.frame(uncertainty = 3,
+                                      data.frame(uncertainty = now_checks_for_uncertainty_level,
                                                 input = a.x_backup,
                                                 fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
                                                 mo = found_result[1L]))
@ -1307,7 +1403,7 @@ exec_as.mo <- function(x,
            }
          }
        }
-
+        
        # didn't found in uncertain results too
        return(NA_character_)
      }
@ -1321,7 +1417,7 @@ exec_as.mo <- function(x,
        # no set_mo_history here - it is already set in uncertain_fn()
        next
      }
-
+      
      # no results found: make them UNKNOWN ----
      x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
      if (initial_search == TRUE) {
@ -1330,7 +1426,7 @@ exec_as.mo <- function(x,
      }
    }
  }
-
+  
  # handling failures ----
  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0 & initial_search == TRUE) {
@ -1355,7 +1451,7 @@ exec_as.mo <- function(x,
  # handling uncertainties ----
  if (NROW(uncertainties) > 0 & initial_search == TRUE) {
    options(mo_uncertainties = as.list(distinct(uncertainties, input, .keep_all = TRUE)))
-
+    
    plural <- c("", "it")
    if (NROW(uncertainties) > 1) {
      plural <- c("s", "them")
@ -1366,7 +1462,7 @@ exec_as.mo <- function(x,
            call. = FALSE,
            immediate. = TRUE) # thus will always be shown, even if >= warnings
  }
-
+  
  # Becker ----
  if (Becker == TRUE | Becker == "all") {
    # See Source. It's this figure:
@ -1391,11 +1487,11 @@ exec_as.mo <- function(x,
                                     "pseudintermedius", "pseudointermedius",
                                     "schweitzeri", "argenteus")
                      | (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]]
-
+    
    # warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103)
    post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus")
    if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) {
-
+      
      warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ",
              italic(paste("S.",
                           sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))),
@ -1404,14 +1500,14 @@ exec_as.mo <- function(x,
              call. = FALSE,
              immediate. = TRUE)
    }
-
+    
    x[x %in% CoNS] <- microorganismsDT[mo == 'B_STPHY_CNS', ..property][[1]][1L]
    x[x %in% CoPS] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    if (Becker == "all") {
      x[x %in% microorganismsDT[mo %like% '^B_STPHY_AUR', ..property][[1]]] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
    }
  }
-
+  
  # Lancefield ----
  if (Lancefield == TRUE | Lancefield == "all") {
    # group A - S. pyogenes
@ -1435,37 +1531,37 @@ exec_as.mo <- function(x,
    # group K - S. salivarius
    x[x == microorganismsDT[mo == 'B_STRPT_SAL', ..property][[1]][1L]] <- microorganismsDT[mo == 'B_STRPT_GRK', ..property][[1]][1L]
  }
-
+  
  # Wrap up ----------------------------------------------------------------
-
+  
  # comply to x, which is also unique and without empty values
  x_input_unique_nonempty <- unique(x_input[!is.na(x_input)
                                            & !is.null(x_input)
                                            & !identical(x_input, "")
                                            & !identical(x_input, "xxx")])
-
+  
  # left join the found results to the original input values (x_input)
  df_found <- data.frame(input = as.character(x_input_unique_nonempty),
                         found = as.character(x),
                         stringsAsFactors = FALSE)
  df_input <- data.frame(input = as.character(x_input),
                         stringsAsFactors = FALSE)
-
+  
  suppressWarnings(
    x <- df_input %>%
      left_join(df_found,
                by = "input") %>%
      pull(found)
  )
-
+  
  if (property == "mo") {
    x <- to_class_mo(x)
  }
-
+  
  if (length(mo_renamed()) > 0) {
    print(mo_renamed())
  }
-
+  
  x
 }

@ -1494,7 +1590,7 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "")
  old_values <- gsub("et al.", italic("et al."), old_values)
  new_values <- paste0(italic(name_new), ref_new, mo)
  new_values <- gsub("et al.", italic("et al."), new_values)
-
+  
  names(new_values) <- old_values
  total <- c(getOption("mo_renamed"), new_values)
  options(mo_renamed = total[order(names(total))])
@ -1604,7 +1700,7 @@ print.mo_uncertainties <- function(x, ...) {
             "\n(1 = ", green("renamed/misspelled"),
             ", 2 = ", yellow("uncertain"),
             ", 3 = ", red("very uncertain"), ")\n"))
-
+  
  msg <- ""
  for (i in 1:nrow(x)) {
    if (x[i, "uncertainty"] == 1) {
@ -1633,11 +1729,11 @@ mo_renamed <- function() {
  if (is.null(items)) {
    return(NULL)
  }
-
+  
  items <- strip_style(items)
  names(items) <- strip_style(names(items))
  structure(.Data = items,
-             class = c("mo_renamed", "character"))
+            class = c("mo_renamed", "character"))
 }

 #' @exportMethod print.mo_renamed
@ -1666,7 +1762,7 @@ unregex <- function(x) {
 get_mo_code <- function(x, property) {
  # don't use right now
  return(NULL)
-
+  
  if (property == "mo") {
    unique(x)
  } else {
--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

--- a/docs/authors.html
+++ b/docs/authors.html
@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

--- a/docs/index.html
+++ b/docs/index.html
@ -42,7 +42,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

--- a/docs/news/index.html
+++ b/docs/news/index.html
@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

@ -225,9 +225,9 @@
      
    </div>

-    <div id="amr-0-7-1-9037" class="section level1">
+    <div id="amr-0-7-1-9038" class="section level1">
 <h1 class="page-header">
-<a href="#amr-0-7-1-9037" class="anchor"></a>AMR 0.7.1.9037<small> Unreleased </small>
+<a href="#amr-0-7-1-9038" class="anchor"></a>AMR 0.7.1.9038<small> Unreleased </small>
 </h1>
 <div id="breaking" class="section level3">
 <h3 class="hasAnchor">
@ -1236,7 +1236,7 @@ Using <code><a href="../reference/as.mo.html">as.mo(..., allow_uncertain = 3)</a
    <div id="tocnav">
      <h2>Contents</h2>
      <ul class="nav nav-pills nav-stacked">
-        <li><a href="#amr-0-7-1-9037">0.7.1.9037</a></li>
+        <li><a href="#amr-0-7-1-9038">0.7.1.9038</a></li>
        <li><a href="#amr-0-7-1">0.7.1</a></li>
        <li><a href="#amr-0-7-0">0.7.0</a></li>
        <li><a href="#amr-0-6-1">0.6.1</a></li>
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@ -78,7 +78,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9037</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">0.7.1.9038</span>
      </span>
    </div>

--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@ -197,8 +197,8 @@ test_that("as.mo works", {
  print(mo_renamed())

  # check uncertain names
-  expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = FALSE))), "UNKNOWN")
-  expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL")
+  expect_equal(suppressWarnings(as.character(as.mo("staaur extratest", allow_uncertain = TRUE))), "B_STPHY_AUR")
+  expect_equal(suppressWarnings(as.character(as.mo("staaur extratest", allow_uncertain = FALSE))), "UNKNOWN")
  expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))
  expect_equal(suppressWarnings(as.character(as.mo("unexisting aureus", allow_uncertain = 3))), "B_STPHY_AUR")
  expect_equal(suppressWarnings(as.character(as.mo("unexisting staphy", allow_uncertain = 3))), "B_STPHY")
@ -270,7 +270,7 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("F_CANDD_GLB")), "F_CANDD_GLA")
  
  # debug mode
-  expect_output(print(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3))))
+  expect_output(print(suppressMessages(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3)))))

  # ..coccus
  expect_equal(as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))),