mirror of
https://github.com/msberends/AMR.git
synced 2025-07-09 04:42:22 +02:00
unknown codes, rsi fix
This commit is contained in:
5
R/data.R
5
R/data.R
@ -134,7 +134,7 @@
|
||||
#'
|
||||
#' A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using \code{\link{as.mo}}.
|
||||
#' @inheritSection catalogue_of_life Catalogue of Life
|
||||
#' @format A \code{\link{data.frame}} with 57,158 observations and 14 variables:
|
||||
#' @format A \code{\link{data.frame}} with 59,985 observations and 15 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{mo}}{ID of microorganism as used by this package}
|
||||
#' \item{\code{col_id}}{Catalogue of Life ID}
|
||||
@ -150,6 +150,7 @@
|
||||
#' \item{\code{rank}}{Taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}}
|
||||
#' \item{\code{ref}}{Author(s) and year of concerning scientific publication}
|
||||
#' \item{\code{species_id}}{ID of the species as used by the Catalogue of Life}
|
||||
#' \item{\code{prevalence}}{Prevalence of the microorganism, see \code{?as.mo}}
|
||||
#' }
|
||||
#' @source Catalogue of Life: Annual Checklist (public online database), \url{www.catalogueoflife.org}.
|
||||
#' @details Manually added were:
|
||||
@ -172,7 +173,7 @@ catalogue_of_life <- list(
|
||||
#'
|
||||
#' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by \code{\link{as.mo}}.
|
||||
#' @inheritSection catalogue_of_life Catalogue of Life
|
||||
#' @format A \code{\link{data.frame}} with 14,487 observations and 4 variables:
|
||||
#' @format A \code{\link{data.frame}} with 17,069 observations and 4 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{col_id}}{Catalogue of Life ID}
|
||||
#' \item{\code{tsn_new}}{New Catalogue of Life ID}
|
||||
|
70
R/mo.R
70
R/mo.R
@ -51,6 +51,8 @@
|
||||
#' F (Fungi), P (Protozoa), PL (Plantae) or V (Viruses)
|
||||
#' }
|
||||
#'
|
||||
#' Values that cannot be coered will be considered 'unknown' and have an MO code \code{UNKNOWN}.
|
||||
#'
|
||||
#' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
|
||||
#'
|
||||
#' \strong{Artificial Intelligence} \cr
|
||||
@ -275,7 +277,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# only check the uniques, which is way faster
|
||||
x <- unique(x)
|
||||
# remove empty values (to later fill them in again with NAs)
|
||||
x <- x[!is.na(x) & !is.null(x) & !identical(x, "")]
|
||||
# ("xxx" is WHONET code for 'no growth')
|
||||
x <- x[!is.na(x) & !is.null(x) & !identical(x, "") & !identical(x, "xxx")]
|
||||
|
||||
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
|
||||
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
|
||||
@ -367,8 +370,6 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
x_species <- paste(x, "species")
|
||||
# translate to English for supported languages of mo_property
|
||||
x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x, ignore.case = TRUE)
|
||||
# remove 'empty' genus and species values
|
||||
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
||||
# remove non-text in case of "E. coli" except dots and spaces
|
||||
x <- gsub("[^.a-zA-Z0-9/ \\-]+", "", x)
|
||||
# replace minus by a space
|
||||
@ -419,12 +420,17 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
next
|
||||
}
|
||||
|
||||
if (tolower(x_trimmed[i]) %in% c("", "xxx", "other", "none", "unknown")) {
|
||||
# empty and nonsense values, ignore without warning ("xxx" is WHONET code for 'no growth')
|
||||
if (any(x_trimmed[i] %in% c(NA, ""))) {
|
||||
x[i] <- NA_character_
|
||||
next
|
||||
}
|
||||
|
||||
if (tolower(x_trimmed[i]) %in% c("xxx", "other", "none", "unknown")) {
|
||||
# empty and nonsense values, ignore without warning
|
||||
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
|
||||
next
|
||||
}
|
||||
|
||||
if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3) {
|
||||
# check if search term was like "A. species", then return first genus found with ^A
|
||||
if (x_backup[i] %like% "[a-z]+ species" | x_backup[i] %like% "[a-z] spp[.]?") {
|
||||
@ -441,14 +447,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
}
|
||||
# fewer than 3 chars and not looked for species, add as failure
|
||||
x[i] <- NA_character_
|
||||
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
|
||||
failures <- c(failures, x_backup[i])
|
||||
next
|
||||
}
|
||||
|
||||
if (x_trimmed[i] %like% "virus") {
|
||||
# there is no fullname like virus, so don't try to coerce it
|
||||
x[i] <- NA_character_
|
||||
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
|
||||
failures <- c(failures, x_backup[i])
|
||||
next
|
||||
}
|
||||
@ -667,7 +673,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
d.x_withspaces_start_end = x_withspaces_start_end[i],
|
||||
e.x_withspaces_start_only = x_withspaces_start_only[i],
|
||||
f.x_withspaces_end_only = x_withspaces_end_only[i])
|
||||
if (!is.na(x[i])) {
|
||||
if (!empty_result(x[i])) {
|
||||
next
|
||||
}
|
||||
# THEN TRY PREVALENT IN HUMAN INFECTIONS ----
|
||||
@ -678,7 +684,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
d.x_withspaces_start_end = x_withspaces_start_end[i],
|
||||
e.x_withspaces_start_only = x_withspaces_start_only[i],
|
||||
f.x_withspaces_end_only = x_withspaces_end_only[i])
|
||||
if (!is.na(x[i])) {
|
||||
if (!empty_result(x[i])) {
|
||||
next
|
||||
}
|
||||
# THEN UNPREVALENT IN HUMAN INFECTIONS ----
|
||||
@ -689,7 +695,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
d.x_withspaces_start_end = x_withspaces_start_end[i],
|
||||
e.x_withspaces_start_only = x_withspaces_start_only[i],
|
||||
f.x_withspaces_end_only = x_withspaces_end_only[i])
|
||||
if (!is.na(x[i])) {
|
||||
if (!empty_result(x[i])) {
|
||||
next
|
||||
}
|
||||
|
||||
@ -765,7 +771,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
|
||||
# (3) not yet implemented taxonomic changes in Catalogue of Life ----
|
||||
found <- suppressMessages(suppressWarnings(exec_as.mo(TEMPORARY_TAXONOMY(b.x_trimmed), clear_options = FALSE, allow_uncertain = FALSE)))
|
||||
if (!is.na(found)) {
|
||||
if (!empty_result(found)) {
|
||||
found_result <- found
|
||||
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
@ -780,7 +786,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
a.x_backup_stripped <- gsub("( *[(].*[)] *)", " ", a.x_backup)
|
||||
a.x_backup_stripped <- trimws(gsub(" +", " ", a.x_backup_stripped))
|
||||
found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_stripped, clear_options = FALSE, allow_uncertain = FALSE)))
|
||||
if (!is.na(found) & nchar(b.x_trimmed) >= 6) {
|
||||
if (!empty_result(found) & nchar(b.x_trimmed) >= 6) {
|
||||
found_result <- found
|
||||
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
@ -797,7 +803,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
for (i in 1:(length(x_strip) - 1)) {
|
||||
x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
|
||||
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE, allow_uncertain = FALSE)))
|
||||
if (!is.na(found)) {
|
||||
if (!empty_result(found)) {
|
||||
found_result <- found
|
||||
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
@ -816,7 +822,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
for (i in 2:(length(x_strip))) {
|
||||
x_strip_collapsed <- paste(x_strip[i:length(x_strip)], collapse = " ")
|
||||
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE, allow_uncertain = FALSE)))
|
||||
if (!is.na(found)) {
|
||||
if (!empty_result(found)) {
|
||||
found_result <- found
|
||||
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
@ -833,13 +839,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
found <- microorganismsDT[fullname %like% f.x_withspaces_end_only]
|
||||
if (nrow(found) > 0) {
|
||||
found_result <- found[["mo"]]
|
||||
found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
data.frame(uncertainty = 3,
|
||||
input = a.x_backup,
|
||||
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
|
||||
mo = found_result[1L]))
|
||||
return(found[1L])
|
||||
if (!empty_result(found_result)) {
|
||||
found <- microorganismsDT[mo == found_result[1L], ..property][[1]]
|
||||
uncertainties <<- rbind(uncertainties,
|
||||
data.frame(uncertainty = 3,
|
||||
input = a.x_backup,
|
||||
fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
|
||||
mo = found_result[1L]))
|
||||
return(found[1L])
|
||||
}
|
||||
}
|
||||
|
||||
# didn't found in uncertain results too
|
||||
@ -847,13 +855,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
|
||||
x[i] <- uncertain_fn(x_backup[i], x_trimmed[i], x_withspaces_start_end[i], x_withspaces_start_only[i], x_withspaces_end_only[i])
|
||||
if (!is.na(x[i])) {
|
||||
if (!empty_result(x[i])) {
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# not found ----
|
||||
x[i] <- NA_character_
|
||||
x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]]
|
||||
failures <- c(failures, x_backup[i])
|
||||
}
|
||||
}
|
||||
@ -862,15 +870,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
||||
if (length(failures) > 0 & clear_options == TRUE) {
|
||||
options(mo_failures = sort(unique(failures)))
|
||||
plural <- c("value", "it")
|
||||
plural <- c("value", "it", "is")
|
||||
if (n_distinct(failures) > 1) {
|
||||
plural <- c("values", "them")
|
||||
plural <- c("values", "them", "are")
|
||||
}
|
||||
total_failures <- length(x_input[x_input %in% failures & !x_input %in% c(NA, NULL, NaN)])
|
||||
total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)])
|
||||
msg <- paste0("\n", nr2char(n_distinct(failures)), " unique input ", plural[1],
|
||||
msg <- paste0("\n", nr2char(n_distinct(failures)), " unique ", plural[1],
|
||||
" (^= ", percent(total_failures / total_n, round = 1, force_zero = TRUE),
|
||||
") could not be coerced to a valid MO code")
|
||||
") could not be coerced and ", plural[3], " considered 'unknown'")
|
||||
if (n_distinct(failures) <= 10) {
|
||||
msg <- paste0(msg, ": ", paste('"', unique(failures), '"', sep = "", collapse = ', '))
|
||||
}
|
||||
@ -887,7 +895,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
if (NROW(uncertainties) > 1) {
|
||||
plural <- c("values", "them")
|
||||
}
|
||||
msg <- paste0("\nResults of ", nr2char(NROW(uncertainties)), " input ", plural[1],
|
||||
msg <- paste0("\nResults of ", nr2char(NROW(uncertainties)), " ", plural[1],
|
||||
" was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
|
||||
warning(red(msg),
|
||||
call. = FALSE,
|
||||
@ -951,7 +959,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# Wrap up ----------------------------------------------------------------
|
||||
|
||||
# comply to x, which is also unique and without empty values
|
||||
x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "")])
|
||||
x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "") & !identical(x_input, "xxx")])
|
||||
|
||||
# left join the found results to the original input values (x_input)
|
||||
df_found <- data.frame(input = as.character(x_input_unique_nonempty),
|
||||
@ -984,6 +992,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
x
|
||||
}
|
||||
|
||||
empty_result <- function(x) {
|
||||
x %in% c(NA, "UNKNOWN")
|
||||
}
|
||||
|
||||
TEMPORARY_TAXONOMY <- function(x) {
|
||||
x[x %like% 'Cutibacterium'] <- gsub('Cutibacterium', 'Propionibacterium', x[x %like% 'Cutibacterium'])
|
||||
x
|
||||
|
@ -364,7 +364,7 @@ mo_translate <- function(x, language) {
|
||||
}
|
||||
|
||||
x_tobetranslated <- grepl(x = x,
|
||||
pattern = "(Coagulase Negative Staphylococcus|Coagulase Positive Staphylococcus|Beta-haemolytic Streptococcus|unknown Gram negatives|unknown Gram positives|CoNS|CoPS|no MO|Gram negative|Gram positive|Bacteria|Fungi|Protozoa|biogroup|biotype|vegetative|group|Group)")
|
||||
pattern = "(Coagulase Negative Staphylococcus|Coagulase Positive Staphylococcus|Beta-haemolytic Streptococcus|unknown Gram negatives|unknown Gram positives|unknown name|unknown kingdom|unknown phylum|unknown class|unknown order|unknown family|unknown genus|unknown species|unknown subspecies|unknown rank|CoNS|CoPS|Gram negative|Gram positive|Bacteria|Fungi|Protozoa|biogroup|biotype|vegetative|group|Group)")
|
||||
|
||||
if (sum(x_tobetranslated, na.rm = TRUE) == 0) {
|
||||
return(x)
|
||||
@ -379,9 +379,18 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Beta-h\u00e4molytischer Streptococcus", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "unbekannte Gramnegativen", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "unbekannte Grampositiven", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "unbekannte Name", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "unbekanntes Reich", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "unbekannter Stamm", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "unbekannte Klasse", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "unbekannte Ordnung", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "unbekannte Familie", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "unbekannte Gattung", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "unbekannte Art", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "unbekannte Unterart", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "unbekannter Rang", ., fixed = TRUE) %>%
|
||||
gsub("(CoNS)", "(KNS)", ., fixed = TRUE) %>%
|
||||
gsub("(CoPS)", "(KPS)", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(kein MO)", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gramnegativ", ., fixed = TRUE) %>%
|
||||
gsub("Gram positive", "Grampositiv", ., fixed = TRUE) %>%
|
||||
gsub("Bacteria", "Bakterien", ., fixed = TRUE) %>%
|
||||
@ -401,7 +410,16 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Beta-hemolytische Streptococcus", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "onbekende Gram-negatieven", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "onbekende Gram-positieven", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(geen MO)", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "onbekende naam", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "onbekend koninkrijk", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "onbekende fylum", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "onbekende klasse", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "onbekende orde", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "onbekende familie", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "onbekend geslacht", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "onbekende soort", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "onbekende ondersoort", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "onbekende rang", ., fixed = TRUE) %>%
|
||||
gsub("(CoNS)", "(CNS)", ., fixed = TRUE) %>%
|
||||
gsub("(CoPS)", "(CPS)", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gram-negatief", ., fixed = TRUE) %>%
|
||||
@ -423,7 +441,16 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "Gram negativos desconocidos", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "Gram positivos desconocidos", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(sin MO)", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "nombre desconocido", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "reino desconocido", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "filo desconocido", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "clase desconocida", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "orden desconocido", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "familia desconocida", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "g\u00e9nero desconocido", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "especie desconocida", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "subespecie desconocida", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "rango desconocido", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>%
|
||||
gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>%
|
||||
gsub("Bacteria", "Bacterias", ., fixed = TRUE) %>%
|
||||
@ -443,7 +470,16 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-emolitico", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "Gram negativi sconosciuti", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "Gram positivi sconosciuti", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(non MO)", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "nome sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "regno sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "phylum sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "classe sconosciuta", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "ordine sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "famiglia sconosciuta", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "genere sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "specie sconosciute", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "sottospecie sconosciute", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "grado sconosciuto", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>%
|
||||
gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>%
|
||||
gsub("Bacteria", "Batteri", ., fixed = TRUE) %>%
|
||||
@ -462,7 +498,16 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "Gram n\u00e9gatifs inconnus", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "Gram positifs inconnus", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(pas MO)", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "nom inconnu", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "r\u00e8gme inconnu", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "embranchement inconnu", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "classe inconnue", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "ordre inconnu", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "famille inconnue", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "genre inconnu", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "esp\u00e8ce inconnue", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "sous-esp\u00e8ce inconnue", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "rang inconnu", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gram n\u00e9gatif", ., fixed = TRUE) %>%
|
||||
gsub("Gram positive", "Gram positif", ., fixed = TRUE) %>%
|
||||
gsub("Bacteria", "Bact\u00e9ries", ., fixed = TRUE) %>%
|
||||
@ -482,7 +527,16 @@ mo_translate <- function(x, language) {
|
||||
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram negatives", "Gram negativos desconhecidos", ., fixed = TRUE) %>%
|
||||
gsub("unknown Gram positives", "Gram positivos desconhecidos", ., fixed = TRUE) %>%
|
||||
gsub("(no MO)", "(sem MO)", ., fixed = TRUE) %>%
|
||||
gsub("unknown name", "nome desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("unknown kingdom", "reino desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("unknown phylum", "filo desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("unknown class", "classe desconhecida", ., fixed = TRUE) %>%
|
||||
gsub("unknown order", "ordem desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("unknown family", "fam\u00edlia desconhecida", ., fixed = TRUE) %>%
|
||||
gsub("unknown genus", "g\u00eanero desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("unknown species", "esp\u00e9cies desconhecida", ., fixed = TRUE) %>%
|
||||
gsub("unknown subspecies", "subesp\u00e9cies desconhecida", ., fixed = TRUE) %>%
|
||||
gsub("unknown rank", "classifica\u00e7\u00e3o desconhecido", ., fixed = TRUE) %>%
|
||||
gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>%
|
||||
gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>%
|
||||
gsub("Bacteria", "Bact\u00e9rias", ., fixed = TRUE) %>%
|
||||
|
3
R/rsi.R
3
R/rsi.R
@ -76,6 +76,9 @@ as.rsi <- function(x) {
|
||||
x <- gsub(' +', '', x)
|
||||
# remove all MIC-like values: numbers, operators and periods
|
||||
x <- gsub('[0-9.,;:<=>]+', '', x)
|
||||
# remove everything between brackets, and 'high' and 'low'
|
||||
x <- gsub("([(].*[)])", "", x)
|
||||
x <- gsub("(high|low)", "", x, ignore.case = TRUE)
|
||||
# disallow more than 3 characters
|
||||
x[nchar(x) > 3] <- NA
|
||||
# set to capitals
|
||||
|
Reference in New Issue
Block a user