diff --git a/NEWS.md b/NEWS.md index f304ac52..4acd3280 100755 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ * There will be looked for uncertain results at default - these results will be returned with an informative warning * Manual now contains more info about the algorithms * Progress bar will be shown when it takes more than 3 seconds to get results + * Support for formatted console text * Function `first_isolate`: * Will now use a column named like "patid" for the patient ID (parameter `col_patientid`), when this parameter was left blank * Will now use a column named like "key(...)ab" or "key(...)antibiotics" for the key antibiotics (parameter `col_keyantibiotics`), when this parameter was left blank diff --git a/R/mo.R b/R/mo.R index e13e07d7..344c3360 100644 --- a/R/mo.R +++ b/R/mo.R @@ -204,6 +204,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } } + notes <- character(0) failures <- character(0) x_input <- x # only check the uniques, which is way faster @@ -393,7 +394,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_trimmed[i])) { # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L] - base::message(magenta(paste0("Note: ", italic(x_trimmed[i]), " will be considered a subspecies of ", italic("Salmonella enterica"), " (B_SLMNL_ENT)"))) + notes <- c(notes, + magenta(paste0("Note: ", italic(x_trimmed[i]), + " was considered (a subspecies of) ", + italic("Salmonella enterica"), + " (B_SLMNL_ENT)"))) next } } @@ -605,75 +610,78 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } else { x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]] } - renamed_note(name_old = found[1, name], - name_new = microorganismsDT[tsn == found[1, tsn_new], fullname], - ref_old = found[1, ref], - ref_new = microorganismsDT[tsn == found[1, tsn_new], ref], - mo = microorganismsDT[tsn == found[1, tsn_new], mo]) + notes <- c(notes, + renamed_note(name_old = found[1, name], + name_new = microorganismsDT[tsn == found[1, tsn_new], fullname], + ref_old = found[1, ref], + ref_new = microorganismsDT[tsn == found[1, tsn_new], ref], + mo = microorganismsDT[tsn == found[1, tsn_new], mo])) next } # check for uncertain results ---- if (allow_uncertain == TRUE) { - # (1) look again for old taxonomic names, now for G. species ---- - found <- microorganisms.oldDT[name %like% x_withspaces[i] - | name %like% x_withspaces_start[i] - | name %like% x[i],] - if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) { - if (property == "ref") { - # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: - # mo_ref("Chlamydia psittaci) = "Page, 1968" (with warning) - # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999" - x[i] <- found[1, ref] - } else { - x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]] + + uncertain_fn <- function(x_backup, x_trimmed, x_withspaces, x_withspaces_start) { + # (1) look again for old taxonomic names, now for G. species ---- + found <- microorganisms.oldDT[name %like% x_withspaces + | name %like% x_withspaces_start + | name %like% x,] + if (NROW(found) > 0 & nchar(x_trimmed) >= 6) { + if (property == "ref") { + # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so: + # mo_ref("Chlamydia psittaci) = "Page, 1968" (with warning) + # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999" + x <- found[1, ref] + } else { + x <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]] + } + warning(red(paste0('UNCERTAIN - "', + x_backup, '" -> ', italic(found[1, name]))), + call. = FALSE, immediate. = FALSE) + notes <<- c(notes, + renamed_note(name_old = found[1, name], + name_new = microorganismsDT[tsn == found[1, tsn_new], fullname], + ref_old = found[1, ref], + ref_new = microorganismsDT[tsn == found[1, tsn_new], ref], + mo = microorganismsDT[tsn == found[1, tsn_new], mo])) + return(x) } - warning(red(paste0('UNCERTAIN - "', - x_backup[i], '" -> ', italic(found[1, name]))), - call. = FALSE, immediate. = TRUE) - renamed_note(name_old = found[1, name], - name_new = microorganismsDT[tsn == found[1, tsn_new], fullname], - ref_old = found[1, ref], - ref_new = microorganismsDT[tsn == found[1, tsn_new], ref], - mo = microorganismsDT[tsn == found[1, tsn_new], mo]) - next - } - # (2) strip values between brackets ---- - x_backup_stripped <- gsub("( [(].*[)])", "", x_backup[i]) - x_backup_stripped <- trimws(gsub(" ", " ", x_backup_stripped, fixed = TRUE)) - x_species_stripped <- gsub("( [(].*[)])", "", x_species[i]) - x_species_stripped <- trimws(gsub(" ", " ", x_species_stripped, fixed = TRUE)) + # (2) strip values between brackets ---- + x_backup_stripped <- gsub("( [(].*[)])", "", x_backup) + x_backup_stripped <- trimws(gsub(" ", " ", x_backup_stripped, fixed = TRUE)) + found <- suppressMessages(suppressWarnings(exec_as.mo(x_backup_stripped, clear_options = FALSE))) + if (!is.na(found) & nchar(x_trimmed) >= 6) { + found <- microorganismsDT[mo == found, ..property][[1]] + warning(red(paste0('UNCERTAIN - "', + x_backup, '" -> ', italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")), + call. = FALSE, immediate. = FALSE) + return(found[1L]) + } - found <- microorganismsDT[fullname %like% x_backup_stripped - | fullname %like% x_species_stripped,] - if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) { - x[i] <- found[1, ..property][[1]] - warning(red(paste0('UNCERTAIN - "', - x_backup[i], '" -> ', italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")), - call. = FALSE, immediate. = TRUE) - next - } - - # (3) try to strip off one element and check the remains ---- - look_for_part <- function(z) { - x_strip <- z %>% strsplit(" ") %>% unlist() - if (length(x_strip) > 1 & nchar(x_trimmed[i]) >= 6) { - for (i in 1:(length(x_strip) - 1)) { - x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ") - found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE))) - if (!is.na(found)) { - found <- microorganismsDT[mo == found, ..property][[1]] - warning(red(paste0('UNCERTAIN - "', - z, '" -> ', italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")), - call. = FALSE, immediate. = TRUE) - return(found[1L]) + # (3) try to strip off one element and check the remains ---- + look_for_part <- function(z) { + x_strip <- z %>% strsplit(" ") %>% unlist() + if (length(x_strip) > 1 & nchar(x_trimmed) >= 6) { + for (i in 1:(length(x_strip) - 1)) { + x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ") + found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE))) + if (!is.na(found)) { + found <- microorganismsDT[mo == found, ..property][[1]] + warning(red(paste0('UNCERTAIN - "', + z, '" -> ', italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")), + call. = FALSE, immediate. = FALSE) + return(found[1L]) + } } } + return(NA_character_) } - return(NA_character_) + return(look_for_part(x_backup)) } - x[i] <- look_for_part(x_backup[i]) + + x[i] <- uncertain_fn(x_backup[i], x_trimmed[i], x_withspaces[i], x_withspaces_start[i]) if (!is.na(x[i])) { next } @@ -773,6 +781,13 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, x <- as.integer(x) } + if (length(notes > 0)) { + notes <- sort(notes) + for (i in 1:length(notes)) { + base::message(notes[i]) + } + } + x } @@ -798,7 +813,7 @@ renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "" msg_plain <- paste0(name_old, ref_old, " -> ", name_new, ref_new) msg_plain <- c(getOption("mo_renamed", character(0)), msg_plain) options(mo_renamed = sort(msg_plain)) - base::message(blue(paste("Note:", msg))) + return(blue(paste("Note:", msg))) } #' @exportMethod print.mo