1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 11:51:59 +02:00

Fixes for Salmonella

This commit is contained in:
2022-12-17 14:31:33 +01:00
parent 5f3a7694aa
commit 23fe427cbc
42 changed files with 375 additions and 445 deletions

View File

@ -61,7 +61,7 @@ availability <- function(tbl, width = NULL) {
R_print[is.na(R)] <- ""
if (is.null(width)) {
width <- options()$width -
width <- getOption("width", 100) -
(max(nchar(colnames(tbl))) +
# count col
8 +

View File

@ -662,7 +662,7 @@ eucast_rules <- function(x,
if (rule_group_current %unlike% "other" && eucast_notification_shown == FALSE) {
cat(
paste0(
"\n", font_grey(strrep("-", 0.95 * options()$width)), "\n",
"\n", font_grey(strrep("-", 0.95 * getOption("width", 100))), "\n",
word_wrap("Rules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)")), "\n",
font_blue("https://eucast.org/"), "\n"
)
@ -936,7 +936,7 @@ eucast_rules <- function(x,
wouldve <- ""
}
cat(paste0("\n", font_grey(strrep("-", 0.95 * options()$width)), "\n"))
cat(paste0("\n", font_grey(strrep("-", 0.95 * getOption("width", 100))), "\n"))
cat(word_wrap(paste0(
"The rules ", paste0(wouldve, "affected "),
font_bold(
@ -1008,7 +1008,7 @@ eucast_rules <- function(x,
cat("\n")
}
cat(paste0(font_grey(strrep("-", 0.95 * options()$width)), "\n"))
cat(paste0(font_grey(strrep("-", 0.95 * getOption("width", 100))), "\n"))
if (isFALSE(verbose) && total_n_added + total_n_changed > 0) {
cat("\n", word_wrap("Use ", font_bold("eucast_rules(..., verbose = TRUE)"), " (on your original data) to get a data.frame with all specified edits instead."), "\n\n", sep = "")

63
R/mo.R
View File

@ -268,11 +268,11 @@ as.mo <- function(x,
# take out the parts, split by space
x_parts <- strsplit(gsub("-", " ", x_out, fixed = TRUE), " ", fixed = TRUE)[[1]]
# do a pre-match on first character (and if it contains a space, first chars of first two terms)
if (length(x_parts) %in% c(2, 3)) {
# for genus + species + subspecies
filtr <- which(AMR_env$MO_lookup$full_first == substr(x_parts[1], 1, 1) & AMR_env$MO_lookup$species_first == substr(x_parts[2], 1, 1))
filtr <- which(AMR_env$MO_lookup$full_first == substr(x_parts[1], 1, 1) & (AMR_env$MO_lookup$species_first == substr(x_parts[2], 1, 1) | AMR_env$MO_lookup$subspecies_first == substr(x_parts[2], 1, 1)))
} else if (length(x_parts) > 3) {
first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]")
filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
@ -307,9 +307,13 @@ as.mo <- function(x,
} else {
minimum_matching_score_current <- minimum_matching_score
}
m[m < minimum_matching_score_current] <- NA_real_
top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
if (sum(m >= minimum_matching_score_current) > 10) {
# at least 10 are left over, make the ones under `m` NA
m[m < minimum_matching_score_current] <- NA_real_
}
top_hits <- mo_to_search[order(m, decreasing = TRUE, na.last = NA)] # na.last = NA will remove the NAs
if (length(top_hits) == 0) {
warning_("No hits found for \"", x_search, "\" with minimum_matching_score = ", ifelse(is.null(minimum_matching_score), paste0("NULL (=", round(min(minimum_matching_score_current, na.rm = TRUE), 3), ")"), minimum_matching_score), ". Try setting this value lower or even to 0.", call = FALSE)
result_mo <- NA_character_
@ -357,7 +361,7 @@ as.mo <- function(x,
if (length(AMR_env$mo_uncertainties$original_input) <= 3) {
examples <- vector_and(paste0(
'"', AMR_env$mo_uncertainties$original_input,
'" (assumed ', font_italic(AMR_env$mo_uncertainties$fullname, collapse = NULL), ")"
'" (assumed ', italicise(AMR_env$mo_uncertainties$fullname), ")"
),
quotes = FALSE
)
@ -377,10 +381,12 @@ as.mo <- function(x,
} # end of loop over all yet unknowns
# Keep or replace synonyms ----
gbif_matches <- AMR::microorganisms$gbif_renamed_to[match(out, AMR::microorganisms$mo)]
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
# GBIF only for non-bacteria, since we use LPSN as primary source for bacteria
# (example is Strep anginosus, renamed according to GBIF, not according to LPSN)
gbif_matches <- AMR::microorganisms$gbif_renamed_to[AMR::microorganisms$kingdom != "Bacteria"][match(out, AMR::microorganisms$mo[AMR::microorganisms$kingdom != "Bacteria"])]
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
AMR_env$mo_renamed <- list(
old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
@ -795,7 +801,7 @@ print.mo_uncertainties <- function(x, ...) {
text[scores < 0.5] <- font_red_bg(text[scores < 0.5], collapse = NULL)
text
}
txt <- ""
for (i in seq_len(nrow(x))) {
if (x[i, ]$candidates != "") {
@ -803,7 +809,7 @@ print.mo_uncertainties <- function(x, ...) {
scores <- mo_matching_score(x = x[i, ]$input, n = candidates)
n_candidates <- length(candidates)
candidates_formatted <- font_italic(candidates, collapse = NULL)
candidates_formatted <- italicise(candidates)
scores_formatted <- trimws(formatC(round(scores, 3), format = "f", digits = 3))
scores_formatted <- score_set_colour(scores_formatted, scores)
@ -824,7 +830,8 @@ print.mo_uncertainties <- function(x, ...) {
""
)
),
extra_indent = nchar("Also matched: ")
extra_indent = nchar("Also matched: "),
width = 0.9 * getOption("width", 100)
)
} else {
candidates <- ""
@ -838,11 +845,11 @@ print.mo_uncertainties <- function(x, ...) {
txt <- paste(txt,
paste0(
paste0(
"", strrep(font_grey("-"), times = options()$width), "\n",
"", strrep(font_grey("-"), times = getOption("width", 100)), "\n",
'"', x[i, ]$original_input, '"',
" -> ",
paste0(
font_bold(font_italic(x[i, ]$fullname)),
font_bold(italicise(x[i, ]$fullname)),
" (", x[i, ]$mo, ", ", score_set_colour(score_formatted, score), ")"
)
),
@ -887,7 +894,7 @@ print.mo_renamed <- function(x, extra_txt = "", n = 25, ...) {
message_(
"The following microorganism", ifelse(NROW(x) > 1, "s were", " was"), " taxonomically renamed", extra_txt, ":\n",
paste0(" \u2022 ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows],
paste0(" ", AMR_env$bullet_icon, " ", font_italic(x$old[rows], collapse = NULL), x$ref_old[rows],
" -> ", font_italic(x$new[rows], collapse = NULL), x$ref_new[rows],
collapse = "\n"
),
@ -924,6 +931,18 @@ convert_colloquial_input <- function(x) {
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
out[x %like_case% "(viridans.* (strepto|^s).*|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# Salmonella in different languages, like "Salmonella grupo B"
out[x %like_case% "salmonella.* [bcd]$"] <- gsub(".*salmonella.* ([bcd])$",
"B_SLMNL_GRP\\U\\1",
x[x %like_case% "salmonella.* [bcd]$"],
perl = TRUE
)
out[x %like_case% "group [bcd] salmonella"] <- gsub(".*group ([bcd]) salmonella*",
"B_SLMNL_GRP\\U\\1",
x[x %like_case% "group [bcd] salmonella"],
perl = TRUE
)
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
@ -931,18 +950,15 @@ convert_colloquial_input <- function(x) {
# Gram stains
out[x %like_case% "gram[ -]?neg.*"] <- "B_GRAMN"
out[x %like_case% "( |^)gram[-]( |$)"] <- "B_GRAMN"
out[x %like_case% "gram[ -]?pos.*"] <- "B_GRAMP"
out[x %like_case% "( |^)gram[+]( |$)"] <- "B_GRAMP"
out[is.na(out) & x %like_case% "anaerob[a-z]+ (micro)?.*organism"] <- "B_ANAER"
# yeasts and fungi
out[x %like_case% "^yeast?"] <- "F_YEAST"
out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
# Salmonella city names, starting with capital species name - they are all S. enterica
out[x.bak %like_case% "[sS]almonella " & x %like% "paratyphi"] <- "B_SLMNL_ENTR"
out[x.bak %like_case% "[sS]almonella [A-Z][a-z]+ ?.*" & x %unlike% "typhi"] <- "B_SLMNL_ENTR"
out[x %like_case% "salmonella group"] <- "B_SLMNL"
# trivial names known to the field
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
@ -958,6 +974,15 @@ convert_colloquial_input <- function(x) {
out
}
italicise <- function(x) {
out <- font_italic(x, collapse = NULL)
out[x %like_case% "Salmonella [A-Z]"] <- paste(font_italic("Salmonella"),
gsub("Salmonella ", "", x[x %like_case% "Salmonella [A-Z]"]))
out[x %like_case% "Streptococcus [A-Z]"] <- paste(font_italic("Streptococcus"),
gsub("Streptococcus ", "", x[x %like_case% "Streptococcus [A-Z]"]))
out
}
nr2char <- function(x) {
if (x %in% c(1:10)) {
v <- c(

Binary file not shown.

View File

@ -214,8 +214,8 @@ create_MO_lookup <- function() {
MO_lookup$fullname_lower <- MO_FULLNAME_LOWER
}
MO_lookup$full_first <- substr(MO_lookup$fullname_lower, 1, 1)
MO_lookup$species_first <- substr(MO_lookup$species, 1, 1)
MO_lookup$species_first <- tolower(substr(MO_lookup$species, 1, 1)) # tolower for groups (Streptococcus, Salmonella)
MO_lookup$subspecies_first <- tolower(substr(MO_lookup$subspecies, 1, 1)) # tolower for Salmonella serovars
MO_lookup
}