1
0
mirror of https://github.com/msberends/AMR.git synced 2026-05-14 03:10:50 +02:00

Fix #287 (complex fallback) and #288 (species epithet scoring bias)

#287: as.mo() now strips a trailing " complex" from the input when that
exact complex is not in the taxonomy and retries with the bare name, so
inputs like "Proteus vulgaris complex" no longer return NA.

#288: mo_matching_score() applies a ×2 bonus when the input has an
abbreviated genus (≤3 chars) and the candidate's species epithet exactly
matches the input species epithet. This ensures "S. apiospermum" resolves
to Scedosporium apiospermum rather than Staphylococcus aureus, overcoming
the kingdom/prevalence denominator bias in favour of common bacteria.

https://claude.ai/code/session_01VH4Ju4Xq9aW1AHuoVbjGEo
This commit is contained in:
Claude
2026-05-06 15:11:31 +00:00
parent 155c2707ce
commit b3b8d301ff
5 changed files with 46 additions and 5 deletions

9
R/mo.R
View File

@@ -322,6 +322,15 @@ as.mo <- function(x,
return(as.character(MO_lookup_current$mo[match(x_out, MO_lookup_current$fullname_lower)]))
}
# Issue #287: "X complex" is not a distinct taxon - strip " complex" and try "X"
# NOTE(review): the pattern below is lower-case only; presumably x_out has
# already been lower-cased at this point (it is compared with fullname_lower) - confirm.
if (grepl(" complex$", x_out, ignore.case = FALSE)) {
# drop the trailing " complex" from both working copies of the input
x_out <- sub(" complex$", "", x_out)
# x_search_cleaned additionally accepts a capitalised "Complex"
x_search_cleaned <- sub(" [Cc]omplex$", "", x_search_cleaned)
# exact hit on the bare name: return its MO code right away
if (x_out %in% MO_lookup_current$fullname_lower) {
return(as.character(MO_lookup_current$mo[match(x_out, MO_lookup_current$fullname_lower)]))
}
# no exact hit: continue below with the stripped x_out / x_search_cleaned
}
# input must not be too short
if (nchar(x_out) < 3) {
return("UNKNOWN")