diff --git a/DESCRIPTION b/DESCRIPTION index 6cb646a5d..786d353e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 3.0.1.9079 +Version: 3.0.1.9080 Date: 2026-06-27 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 06b1b43b2..0d470520a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 3.0.1.9079 +# AMR 3.0.1.9080 Planned as v3.1.0, end of June 2026. @@ -32,6 +32,7 @@ Planned as v3.1.0, end of June 2026. * `as.mo()`: * Input of the form `"X complex"` now falls back to `"X"` when the complex is not a distinct taxon in the database, preventing `NA` results for valid clinical descriptions such as `"Proteus vulgaris complex"` (#287) * Abbreviated-genus input (e.g. `"S. apiospermum"`) now correctly ranks candidates whose species epithet exactly matches the input above more-prevalent organisms whose species does not match; fixes `"S. apiospermum"` resolving to *Staphylococcus* instead of *Scedosporium apiospermum* (#288) + * Abbreviated-genus input for species that have subspecies (e.g. `"P. ovale"`) now collapses to the species-rank record instead of incorrectly matching a more-prevalent organism; explicit subspecies queries (e.g. `"P. ovale curtisi"`) are preserved (#288) * `get_author_year()` in the microorganism reproduction script now strips `emend.` and everything after it, so `ref` reflects the combination authority rather than the emendation author (e.g. 
*Rhodococcus equi* now returns "Goodfellow et al., 1977" instead of "Nouioui et al., 2018") * BRMO classification now includes bacterial complexes (#275) * Translation fixes for Italian CoNS/CoPS names (#256), Dutch antimicrobials, and `sir_df()` foreign-language output (#272) diff --git a/R/mo.R b/R/mo.R index e80c7b5a2..5a5c81217 100755 --- a/R/mo.R +++ b/R/mo.R @@ -352,16 +352,34 @@ as.mo <- function(x, (MO_lookup_current$species_first == substr(x_parts[2], 1, 1) | MO_lookup_current$subspecies_first == substr(x_parts[2], 1, 1) | MO_lookup_current$subspecies_first == substr(x_parts[3], 1, 1))) - # Issue #288: if the species (and subspecies) word(s) in the input exactly match - # exactly one candidate, use only that candidate and bypass the 0.55 cutoff. - # This prevents prevalent bacteria from outranking a rarer organism whose species - # epithet is an unambiguous exact match, e.g. "S. apiospermum" → Scedosporium. + # Issue #288 (extended): if the species (and subspecies) word(s) in the input + # exactly match candidates that all belong to one and the same genus, bypass the + # 0.55 cutoff. A species together with its subspecies/autonyms (e.g. Plasmodium + # ovale + curtisi + wallikeri) is the same taxon, so for a genus+species input we + # collapse to the species-rank record (subspecies == ""). This prevents prevalent + # bacteria from outranking a rarer organism whose species epithet is an + # unambiguous exact match, e.g. "S. apiospermum" -> Scedosporium, "P. ovale" -> + # Plasmodium ovale. If two different genera share the epithet, the genus check + # stays FALSE and the normal matching score arbitrates. 
sp_exact <- tolower(MO_lookup_current$species[filtr]) == x_parts[2] if (length(x_parts) == 3) { sp_exact <- sp_exact & tolower(MO_lookup_current$subspecies[filtr]) == x_parts[3] } - if (sum(sp_exact) == 1) { - filtr <- filtr[sp_exact] + exact_idx <- filtr[which(sp_exact)] # which() drops NA comparisons so no NA index reaches the lookups below + if (length(exact_idx) >= 1 && + length(unique(MO_lookup_current$genus_lower[exact_idx])) == 1) { + if (length(x_parts) == 2) { + # genus + species only: collapse to the species-rank record (subspecies == ""); %in% never yields NA, so any() below cannot fail on a missing subspecies + is_species_rank <- MO_lookup_current$subspecies[exact_idx] %in% "" + if (any(is_species_rank)) { + filtr <- exact_idx[is_species_rank][1] + } else { + filtr <- exact_idx[1] + } + } else { + # explicit subspecies given, unambiguous within the genus + filtr <- exact_idx[1] + } minimum_matching_score <- 0 } } else { diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index 5b4a787bd..babe42e81 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -142,3 +142,9 @@ test_that("test-data.R", { # x <- check_non_ascii() %>% # filter(file %unlike% "^(data-raw|docs|git_)") }) + +test_that("taxonomic name columns contain no NA (empty string is used instead)", { + for (col in c("domain", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies")) { + expect_false(anyNA(microorganisms[[col]]), info = col) + } +}) diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index 3f98ec6b5..49a90e5e8 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -338,3 +338,30 @@ test_that("test-mo.R", { ) } }) + +test_that("as.mo() resolves abbreviated genus when species carries subspecies (#288 follow-up)", { + # "P. ovale" must resolve to Plasmodium ovale, not a Pseudomonas species, + # even though P. ovale has subspecies (curtisi, wallikeri) sharing the epithet. + expect_identical( + as.mo("P. 
ovale", keep_synonyms = TRUE, info = FALSE), + as.mo("Plasmodium ovale", keep_synonyms = TRUE, info = FALSE) + ) + expect_identical( + mo_name("P. ovale", keep_synonyms = TRUE, language = NULL), + "Plasmodium ovale" + ) + + # Non-regression: the original #288 example must still work. + expect_identical( + mo_genus("S. apiospermum", keep_synonyms = TRUE, language = NULL), + "Scedosporium" + ) + + # Explicit subspecies must not be collapsed to species rank. + if (any(microorganisms$fullname == "Plasmodium ovale curtisi")) { + expect_identical( + mo_name("P. ovale curtisi", keep_synonyms = TRUE, language = NULL), + "Plasmodium ovale curtisi" + ) + } +})