mirror of
https://github.com/msberends/AMR.git
synced 2025-08-27 14:22:20 +02:00
(v2.1.1.9082) algorithm updates
This commit is contained in:
@@ -1547,6 +1547,7 @@ add_MO_lookup_to_AMR_env <- function() {
|
||||
MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
|
||||
MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 1.25
|
||||
MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 1.5
|
||||
MO_lookup[which(MO_lookup$kingdom == "Chromista"), "kingdom_index"] <- 1.75
|
||||
MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 2
|
||||
# all the rest
|
||||
MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 3
|
||||
|
@@ -670,6 +670,16 @@ duplicated_antibiogram <- function(antibiogram, points_threshold, ignore_I, type
|
||||
# fast return, only 1 isolate
|
||||
return(FALSE)
|
||||
}
|
||||
stop("Check R/first_isolate.R -> duplicated_antibiogram()")
|
||||
# first sort on data availability - count the dots and order that ascending
|
||||
number_dots <- vapply(FUN.VALUE = integer(1),
|
||||
antibiogram,
|
||||
function(x) sum(strsplit(x, "", fixed = TRUE)[[1]] == "."),
|
||||
USE.NAMES = FALSE)
|
||||
new_order <- order(number_dots, antibiogram)
|
||||
antibiogram.bak <- antibiogram
|
||||
antibiogram <- antibiogram[new_order]
|
||||
|
||||
out <- rep(NA, length(antibiogram))
|
||||
out[1] <- FALSE
|
||||
out[2] <- antimicrobials_equal(antibiogram[1], antibiogram[2],
|
||||
@@ -680,11 +690,6 @@ duplicated_antibiogram <- function(antibiogram, points_threshold, ignore_I, type
|
||||
return(out)
|
||||
}
|
||||
|
||||
# sort after the second one (since we already determined AB equality of the first two)
|
||||
original_sort <- c(1, 2, rank(antibiogram[3:length(antibiogram)]) + 2)
|
||||
antibiogram.bak <- antibiogram
|
||||
antibiogram <- c(antibiogram[1:2], sort(antibiogram[3:length(antibiogram)]))
|
||||
|
||||
# we can skip the duplicates - they are never unique antibiograms of course
|
||||
duplicates <- duplicated(antibiogram)
|
||||
out[3:length(out)][duplicates[3:length(out)] == TRUE] <- TRUE
|
||||
@@ -703,7 +708,7 @@ duplicated_antibiogram <- function(antibiogram, points_threshold, ignore_I, type
|
||||
type = type)))
|
||||
}
|
||||
|
||||
out <- out[original_sort]
|
||||
out <- out[order(new_order)]
|
||||
# rerun duplicated again
|
||||
duplicates <- duplicated(antibiogram.bak)
|
||||
out[duplicates == TRUE] <- TRUE
|
||||
|
3
R/mo.R
3
R/mo.R
@@ -1081,6 +1081,9 @@ convert_colloquial_input <- function(x) {
|
||||
out[x %like_case% "(^| )yeast?"] <- "F_YEAST"
|
||||
out[x %like_case% "(^| )fung(us|i)"] <- "F_FUNGUS"
|
||||
|
||||
# protozoa
|
||||
out[x %like_case% "protozo"] <- "P_PROTOZOAN" # to hit it with most languages, and "protozo" does not occur in the microorganisms data set for anything else
|
||||
|
||||
# trivial names known to the field
|
||||
out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
|
||||
out[x %like_case% "gono[ck]o[ck]"] <- "B_NESSR_GNRR"
|
||||
|
@@ -30,12 +30,11 @@
|
||||
#' Calculate the Matching Score for Microorganisms
|
||||
#'
|
||||
#' This algorithm is used by [as.mo()] and all the [`mo_*`][mo_property()] functions to determine the most probable match of taxonomic records based on user input.
|
||||
#' @author Dr. Matthijs Berends, 2018
|
||||
#' @param x Any user input value(s)
|
||||
#' @param n A full taxonomic name, that exists in [`microorganisms$fullname`][microorganisms]
|
||||
#' @note This algorithm was originally described in: Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}.
|
||||
#' @note This algorithm was originally developed in 2018 and subsequently described in: Berends MS *et al.* (2022). **AMR: An R Package for Working with Antimicrobial Resistance Data**. *Journal of Statistical Software*, 104(3), 1-31; \doi{10.18637/jss.v104.i03}.
|
||||
#'
|
||||
#' Later, the work of Bartlett A *et al.* about bacterial pathogens infecting humans (2022, \doi{10.1099/mic.0.001269}) was incorporated.
|
||||
#' Later, the work of Bartlett A *et al.* about bacterial pathogens infecting humans (2022, \doi{10.1099/mic.0.001269}) was incorporated, and optimalisations to the algorithm were made.
|
||||
#' @section Matching Score for Microorganisms:
|
||||
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
|
||||
#'
|
||||
@@ -50,16 +49,17 @@
|
||||
#' * \eqn{l_n} is the length of \eqn{n};
|
||||
#' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
|
||||
#' * \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
|
||||
#' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 1.25, Protozoa = 1.5, Archaea = 2, others = 3.
|
||||
#' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 1.25, Protozoa = 1.5, Chromista = 1.75, Archaea = 2, others = 3.
|
||||
#'
|
||||
#' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups:
|
||||
#'
|
||||
#' - **Established**, if a taxonomic species has infected at least three persons in three or more references. These records have `prevalence = 1.0` in the [microorganisms] data set;
|
||||
#' - **Established**, if a taxonomic species has infected at least three persons in three or more references. These records have `prevalence = 1.15` in the [microorganisms] data set;
|
||||
#' - **Putative**, if a taxonomic species has fewer than three known cases. These records have `prevalence = 1.25` in the [microorganisms] data set.
|
||||
#'
|
||||
#' Furthermore,
|
||||
#'
|
||||
#' - Any genus present in the **established** list also has `prevalence = 1.0` in the [microorganisms] data set;
|
||||
#' - Genera from the World Health Organization's (WHO) Priority Pathogen List have `prevalence = 1.0` in the [microorganisms] data set;
|
||||
#' - Any genus present in the **established** list also has `prevalence = 1.15` in the [microorganisms] data set;
|
||||
#' - Any other genus present in the **putative** list has `prevalence = 1.25` in the [microorganisms] data set;
|
||||
#' - Any other species or subspecies of which the genus is present in the two aforementioned groups, has `prevalence = 1.5` in the [microorganisms] data set;
|
||||
#' - Any *non-bacterial* genus, species or subspecies of which the genus is present in the following list, has `prevalence = 1.25` in the [microorganisms] data set: `r vector_or(MO_RELEVANT_GENERA, quotes = "*")`;
|
||||
|
@@ -442,8 +442,8 @@ mo_pathogenicity <- function(x, language = get_AMR_locale(), keep_synonyms = get
|
||||
kngd <- AMR_env$MO_lookup$kingdom[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||
rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||
|
||||
out <- factor(case_when_AMR(prev == 1 & kngd == "Bacteria" & rank != "genus" ~ "Pathogenic",
|
||||
(prev < 2 & kngd == "Fungi") ~ "Potentially pathogenic",
|
||||
out <- factor(case_when_AMR(prev <= 1.15 & kngd == "Bacteria" & rank != "genus" ~ "Pathogenic",
|
||||
prev < 2 & kngd == "Fungi" ~ "Potentially pathogenic",
|
||||
prev == 2 & kngd == "Bacteria" ~ "Non-pathogenic",
|
||||
kngd == "Bacteria" ~ "Potentially pathogenic",
|
||||
TRUE ~ "Unknown"),
|
||||
|
BIN
R/sysdata.rda
BIN
R/sysdata.rda
Binary file not shown.
Reference in New Issue
Block a user