mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 18:41:58 +02:00
AI improvements
This commit is contained in:
@ -23,12 +23,13 @@
|
||||
#' @param info print progress
|
||||
#' @param rules a character vector that specifies which rules should be applied - one or more of \code{c("breakpoints", "expert", "other", "all")}
|
||||
#' @param verbose a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected
|
||||
#' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Details
|
||||
#' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Antibiotics
|
||||
#' @param col_bactid deprecated, use \code{col_mo} instead.
|
||||
#' @param ... parameters that are passed on to \code{eucast_rules}
|
||||
#' @inheritParams first_isolate
|
||||
#' @details To define antibiotics column names, input a text or use \code{NA} to skip a column (e.g. \code{tica = NA}). Non-existing columns will anyway be skipped with a warning. See the Antibiotics section for an explanation of the abbreviations.
|
||||
#' @section Antibiotics:
|
||||
#' To define antibiotics column names, input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will anyway be skipped with a warning.
|
||||
#'
|
||||
#' Abbrevations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code})
|
||||
#'
|
||||
#' \strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}),
|
||||
|
2
R/mdro.R
2
R/mdro.R
@ -23,7 +23,7 @@
|
||||
#' @param country country code to determine guidelines. EUCAST rules will be used when left empty, see Details. Should be or a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).
|
||||
#' @param info print progress
|
||||
#' @inheritParams eucast_rules
|
||||
#' @param metr column name of an antibiotic. Use \code{NA} to skip a column, like \code{tica = NA}. Non-existing columns will anyway be skipped. See the Antibiotics section for an explanation of the abbreviations.
|
||||
#' @param metr column name of an antibiotic, see Antibiotics
|
||||
#' @param ... parameters that are passed on to methods
|
||||
#' @inheritSection eucast_rules Antibiotics
|
||||
#' @details When \code{country} will be left blank, guidelines will be taken from EUCAST Expert Rules Version 3.1 "Intrinsic Resistance and Exceptional Phenotypes Tables" (\url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf}).
|
||||
|
12
R/mic.R
12
R/mic.R
@ -200,12 +200,12 @@ summary.mic <- function(object, ...) {
|
||||
n_total <- x %>% length()
|
||||
x <- x[!is.na(x)]
|
||||
n <- x %>% length()
|
||||
lst <- c('mic',
|
||||
n_total - n,
|
||||
sort(x)[1] %>% as.character(),
|
||||
sort(x)[n] %>% as.character())
|
||||
names(lst) <- c("Mode", "<NA>", "Min.", "Max.")
|
||||
lst
|
||||
c(
|
||||
"Class" = 'mic',
|
||||
"<NA>" = n_total - n,
|
||||
"Min." = sort(x)[1] %>% as.character(),
|
||||
"Max." = sort(x)[n] %>% as.character()
|
||||
)
|
||||
}
|
||||
|
||||
#' @exportMethod plot.mic
|
||||
|
2
R/misc.R
2
R/misc.R
@ -51,7 +51,7 @@ percent <- function(x, round = 1, force_zero = FALSE, ...) {
|
||||
|
||||
check_available_columns <- function(tbl, col.list, info = TRUE) {
|
||||
# check columns
|
||||
col.list <- col.list[!is.na(col.list)]
|
||||
col.list <- col.list[!is.na(col.list) & !is.null(col.list)]
|
||||
names(col.list) <- col.list
|
||||
col.list.bak <- col.list
|
||||
# are they available as upper case or lower case then?
|
||||
|
70
R/mo.R
70
R/mo.R
@ -26,7 +26,7 @@
|
||||
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
|
||||
#'
|
||||
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
|
||||
#' @param allow_uncertain a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.
|
||||
#' @param allow_uncertain a logical to indicate whether the input should be checked for less possible results, see Details
|
||||
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.
|
||||
#' @rdname as.mo
|
||||
#' @aliases mo
|
||||
@ -34,11 +34,11 @@
|
||||
#' @details
|
||||
#' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
|
||||
#' \preformatted{
|
||||
#' Code Full name
|
||||
#' --------------- --------------------------------------
|
||||
#' B_KLBSL Klebsiella
|
||||
#' B_KLBSL_PNE Klebsiella pneumoniae
|
||||
#' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis
|
||||
#' Code Full name
|
||||
#' --------------- --------------------------------------
|
||||
#' B_KLBSL Klebsiella
|
||||
#' B_KLBSL_PNE Klebsiella pneumoniae
|
||||
#' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis
|
||||
#' | | | |
|
||||
#' | | | |
|
||||
#' | | | ----> subspecies, a 3-4 letter acronym
|
||||
@ -57,7 +57,7 @@
|
||||
#' \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
|
||||
#' }
|
||||
#'
|
||||
#' A couple of effects because of these rules
|
||||
#' A couple of effects because of these rules:
|
||||
#' \itemize{
|
||||
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
|
||||
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
|
||||
@ -66,6 +66,13 @@
|
||||
#' }
|
||||
#' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
|
||||
#'
|
||||
#' When using \code{allow_uncertain = TRUE} (which is the default setting), it will use additional rules if all previous AI rules failed to get valid results. Examples:
|
||||
#' \itemize{
|
||||
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPTC_GRB}) needs review.}
|
||||
#' \item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
|
||||
#' \item{\code{"D. spartina"}. This is the abbreviation of an old taxonomic name: \emph{Didymosphaeria spartinae} (the last "e" was missing from the input). This fungus was renamed to \emph{Leptosphaeria obiones}, so a warning will be thrown that this result (\code{F_LPTSP_OBI}) needs review.}
|
||||
#' }
|
||||
#'
|
||||
#' \code{guess_mo} is an alias of \code{as.mo}.
|
||||
#' @section ITIS:
|
||||
#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
|
||||
@ -94,6 +101,7 @@
|
||||
#' as.mo("S. aureus")
|
||||
#' as.mo("S aureus")
|
||||
#' as.mo("Staphylococcus aureus")
|
||||
#' as.mo("Staphylococcus aureus (MRSA)")
|
||||
#' as.mo("MRSA") # Methicillin Resistant S. aureus
|
||||
#' as.mo("VISA") # Vancomycin Intermediate S. aureus
|
||||
#' as.mo("VRSA") # Vancomycin Resistant S. aureus
|
||||
@ -136,7 +144,7 @@
|
||||
#' df <- df %>%
|
||||
#' mutate(mo = as.mo(paste(genus, species)))
|
||||
#' }
|
||||
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
|
||||
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = NULL) {
|
||||
mo <- mo_validate(x = x, property = "mo",
|
||||
Becker = Becker, Lancefield = Lancefield,
|
||||
allow_uncertain = allow_uncertain, reference_df = reference_df)
|
||||
@ -155,11 +163,11 @@ is.mo <- function(x) {
|
||||
#' @export
|
||||
guess_mo <- as.mo
|
||||
|
||||
#' @importFrom dplyr %>% pull left_join n_distinct
|
||||
#' @importFrom dplyr %>% pull left_join n_distinct progress_estimated
|
||||
#' @importFrom data.table data.table as.data.table setkey
|
||||
#' @importFrom crayon magenta red italic
|
||||
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
allow_uncertain = FALSE, reference_df = NULL,
|
||||
allow_uncertain = TRUE, reference_df = NULL,
|
||||
property = "mo", clear_options = TRUE) {
|
||||
|
||||
if (!"AMR" %in% base::.packages()) {
|
||||
@ -272,7 +280,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
# cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
|
||||
# cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
|
||||
|
||||
progress <- progress_estimated(n = length(x), min_time = 3)
|
||||
|
||||
for (i in 1:length(x)) {
|
||||
|
||||
progress$tick()$print()
|
||||
|
||||
if (identical(x_trimmed[i], "")) {
|
||||
# empty values
|
||||
x[i] <- NA_character_
|
||||
@ -615,8 +628,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
} else {
|
||||
x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
|
||||
}
|
||||
warning(red(paste0("UNCERTAIN - '",
|
||||
x_backup[i], "' -> ", italic(found[1, name]))),
|
||||
warning(red(paste0('UNCERTAIN - "',
|
||||
x_backup[i], '" -> ', italic(found[1, name]))),
|
||||
call. = FALSE, immediate. = TRUE)
|
||||
renamed_note(name_old = found[1, name],
|
||||
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
|
||||
@ -627,13 +640,17 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
}
|
||||
|
||||
# (2) strip values between brackets ----
|
||||
found <- microorganismsDT[fullname %like% gsub("( [(].*[)]) ", " ", x_withspaces[i])
|
||||
| fullname %like% gsub("( [(].*[)]) ", " ", x_backup[i])
|
||||
| fullname %like% gsub("( [(].*[)]) ", " ", x[i]),]
|
||||
x_backup_stripped <- gsub("( [(].*[)])", "", x_backup[i])
|
||||
x_backup_stripped <- trimws(gsub(" ", " ", x_backup_stripped, fixed = TRUE))
|
||||
x_species_stripped <- gsub("( [(].*[)])", "", x_species[i])
|
||||
x_species_stripped <- trimws(gsub(" ", " ", x_species_stripped, fixed = TRUE))
|
||||
|
||||
found <- microorganismsDT[fullname %like% x_backup_stripped
|
||||
| fullname %like% x_species_stripped,]
|
||||
if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||
x[i] <- found[1, ..property][[1]]
|
||||
warning(red(paste0("UNCERTAIN - '",
|
||||
x_backup[i], "' -> ", italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")),
|
||||
warning(red(paste0('UNCERTAIN - "',
|
||||
x_backup[i], '" -> ', italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")),
|
||||
call. = FALSE, immediate. = TRUE)
|
||||
next
|
||||
}
|
||||
@ -647,8 +664,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE)))
|
||||
if (!is.na(found)) {
|
||||
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||
warning(red(paste0("UNCERTAIN - '",
|
||||
z, "' -> ", italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")),
|
||||
warning(red(paste0('UNCERTAIN - "',
|
||||
z, '" -> ', italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")),
|
||||
call. = FALSE, immediate. = TRUE)
|
||||
return(found[1L])
|
||||
}
|
||||
@ -795,6 +812,21 @@ print.mo <- function(x, ...) {
|
||||
print.default(x, quote = FALSE)
|
||||
}
|
||||
|
||||
#' @exportMethod summary.mo
|
||||
#' @export
|
||||
#' @noRd
|
||||
summary.mo <- function(object, ...) {
|
||||
# unique and top 1-3
|
||||
x <- object
|
||||
top_3 <- unname(top_freq(freq(x), 3))
|
||||
c("Class" = "mo",
|
||||
"<NA>" = length(x[is.na(x)]),
|
||||
"Unique" = dplyr::n_distinct(x[!is.na(x)]),
|
||||
"#1" = top_3[1],
|
||||
"#2" = top_3[2],
|
||||
"#3" = top_3[3])
|
||||
}
|
||||
|
||||
#' @exportMethod as.data.frame.mo
|
||||
#' @export
|
||||
#' @noRd
|
||||
|
19
R/rsi.R
19
R/rsi.R
@ -39,14 +39,20 @@
|
||||
#' barplot(rsi_data) # for frequencies
|
||||
#' freq(rsi_data) # frequency table with informative header
|
||||
#'
|
||||
#' # fastest way to transform all columns with already valid AB results to class `rsi`:
|
||||
#' # using dplyr's mutate
|
||||
#' library(dplyr)
|
||||
#' septic_patients %>%
|
||||
#' mutate_at(vars(peni:rifa), as.rsi)
|
||||
#'
|
||||
#' # fastest way to transform all columns with already valid AB results to class `rsi`:
|
||||
#' septic_patients %>%
|
||||
#' mutate_if(is.rsi.eligible,
|
||||
#' as.rsi)
|
||||
as.rsi <- function(x) {
|
||||
if (is.rsi(x)) {
|
||||
x
|
||||
} else if (identical(levels(x), c("S", "I", "R"))) {
|
||||
structure(x, class = c('rsi', 'ordered', 'factor'))
|
||||
} else {
|
||||
|
||||
x <- x %>% unlist()
|
||||
@ -102,14 +108,15 @@ is.rsi.eligible <- function(x) {
|
||||
| is.numeric(x)
|
||||
| is.mo(x)
|
||||
| identical(class(x), "Date")
|
||||
| identical(levels(x), c("S", "I", "R"))) {
|
||||
| is.rsi(x)) {
|
||||
# no transformation needed
|
||||
FALSE
|
||||
} else {
|
||||
# check all but a-z
|
||||
x <- unique(gsub("[^RSIrsi]+", "", unique(x)))
|
||||
all(x %in% c("R", "I", "S", "", NA_character_)) &
|
||||
!all(x %in% c("", NA_character_))
|
||||
y <- unique(gsub("[^RSIrsi]+", "", unique(x)))
|
||||
!all(y %in% c("", NA_character_)) &
|
||||
all(y %in% c("R", "I", "S", "", NA_character_)) &
|
||||
max(nchar(as.character(x)), na.rm = TRUE) < 8
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,7 +135,7 @@ print.rsi <- function(x, ...) {
|
||||
summary.rsi <- function(object, ...) {
|
||||
x <- object
|
||||
c(
|
||||
"Mode" = 'rsi',
|
||||
"Class" = 'rsi',
|
||||
"<NA>" = sum(is.na(x)),
|
||||
"Sum S" = sum(x == "S", na.rm = TRUE),
|
||||
"Sum IR" = sum(x %in% c("I", "R"), na.rm = TRUE),
|
||||
|
Reference in New Issue
Block a user