1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 14:01:55 +02:00

new class bactid

This commit is contained in:
2018-07-23 14:14:03 +02:00
parent 40de1b4ac2
commit 8421638b60
21 changed files with 408 additions and 251 deletions

View File

@ -16,37 +16,58 @@
# GNU General Public License for more details. #
# ==================================================================== #
#' Find bacteria ID based on genus/species
#' Transform to bacteria ID
#'
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
#' @param x character vector or a dataframe with one or two columns
#' Use this function to determine a valid ID based on a genus (and species). This input can be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
#' @param x a character vector or a dataframe with one or two columns
#' @rdname as.bactid
#' @details Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. For example:
#' \itemize{
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
#' \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
#' }
#' Moreover, this function also supports ID's based on only Gram stain, when the species is not known. \cr
#' For example, \code{"Gram negative rods"} and \code{"GNR"} will both return the ID of a Gram negative rod: \code{GNR}.
#' @export
#' @importFrom dplyr %>% filter pull
#' @return Character (vector).
#' @return Character (vector) with class \code{"bactid"}. Unknown values will return \code{NA}.
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' guess_bactid("stau")
#' guess_bactid("STAU")
#' guess_bactid("staaur")
#' guess_bactid("S. aureus")
#' guess_bactid("S aureus")
#' guess_bactid("Staphylococcus aureus")
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
#' as.bactid("stau")
#' as.bactid("STAU")
#' as.bactid("staaur")
#' as.bactid("S. aureus")
#' as.bactid("S aureus")
#' as.bactid("Staphylococcus aureus")
#' as.bactid("MRSA") # Methicillin Resistant S. aureus
#' as.bactid("VISA") # Vancomycin Intermediate S. aureus
#' as.bactid("VRSA") # Vancomycin Resistant S. aureus
#'
#' \dontrun{
#' df$bactid <- guess_bactid(df$microorganism_name)
#' df$bactid <- as.bactid(df$microorganism_name)
#'
#' # the select function of tidyverse is also supported:
#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
#' library(dplyr)
#' df$bactid <- df %>%
#' select(microorganism_name) %>%
#' as.bactid()
#'
#' # and can even contain 2 columns, which is convenient for genus/species combinations:
#' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
#' df$bactid <- df %>%
#' select(genus, species) %>%
#' as.bactid()
#'
#' # same result:
#' df <- df %>% mutate(bactid = paste(genus, species)) %>% guess_bactid())
#' df <- df %>%
#' mutate(bactid = paste(genus, species) %>%
#' as.bactid())
#' }
guess_bactid <- function(x) {
as.bactid <- function(x) {
failures <- character(0)
if (NCOL(x) == 2) {
# support tidyverse selection like: df %>% select(colA, colB)
@ -60,17 +81,19 @@ guess_bactid <- function(x) {
if (NCOL(x) > 2) {
stop('`x` can be 2 columns at most', call. = FALSE)
}
# support tidyverse selection like: df %>% select(colA)
if (!is.vector(x)) {
x <- pull(x, 1)
}
}
x.fullbackup <- x
# remove dots and other non-text in case of "E. coli" except spaces
x <- gsub("[^a-zA-Z ]+", "", x)
x <- gsub("[^a-zA-Z0-9 ]+", "", x)
# but spaces before and after should be omitted
x <- trimws(x, which = "both")
x.bak <- x
x.backup <- x
# replace space by regex sign
x <- gsub(" ", ".*", x, fixed = TRUE)
# add start and stop
@ -96,42 +119,44 @@ guess_bactid <- function(x) {
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
x[i] <- 'Pseudomonas aeruginosa'
}
if (tolower(x[i]) %like% 'coagulase') {
# coerce S. coagulase negative
if (tolower(x[i]) %like% 'coagulase'
| tolower(x[i]) %like% 'cns'
| tolower(x[i]) %like% 'cons') {
# coerce S. coagulase negative, also as CNS and CoNS
x[i] <- 'Coagulase Negative Staphylococcus (CNS)'
}
# translate known trivial names to genus+species
if (!is.na(x.bak[i])) {
if (toupper(x.bak[i]) == 'MRSA'
| toupper(x.bak[i]) == 'VISA'
| toupper(x.bak[i]) == 'VRSA') {
if (!is.na(x.backup[i])) {
if (toupper(x.backup[i]) == 'MRSA'
| toupper(x.backup[i]) == 'VISA'
| toupper(x.backup[i]) == 'VRSA') {
x[i] <- 'Staphylococcus aureus'
}
if (toupper(x.bak[i]) == 'MRSE') {
if (toupper(x.backup[i]) == 'MRSE') {
x[i] <- 'Staphylococcus epidermidis'
}
if (toupper(x.bak[i]) == 'VRE') {
if (toupper(x.backup[i]) == 'VRE') {
x[i] <- 'Enterococcus'
}
if (toupper(x.bak[i]) == 'MRPA') {
if (toupper(x.backup[i]) == 'MRPA') {
# multi resistant P. aeruginosa
x[i] <- 'Pseudomonas aeruginosa'
}
if (toupper(x.bak[i]) == 'PISP'
| toupper(x.bak[i]) == 'PRSP') {
if (toupper(x.backup[i]) == 'PISP'
| toupper(x.backup[i]) == 'PRSP') {
# peni resistant S. pneumoniae
x[i] <- 'Streptococcus pneumoniae'
}
if (toupper(x.bak[i]) == 'VISP'
| toupper(x.bak[i]) == 'VRSP') {
if (toupper(x.backup[i]) == 'VISP'
| toupper(x.backup[i]) == 'VRSP') {
# vanco resistant S. pneumoniae
x[i] <- 'Streptococcus pneumoniae'
}
}
# let's try the ID's first
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
found <- AMR::microorganisms %>% filter(bactid == x.backup[i])
if (nrow(found) == 0) {
# now try exact match
@ -152,38 +177,82 @@ guess_bactid <- function(x) {
}
if (nrow(found) == 0) {
# search for GLIMS code
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
if (toupper(x.backup[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.backup[i]))
}
}
if (nrow(found) == 0) {
# try splitting of characters and then find ID
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
x_split <- x
x_length <- nchar(x.bak[i])
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
x_length <- nchar(x.backup[i])
x_split[i] <- paste0(x.backup[i] %>% substr(1, x_length / 2) %>% trimws(),
'.* ',
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
x.backup[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
}
if (nrow(found) == 0) {
# try any match with text before and after original search string
# so "negative rods" will be "GNR"
if (x.bak[i] %like% "^Gram") {
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
if (x.backup[i] %like% "^Gram") {
x.backup[i] <- gsub("^Gram", "", x.backup[i], ignore.case = TRUE)
# remove leading and trailing spaces again
x.bak[i] <- trimws(x.bak[i], which = "both")
x.backup[i] <- trimws(x.backup[i], which = "both")
}
if (!is.na(x.bak[i])) {
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
if (!is.na(x.backup[i])) {
found <- AMR::microorganisms %>% filter(fullname %like% x.backup[i])
}
}
if (nrow(found) != 0) {
if (nrow(found) != 0 & x.backup[i] != "") {
x[i] <- as.character(found[1, 'bactid'])
} else {
x[i] <- ""
x[i] <- NA_character_
failures <- c(failures, x.fullbackup[i])
}
}
failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0) {
warning("These values could not be coerced to a valid bactid: ",
paste('"', unique(failures), '"', sep = "", collapse = ', '),
".",
call. = FALSE)
}
class(x) <- "bactid"
attr(x, 'package') <- 'AMR'
attr(x, 'package.version') <- packageDescription('AMR')$Version
x
}
# Alias: `guess_bactid` is bound to the same function object as `as.bactid`,
# so both names accept the same input and produce the same result.
#' @rdname as.bactid
#' @export
guess_bactid <- as.bactid
#' @rdname as.bactid
#' @export
is.bactid <- function(x) {
  # Test whether `x` carries the S3 class "bactid".
  #
  # x: any R object
  #
  # Returns a single TRUE/FALSE.
  #
  # inherits() is used instead of comparing the whole class vector, so an
  # object stays recognised as a bactid when its class attribute holds
  # additional entries (e.g. c("bactid", "character")).
  inherits(x, "bactid")
}
#' @exportMethod print.bactid
#' @export
#' @noRd
print.bactid <- function(x, ...) {
  # Print method for objects of class "bactid".
  #
  # x:   the object to print
  # ...: accepted for compatibility with the print() generic; not used here
  #
  # Writes a one-line class header, then the values as an unquoted character
  # vector. Returns `x` invisibly, following the convention for print methods,
  # so that the printed object can be passed on (e.g. in a pipe) unchanged.
  cat("Class 'bactid'\n")
  # convert to character before handing off to the default printer
  print.default(as.character(x), quote = FALSE)
  invisible(x)
}
#' @exportMethod as.data.frame.bactid
#' @export
#' @noRd
as.data.frame.bactid <- function (x, ...) {
  # Convert a bactid vector into a one-column data.frame by delegating to
  # as.data.frame.vector() (modelled on as.data.frame.character, minus the
  # stringsAsFactors handling).
  #
  # x:   the bactid vector
  # ...: passed on to as.data.frame.vector(); may contain `nm`, the column name
  #
  # Returns the data.frame produced by as.data.frame.vector().

  # caller supplied an explicit column name: pass everything through untouched
  if ("nm" %in% names(list(...))) {
    return(as.data.frame.vector(x, ...))
  }

  # otherwise name the column after the expression that was passed as `x`
  column_label <- paste(
    deparse(substitute(x), width.cutoff = 500L),
    collapse = " "
  )
  as.data.frame.vector(x, ..., nm = column_label)
}

View File

@ -201,9 +201,10 @@ EUCAST_rules <- function(tbl,
}
# join to microorganisms table
joinby <- colnames(AMR::microorganisms)[1]
names(joinby) <- col_bactid
tbl <- tbl %>% left_join(y = AMR::microorganisms, by = joinby, suffix = c("_tempmicroorganisms", ""))
if (!tbl %>% pull(col_bactid) %>% is.bactid()) {
tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid()
}
tbl <- tbl %>% left_join_microorganisms(by = col_bactid, suffix = c("_tempmicroorganisms", ""))
# antibiotic classes
aminoglycosides <- c(tobr, gent, kana, neom, neti, siso)

View File

@ -22,7 +22,7 @@
#' @param tbl a \code{data.frame} containing isolates.
#' @param col_date column name of the result date (or the date that it was received at the lab)
#' @param col_patient_id column name of the unique IDs of the patients
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
#' @param col_bactid column name of the unique IDs of the microorganisms: \code{bactid}'s. If this column has another class than \code{"bactid"}, values will be coerced using \code{\link{as.bactid}}.
#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
#' @param col_specimen column name of the specimen type or group
#' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
@ -126,7 +126,7 @@ first_isolate <- function(tbl,
# bactid OR genus+species must be available
if (is.na(col_bactid) & (is.na(col_genus) | is.na(col_species))) {
stop('`col_bactid or both `col_genus` and `col_species` must be available.')
stop('`col_bactid` or both `col_genus` and `col_species` must be available.')
}
# check if columns exist
@ -152,6 +152,9 @@ first_isolate <- function(tbl,
check_columns_existance(col_keyantibiotics)
if (!is.na(col_bactid)) {
if (!tbl %>% pull(col_bactid) %>% is.bactid()) {
tbl[, col_bactid] <- tbl %>% pull(col_bactid) %>% as.bactid()
}
tbl <- tbl %>% left_join_microorganisms(by = col_bactid)
col_genus <- "genus"
col_species <- "species"

View File

@ -273,8 +273,11 @@ frequency_tbl <- function(x,
} else {
NAs <- x[is.na(x)]
}
if (na.rm == TRUE) {
x_class <- class(x)
x <- x[!x %in% NAs]
class(x) <- x_class
}
if (missing(sort.count) & 'factor' %in% class(x)) {

View File

@ -26,8 +26,8 @@
#' df2 <- left_join_microorganisms(df, "bacteria_id")
#' colnames(df2)
inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -36,7 +36,9 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
} else {
joinby <- by
}
join <- dplyr::inner_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
join <- suppressWarnings(
dplyr::inner_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
)
if (nrow(join) > nrow(x)) {
warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
}
@ -46,8 +48,8 @@ inner_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -56,7 +58,9 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
} else {
joinby <- by
}
join <- dplyr::left_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
join <- suppressWarnings(
dplyr::left_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
)
if (nrow(join) > nrow(x)) {
warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
}
@ -66,8 +70,8 @@ left_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -76,7 +80,9 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
} else {
joinby <- by
}
join <- dplyr::right_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
join <- suppressWarnings(
dplyr::right_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
)
if (nrow(join) > nrow(x)) {
warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
}
@ -86,8 +92,8 @@ right_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...
#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -96,7 +102,9 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
} else {
joinby <- by
}
join <- dplyr::full_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
join <- suppressWarnings(
dplyr::full_join(x = x, y = AMR::microorganisms, by = joinby, suffix = c("2", ""), ...)
)
if (nrow(join) > nrow(x)) {
warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original')
}
@ -106,8 +114,8 @@ full_join_microorganisms <- function(x, by = 'bactid', suffix = c("2", ""), ...)
#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = 'bactid', ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -116,14 +124,16 @@ semi_join_microorganisms <- function(x, by = 'bactid', ...) {
} else {
joinby <- by
}
dplyr::semi_join(x = x, y = AMR::microorganisms, by = joinby, ...)
suppressWarnings(
dplyr::semi_join(x = x, y = AMR::microorganisms, by = joinby, ...)
)
}
#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = 'bactid', ...) {
if (any(class(x) %in% c('character', 'factor'))) {
x <- data.frame(bactid = x, stringsAsFactors = FALSE)
if (!any(class(x) %in% c("bactid", "data.frame", "matrix"))) {
x <- data.frame(bactid = as.bactid(x), stringsAsFactors = FALSE)
}
# no name set to `by` parameter
if (is.null(names(by))) {
@ -132,5 +142,7 @@ anti_join_microorganisms <- function(x, by = 'bactid', ...) {
} else {
joinby <- by
}
dplyr::anti_join(x = x, y = AMR::microorganisms, by = joinby, ...)
suppressWarnings(
dplyr::anti_join(x = x, y = AMR::microorganisms, by = joinby, ...)
)
}