From fe803f7279f42202207ef4259b6e452c549caf82 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 19 Mar 2018 21:23:21 +0100 Subject: [PATCH] use guess_bactid for GLIMS codes --- R/data.R | 4 ++-- R/first_isolates.R | 14 ++++++++++---- R/globals.R | 1 + man/bactlist.umcg.Rd | 4 ++-- man/first_isolate.Rd | 8 ++++---- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/R/data.R b/R/data.R index 5dfb1ca0..435f86f2 100644 --- a/R/data.R +++ b/R/data.R @@ -85,14 +85,14 @@ #' Translation table for UMCG with ~1100 microorganisms #' -#' A dataset containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{bactlist}$bactid}, using \code{\link{left_join_bactlist}}. +#' A dataset containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{bactlist}$bactid} (using \code{\link{left_join_bactlist}}). GLIMS codes can also be translated to valid \code{bactid}'s with \code{\link{guess_bactid}}. #' @format A data.frame with 1090 observations and 2 variables: #' \describe{ #' \item{\code{mocode}}{Code of microorganism according to UMCG MMB} #' \item{\code{bactid}}{Code of microorganism in \code{\link{bactlist}}} #' } #' @source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl} -#' @seealso \code{\link{bactlist}} +#' @seealso \code{\link{guess_bactid}} \code{\link{bactlist}} "bactlist.umcg" #' Dataset with 2000 blood culture isolates of septic patients diff --git a/R/first_isolates.R b/R/first_isolates.R index c8d8bbe0..67799511 100644 --- a/R/first_isolates.R +++ b/R/first_isolates.R @@ -41,10 +41,10 @@ #' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. #' #' \strong{DETERMINING WEIGHTED ISOLATES} \cr -#' \strong{1. Using \code{type = "keyantibiotics"} and parameter \code{ignore_I}} \cr -#' To determine weighted isolates, the difference between key antibiotics will be checked. Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I == FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable and fast method. \cr -#' \strong{2. Using \code{type = "points"} and parameter \code{points_threshold}} \cr -#' To determine weighted isolates, difference between antimicrobial interpretations will be measured with points. A difference from I to S|R (or vice versa) means 0.5 points. A difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate. This method is being used by the Infection Prevention department (Dr M. Lokate) of the University Medical Center Groningen (UMCG). +#' \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr +#' To determine weighted isolates, the difference between key antibiotics will be checked. Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr +#' \strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr +#' To determine weighted isolates, difference between antimicrobial interpretations will be measured with points. A difference from I to S|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate. This method is being used by the Infection Prevention department (Dr M. Lokate) of the University Medical Center Groningen (UMCG). #' @keywords isolate isolates first #' @export #' @importFrom dplyr arrange_at lag between row_number filter mutate arrange @@ -676,6 +676,12 @@ guess_bactid <- function(x) { # try only genus, with 'species' attached found <- AMR::bactlist %>% filter(fullname %like% x_species[i]) } + if (nrow(found) == 0) { + # search for GLIMS code + if (toupper(x.bak[i]) %in% toupper(AMR::bactlist.umcg$mocode)) { + found <- AMR::bactlist.umcg %>% filter(toupper(mocode) == toupper(x.bak[i])) + } + } if (nrow(found) == 0) { # try splitting of characters and then find ID # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus diff --git a/R/globals.R b/R/globals.R index ece183b2..2eb98d09 100644 --- a/R/globals.R +++ b/R/globals.R @@ -30,6 +30,7 @@ globalVariables(c('.', 'key_ab_lag', 'key_ab_other', 'mic', + 'mocode', 'n', 'other_pat_or_mo', 'patient_id', diff --git a/man/bactlist.umcg.Rd b/man/bactlist.umcg.Rd index 0e789c65..bc14b777 100644 --- a/man/bactlist.umcg.Rd +++ b/man/bactlist.umcg.Rd @@ -16,9 +16,9 @@ MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - bactlist.umcg } \description{ -A dataset containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{bactlist}$bactid}, using \code{\link{left_join_bactlist}}. +A dataset containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{bactlist}$bactid} (using \code{\link{left_join_bactlist}}). GLIMS codes can also be translated to valid \code{bactid}'s with \code{\link{guess_bactid}}. } \seealso{ -\code{\link{bactlist}} +\code{\link{guess_bactid}} \code{\link{bactlist}} } \keyword{datasets} diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 7083c81d..f8054f89 100644 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -59,10 +59,10 @@ Determine first (weighted) isolates of all microorganisms of every patient per e To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. \strong{DETERMINING WEIGHTED ISOLATES} \cr - \strong{1. Using \code{type = "keyantibiotics"} and parameter \code{ignore_I}} \cr - To determine weighted isolates, the difference between key antibiotics will be checked. Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I == FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable and fast method. \cr - \strong{2. Using \code{type = "points"} and parameter \code{points_threshold}} \cr - To determine weighted isolates, difference between antimicrobial interpretations will be measured with points. A difference from I to S|R (or vice versa) means 0.5 points. A difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate. This method is being used by the Infection Prevention department (Dr M. Lokate) of the University Medical Center Groningen (UMCG). + \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr + To determine weighted isolates, the difference between key antibiotics will be checked. Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr + \strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr + To determine weighted isolates, difference between antimicrobial interpretations will be measured with points. A difference from I to S|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate. This method is being used by the Infection Prevention department (Dr M. Lokate) of the University Medical Center Groningen (UMCG). } \examples{ # septic_patients is a dataset available in the AMR package