From 1ce4b72dd2be1b30a86627247c06d22c6dd06357 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 8 Aug 2019 22:39:42 +0200 Subject: [PATCH] (v0.7.1.9031) include_unknown for first_isolate() --- DESCRIPTION | 65 +++------ NEWS.md | 7 +- R/ab.R | 2 +- R/first_isolate.R | 193 +++++++++++++------------- R/mo.R | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/SPSS.html | 2 +- docs/articles/index.html | 2 +- docs/articles/resistance_predict.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 12 +- docs/reference/first_isolate.html | 10 +- docs/reference/index.html | 2 +- git_merge.sh | 4 +- man/first_isolate.Rd | 8 +- tests/testthat/test-first_isolate.R | 20 +++ 17 files changed, 173 insertions(+), 164 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bf339b15..173ee8d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,53 +1,26 @@ Package: AMR -Version: 0.7.1.9030 +Version: 0.7.1.9031 Date: 2019-08-08 Title: Antimicrobial Resistance Analysis Authors@R: c( - person( - given = c("Matthijs", "S."), - family = "Berends", - email = "m.s.berends@umcg.nl", - role = c("aut", "cre"), - comment = c(ORCID = "0000-0001-7620-1800")), - person( - given = c("Christian", "F."), - family = "Luz", - email = "c.f.luz@umcg.nl", - role = "aut", - comment = c(ORCID = "0000-0001-5809-5995")), - person( - given = "Corinna", - family = "Glasner", - email = "c.glasner@umcg.nl", - role = c("aut", "ths"), - comment = c(ORCID = "0000-0003-1241-1328")), - person( - given = c("Alex", "W."), - family = "Friedrich", - email = "alex.friedrich@umcg.nl", - role = c("aut", "ths"), - comment = c(ORCID = "0000-0003-4881-038X")), - person( - given = c("Bhanu", "N.", "M."), - family = "Sinha", - email = "b.sinha@umcg.nl", - role = c("aut", "ths"), - comment = c(ORCID = "0000-0003-1634-0010")), - person( - given = c("Erwin", "E.", "A."), - family = "Hassing", - email = "e.hassing@certe.nl", - role = "ctb"), - person( - given = c("Bart", "C."), - family = 
"Meijer", - email = "b.meijerg@certe.nl", - role = "ctb"), - person( - given = "Dennis", - family = "Souverein", - email = "d.souvereing@streeklabhaarlem.nl", - role = "ctb")) + person(role = c("aut", "cre"), + family = "Berends", given = c("Matthijs", "S."), email = "m.s.berends@umcg.nl", comment = c(ORCID = "0000-0001-7620-1800")), + person(role = "aut", + family = "Luz", given = c("Christian", "F."), email = "c.f.luz@umcg.nl", comment = c(ORCID = "0000-0001-5809-5995")), + person(role = c("aut", "ths"), + family = "Glasner", given = "Corinna", email = "c.glasner@umcg.nl", comment = c(ORCID = "0000-0003-1241-1328")), + person(role = c("aut", "ths"), + family = "Friedrich", given = c("Alex", "W."), email = "alex.friedrich@umcg.nl", comment = c(ORCID = "0000-0003-4881-038X")), + person(role = c("aut", "ths"), + family = "Sinha", given = c("Bhanu", "N.", "M."), email = "b.sinha@umcg.nl", comment = c(ORCID = "0000-0003-1634-0010")), + person(role = "ctb", + family = "Hassing", given = c("Erwin", "E.", "A."), email = "e.hassing@certe.nl"), + person(role = "ctb", + family = "Lenglet", given = "Annick", email = "annick.lenglet@amsterdam.msf.org"), + person(role = "ctb", + family = "Meijer", given = c("Bart", "C."), email = "b.meijerg@certe.nl"), + person(role = "ctb", + family = "Souverein", given = "Dennis", email = "d.souvereing@streeklabhaarlem.nl")) Description: Functions to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial properties by using evidence-based methods. diff --git a/NEWS.md b/NEWS.md index 8d177188..011d851c 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ -# AMR 0.7.1.9030 +# AMR 0.7.1.9031 ### Breaking * Function `freq()` has moved to a new package, [`clean`](https://github.com/msberends/clean) ([CRAN link](https://cran.r-project.org/package=clean)). 
Creating frequency tables is actually not the scope of this package (never was) and this function has matured a lot over the last two years. Therefore, a new package was created for data cleaning and checking and it perfectly fits the `freq()` function. The [`clean`](https://github.com/msberends/clean) package is available on CRAN and will be installed automatically when updating the `AMR` package, that now imports it. In a later stage, the `skewness()` and `kurtosis()` functions will be moved to the `clean` package too. +* Determination of first isolates now **excludes** all 'unknown' microorganisms at default, i.e. microbial code `"UNKNOWN"`. They can be included with the new parameter `include_unknown`: `first_isolate(..., include_unknown = TRUE)`. For WHONET users, this means that all records with organism code `"con"` (*contamination*) will be excluded at default, since `as.mo("con") = "UNKNOWN"`. ### New * Additional way to calculate co-resistance, i.e. when using multiple antibiotics as input for `portion_*` functions or `count_*` functions. This can be used to determine the empiric susceptibily of a combination therapy. A new parameter `only_all_tested` (**which defaults to `FALSE`**) replaces the old `also_single_tested` and can be used to select one of the two methods to count isolates and calculate portions. The difference can be seen in this example table (which is also on the `portion` and `count` help pages), where the %SI is being determined: @@ -36,7 +37,7 @@ ### Changed * Added more informative errors and warnings to `eucast_rules()` -* Fixed a bug in `eucast_rules()` where antibiotic columns would be read as lists instead of characters +* Fixed a bug in `eucast_rules()` for *Yersinia pseudotuberculosis* * Added tibble printing support for classes `rsi`, `mic`, `ab` and `mo`. 
When using tibbles containing antibiotic columns, values `S` will print in green, values `I` will print in yellow and values `R` will print in red: ```r (run this on your own console, as this page does not support colour printing) @@ -65,7 +66,7 @@ * Using factors as input for `eucast_rules()` now adds missing factors levels when the function changes antibiotic results #### Other -* Added Dr Bart Meijer and Dr Dennis Souverein as contributors +* Added Dr Bart Meijer, Dr Dennis Souverein and Annick Lenglet as contributors # AMR 0.7.1 diff --git a/R/ab.R b/R/ab.R index b2481507..19936a30 100755 --- a/R/ab.R +++ b/R/ab.R @@ -297,6 +297,6 @@ type_sum.ab <- function(x) { #' @export pillar_shaft.ab <- function(x, ...) { out <- format(x) - out[is.na(x)] <- NA + out[is.na(x)] <- pillar::style_na("NA") pillar::new_pillar_shaft_simple(out, align = "left", min_width = 4) } diff --git a/R/first_isolate.R b/R/first_isolate.R index 2625e455..f9abcd15 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -30,7 +30,7 @@ #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU) #' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this. -#' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again +#' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again. The default of 365 days is based on the guideline by CLSI, see Source. 
#' @param testcodes_exclude character vector with test codes that should be excluded (case-insensitive) #' @param icu_exclude logical whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu}) #' @param specimen_group value in column \code{col_specimen} to filter on @@ -38,10 +38,13 @@ #' @param ignore_I logical to determine whether antibiotic interpretations with \code{"I"} will be ignored when \code{type = "keyantibiotics"}, see Details #' @param points_threshold points until the comparison of key antibiotics will lead to inclusion of an isolate when \code{type = "points"}, see Details #' @param info print progress +#' @param include_unknown logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code \code{"UNKNOWN"}, which defaults to \code{FALSE}. For WHONET users, this means that all records with organism code \code{"con"} (\emph{contamination}) will be excluded at default. Isolates with a microbial ID of \code{NA} will always be excluded as first isolate. #' @param ... parameters passed on to the \code{first_isolate} function #' @details \strong{WHY THIS IS SO IMPORTANT} \cr #' To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. #' +#' All isolates with a microbial ID of \code{NA} will be excluded as first isolate. 
+#' #' The functions \code{filter_first_isolate} and \code{filter_first_weighted_isolate} are helper functions to quickly filter on first isolates. The function \code{filter_first_isolate} is essentially equal to: #' \preformatted{ #' x \%>\% @@ -170,6 +173,7 @@ first_isolate <- function(x, ignore_I = TRUE, points_threshold = 2, info = TRUE, + include_unknown = FALSE, ...) { if (!is.data.frame(x)) { @@ -215,7 +219,7 @@ first_isolate <- function(x, # WHONET support x <- x %>% mutate(patient_id = paste(`First name`, `Last name`, Sex)) col_patient_id <- "patient_id" - message(blue(paste0("NOTE: Using combined columns ", bold("`First name`, `Last name` and `Sex`"), " as input for `col_patient_id`."))) + message(blue(paste0("NOTE: Using combined columns `", bold("First name"), "`, `", bold("Last name"), "` and `", bold("Sex"), "` as input for `col_patient_id`."))) } else { col_patient_id <- search_type_in_df(x = x, type = "patient_id") } @@ -260,15 +264,14 @@ first_isolate <- function(x, check_columns_existance(col_icu) check_columns_existance(col_keyantibiotics) - # join to microorganisms data set + # create new dataframe with original row index x <- x %>% - mutate_at(vars(col_mo), as.mo) %>% - left_join_microorganisms(by = col_mo) %>% - # empty species will lead to first = FALSE, so put in text there if genus is available - mutate(species = ifelse(!is.na(genus) & species == "", "species", species)) - col_genus <- "genus" - col_species <- "species" - + mutate(newvar_row_index = 1:nrow(x), + newvar_mo = x %>% pull(col_mo) %>% as.mo(), + newvar_genus_species = paste(mo_genus(newvar_mo), mo_species(newvar_mo)), + newvar_date = x %>% pull(col_date), + newvar_patient_id = x %>% pull(col_patient_id)) + if (is.null(col_testcode)) { testcodes_exclude <- NULL } @@ -303,16 +306,7 @@ first_isolate <- function(x, testcodes_exclude <- '' } - # create new dataframe with original row index and right sorting - x <- x %>% - mutate(first_isolate_row_index = 1:nrow(x), - date_lab = x %>% 
pull(col_date), - patient_id = x %>% pull(col_patient_id), - species = x %>% pull(col_species), - genus = x %>% pull(col_genus)) %>% - mutate(species = if_else(is.na(species) | species == "(no MO)", "", species), - genus = if_else(is.na(genus) | genus == "(no MO)", "", genus)) - + # arrange data to the right sorting if (is.null(specimen_group)) { # not filtering on specimen if (icu_exclude == FALSE) { @@ -320,10 +314,9 @@ first_isolate <- function(x, cat('[Criterion] Included isolates from ICU.\n') } x <- x %>% - arrange_at(c(col_patient_id, - col_genus, - col_species, - col_date)) + arrange(newvar_patient_id, + newvar_genus_species, + newvar_date) row.start <- 1 row.end <- nrow(x) } else { @@ -332,10 +325,9 @@ first_isolate <- function(x, } x <- x %>% arrange_at(c(col_icu, - col_patient_id, - col_genus, - col_species, - col_date)) + "newvar_patient_id", + "newvar_genus_species", + "newvar_date")) suppressWarnings( row.start <- which(x %>% pull(col_icu) == FALSE) %>% min(na.rm = TRUE) @@ -353,10 +345,9 @@ first_isolate <- function(x, } x <- x %>% arrange_at(c(col_specimen, - col_patient_id, - col_genus, - col_species, - col_date)) + "newvar_patient_id", + "newvar_genus_species", + "newvar_date")) suppressWarnings( row.start <- which(x %>% pull(col_specimen) == specimen_group) %>% min(na.rm = TRUE) ) @@ -370,10 +361,9 @@ first_isolate <- function(x, x <- x %>% arrange_at(c(col_icu, col_specimen, - col_patient_id, - col_genus, - col_species, - col_date)) + "newvar_patient_id", + "newvar_genus_species", + "newvar_date")) suppressWarnings( row.start <- which(x %>% pull(col_specimen) == specimen_group & x %>% pull(col_icu) == FALSE) %>% min(na.rm = TRUE) @@ -386,27 +376,28 @@ first_isolate <- function(x, } + # no isolates found if (abs(row.start) == Inf | abs(row.end) == Inf) { if (info == TRUE) { message(paste("=> Found", bold("no isolates"))) } - # NAs where genus is unavailable - return(x %>% - mutate(real_first_isolate = if_else(genus == '', NA, FALSE)) %>% - 
pull(real_first_isolate) - ) + return(rep(FALSE, nrow(x))) } + + # did find some isolates - add new index numbers of rows + x <- x %>% mutate(newvar_row_index_sorted = 1:nrow(.)) # suppress warnings because dplyr wants us to use library(dplyr) when using filter(row_number()) - suppressWarnings( - scope.size <- x %>% - filter( - row_number() %>% between(row.start, - row.end), - genus != "", - species != "") %>% - nrow() - ) + #suppressWarnings( + scope.size <- row.end - row.start + 1 + # x %>% + # filter( + # row_number() %>% between(row.start, + # row.end), + # newvar_genus != "", + # newvar_species != "") %>% + # nrow() + # ) identify_new_year = function(x, episode_days) { # I asked on StackOverflow: @@ -432,15 +423,13 @@ first_isolate <- function(x, # Analysis of first isolate ---- all_first <- x %>% - mutate(other_pat_or_mo = if_else(patient_id == lag(patient_id) - & genus == lag(genus) - & species == lag(species), + mutate(other_pat_or_mo = if_else(newvar_patient_id == lag(newvar_patient_id) + & newvar_genus_species == lag(newvar_genus_species), FALSE, TRUE)) %>% - group_by_at(vars(patient_id, - genus, - species)) %>% - mutate(more_than_episode_ago = identify_new_year(x = date_lab, + group_by(newvar_patient_id, + newvar_genus_species) %>% + mutate(more_than_episode_ago = identify_new_year(x = newvar_date, episode_days = episode_days)) %>% ungroup() @@ -461,41 +450,36 @@ first_isolate <- function(x, } } type_param <- type - # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) - suppressWarnings( - all_first <- all_first %>% - mutate(key_ab_lag = lag(key_ab)) %>% - mutate(key_ab_other = !key_antibiotics_equal(y = key_ab, - z = key_ab_lag, - type = type_param, - ignore_I = ignore_I, - points_threshold = points_threshold, - info = info)) %>% - mutate( - real_first_isolate = - if_else( - between(row_number(), row.start, row.end) - & genus != "" - & species != "" - & (other_pat_or_mo | more_than_episode_ago | key_ab_other), 
- TRUE, - FALSE)) - ) + + all_first <- all_first %>% + mutate(key_ab_lag = lag(key_ab)) %>% + mutate(key_ab_other = !key_antibiotics_equal(y = key_ab, + z = key_ab_lag, + type = type_param, + ignore_I = ignore_I, + points_threshold = points_threshold, + info = info)) %>% + mutate( + real_first_isolate = + if_else( + newvar_row_index_sorted %>% between(row.start, row.end) + & newvar_genus_species != "" + & (other_pat_or_mo | more_than_episode_ago | key_ab_other), + TRUE, + FALSE)) + } else { # no key antibiotics - # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) - suppressWarnings( - all_first <- all_first %>% - mutate( + all_first <- all_first %>% + mutate( real_first_isolate = if_else( - between(row_number(), row.start, row.end) - & genus != "" - & species != "" + newvar_row_index_sorted %>% between(row.start, row.end) + & newvar_genus_species != "" & (other_pat_or_mo | more_than_episode_ago), TRUE, FALSE)) - ) + } # first one as TRUE @@ -507,18 +491,39 @@ first_isolate <- function(x, if (icu_exclude == TRUE) { all_first[which(all_first[, col_icu] == TRUE), 'real_first_isolate'] <- FALSE } - - # NAs where genus is unavailable + + decimal.mark <- getOption("OutDec") + big.mark <- ifelse(decimal.mark != ",", ",", ".") + + # handle empty microorganisms + if (any(all_first$newvar_mo == "UNKNOWN", na.rm = TRUE)) { + if (include_unknown == TRUE) { + message(blue(paste0("NOTE: Included ", format(sum(all_first$newvar_mo == "UNKNOWN"), + decimal.mark = decimal.mark, big.mark = big.mark), + ' isolates with a microbial ID "UNKNOWN" (column `', bold(col_mo), '`).'))) + } else { + message(blue(paste0("NOTE: Excluded ", format(sum(all_first$newvar_mo == "UNKNOWN"), + decimal.mark = decimal.mark, big.mark = big.mark), + ' isolates with a microbial ID "UNKNOWN" (column `', bold(col_mo), '`).'))) + + } + } + all_first[which(all_first$newvar_mo == "UNKNOWN"), 'real_first_isolate'] <- include_unknown + + # exclude all NAs + if 
(any(is.na(all_first$newvar_mo))) { + message(blue(paste0("NOTE: Excluded ", format(sum(is.na(all_first$newvar_mo)), + decimal.mark = decimal.mark, big.mark = big.mark), + ' isolates with a microbial ID "NA" (column `', bold(col_mo), '`).'))) + } + all_first[which(is.na(all_first$newvar_mo)), 'real_first_isolate'] <- FALSE + + # arrange back according to original sorting again all_first <- all_first %>% - mutate(real_first_isolate = if_else(genus %in% c('', '(no MO)', NA), NA, real_first_isolate)) - - all_first <- all_first %>% - arrange(first_isolate_row_index) %>% + arrange(newvar_row_index) %>% pull(real_first_isolate) - + if (info == TRUE) { - decimal.mark <- getOption("OutDec") - big.mark <- ifelse(decimal.mark != ",", ",", ".") n_found <- base::sum(all_first, na.rm = TRUE) p_found_total <- percent(n_found / nrow(x), force_zero = TRUE) p_found_scope <- percent(n_found / scope.size, force_zero = TRUE) diff --git a/R/mo.R b/R/mo.R index 3ba245dc..1c183c58 100755 --- a/R/mo.R +++ b/R/mo.R @@ -1520,7 +1520,7 @@ type_sum.mo <- function(x) { #' @export pillar_shaft.mo <- function(x, ...) 
{ out <- format(x) - out[is.na(x)] <- NA + out[is.na(x)] <- pillar::style_na("NA") pillar::new_pillar_shaft_simple(out, align = "left", min_width = 11) } diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 41b7e4cd..64f51976 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031 diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index 3f3fbfee..f55c4ba2 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -40,7 +40,7 @@ AMR (for R) - 0.7.1.9029 + 0.7.1.9031 diff --git a/docs/articles/index.html b/docs/articles/index.html index 8a032ca7..4604b553 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031 diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html index acea92ff..1772b39f 100644 --- a/docs/articles/resistance_predict.html +++ b/docs/articles/resistance_predict.html @@ -40,7 +40,7 @@ AMR (for R) - 0.7.1.9029 + 0.7.1.9031 diff --git a/docs/authors.html b/docs/authors.html index a10c8647..fdede158 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031 diff --git a/docs/index.html b/docs/index.html index 4223d3d4..fcda1106 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031 diff --git a/docs/news/index.html b/docs/news/index.html index 59be947b..4de949ed 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031 @@ -225,15 +225,16 @@ -
+

-AMR 0.7.1.9030 Unreleased +AMR 0.7.1.9031 Unreleased

Breaking

  • Function freq() has moved to a new package, clean (CRAN link). Creating frequency tables is actually not the scope of this package (never was) and this function has matured a lot over the last two years. Therefore, a new package was created for data cleaning and checking and it perfectly fits the freq() function. The clean package is available on CRAN and will be installed automatically when updating the AMR package, that now imports it. In a later stage, the skewness() and kurtosis() functions will be moved to the clean package too.
  • +
  • Selection of first isolates now excludes all ‘unknown’ microorganisms at default, i.e. microbial codes NA and "UNKNOWN". They can be included with the new parameter include_unknown: first_isolate(..., include_unknown = TRUE). For WHONET users, this means that all records with microbial codes "xxx" (no growth) and "con" (contamination) will be excluded at default.
@@ -275,7 +276,8 @@
  • Added more informative errors and warnings to eucast_rules()
  • -
  • Fixed a bug in eucast_rules() where antibiotic columns would be read as lists instead of characters
  • +
  • Fixed a bug in eucast_rules() for Yersinia pseudotuberculosis +
  • Added tibble printing support for classes rsi, mic, ab and mo. When using tibbles containing antibiotic columns, values S will print in green, values I will print in yellow and values R will print in red:

    (run this on your own console, as this page does not support colour printing)
    @@ -1226,7 +1228,7 @@ Using as.mo(..., allow_uncertain = 3)
           

    Contents

    @@ -239,7 +239,7 @@ col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365, testcodes_exclude = NULL, icu_exclude = FALSE, specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE, - points_threshold = 2, info = TRUE, ...) + points_threshold = 2, info = TRUE, include_unknown = FALSE, ...) filter_first_isolate(x, col_date = NULL, col_patient_id = NULL, col_mo = NULL, ...) @@ -285,7 +285,7 @@ episode_days -

    episode in days after which a genus/species combination will be determined as 'first isolate' again

    +

    episode in days after which a genus/species combination will be determined as 'first isolate' again. The default of 365 days is based on the guideline by CLSI, see Source.

    testcodes_exclude @@ -315,6 +315,10 @@ info

    print progress

    + + include_unknown +

    logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code "UNKNOWN", which defaults to FALSE. For WHONET users, this means that all records with organism code "con" (contamination) will be excluded at default.

    + ...

    parameters passed on to the first_isolate function

    diff --git a/docs/reference/index.html b/docs/reference/index.html index 5aec4a66..ff6bb240 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9030 + 0.7.1.9031
diff --git a/git_merge.sh b/git_merge.sh index cc6755f7..69ff7fe8 100755 --- a/git_merge.sh +++ b/git_merge.sh @@ -10,15 +10,15 @@ # git stash --quiet # go to master -git add . -git commit -a -m "website update" --quiet git checkout master --quiet echo "• changed branch to master" + # import everything from premaster git merge premaster --quiet # and send it to git git push --quiet echo "• pushed changes to master" + # return to premaster git checkout premaster --quiet echo "• changed branch back to premaster" diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index 47d8435d..fa95c148 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -14,7 +14,7 @@ first_isolate(x, col_date = NULL, col_patient_id = NULL, col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365, testcodes_exclude = NULL, icu_exclude = FALSE, specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE, - points_threshold = 2, info = TRUE, ...) + points_threshold = 2, info = TRUE, include_unknown = FALSE, ...) filter_first_isolate(x, col_date = NULL, col_patient_id = NULL, col_mo = NULL, ...) @@ -40,7 +40,7 @@ filter_first_weighted_isolate(x, col_date = NULL, \item{col_keyantibiotics}{column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.} -\item{episode_days}{episode in days after which a genus/species combination will be determined as 'first isolate' again} +\item{episode_days}{episode in days after which a genus/species combination will be determined as 'first isolate' again. 
The default of 365 days is based on the guideline by CLSI, see Source.} \item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)} @@ -56,6 +56,8 @@ filter_first_weighted_isolate(x, col_date = NULL, \item{info}{print progress} +\item{include_unknown}{logical to determine whether 'unknown' microorganisms should be included too, i.e. microbial code \code{"UNKNOWN"}, which defaults to \code{FALSE}. For WHONET users, this means that all records with organism code \code{"con"} (\emph{contamination}) will be excluded at default. Isolates with a microbial ID of \code{NA} will always be excluded as first isolate.} + \item{...}{parameters passed on to the \code{first_isolate} function} } \value{ @@ -68,6 +70,8 @@ Determine first (weighted) isolates of all microorganisms of every patient per e \strong{WHY THIS IS SO IMPORTANT} \cr To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}. +All isolates with a microbial ID of \code{NA} will be excluded as first isolate. + The functions \code{filter_first_isolate} and \code{filter_first_weighted_isolate} are helper functions to quickly filter on first isolates. 
The function \code{filter_first_isolate} is essentially equal to: \preformatted{ x \%>\% diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index bd2283c0..8572f2f2 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -187,5 +187,25 @@ test_that("first isolates work", { info = TRUE), na.rm = TRUE), 1322) + + # unknown MOs + expect_equal(septic_patients %>% + mutate(mo = ifelse(mo == "B_ESCHR_COL", "UNKNOWN", mo)) %>% + mutate(first = first_isolate(., include_unknown = FALSE)) %>% + .$first %>% + sum(), + 1062) + expect_equal(septic_patients %>% + mutate(mo = ifelse(mo == "B_ESCHR_COL", "UNKNOWN", mo)) %>% + mutate(first = first_isolate(., include_unknown = TRUE)) %>% + .$first %>% + sum(), + 1529) + expect_equal(septic_patients %>% + mutate(mo = ifelse(mo == "B_ESCHR_COL", NA, mo)) %>% + mutate(first = first_isolate(.)) %>% + .$first %>% + sum(), + 1062) })