diff --git a/DESCRIPTION b/DESCRIPTION index ee33b2ab..2cbd24a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.9.0.9004 -Date: 2019-12-20 +Version: 0.9.0.9005 +Date: 2019-12-21 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 7c4b99cb..3beea51d 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ -# AMR 0.9.0.9004 -## Last updated: 20-Dec-2019 +# AMR 0.9.0.9005 +## Last updated: 21-Dec-2019 ### Changes -* Speed improvement for `as.mo()` (and consequently all `mo_*` functions that use `as.mo()` internally) +* Speed improvement for `as.mo()` (and consequently all `mo_*` functions that use `as.mo()` internally), especially for the *G. species* format (G for genus), like *E. coli* and *K penumoniae* +* Input values for `as.disk()` limited to a maximum of 50 millimeters # AMR 0.9.0 diff --git a/R/disk.R b/R/disk.R index 2104dc44..15e32ad3 100644 --- a/R/disk.R +++ b/R/disk.R @@ -21,12 +21,12 @@ #' Class 'disk' #' -#' This transforms a vector to a new class [`disk`], which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 99. +#' This transforms a vector to a new class [`disk`], which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 50. #' @rdname as.disk #' @param x vector #' @param na.rm a logical indicating whether missing values should be removed #' @details Interpret disk values as RSI values with [as.rsi()]. It supports guidelines from EUCAST and CLSI. -#' @return Ordered integer factor with new class [`disk`] +#' @return An [`integer`] with additional new class [`disk`] #' @aliases disk #' @export #' @seealso [as.rsi()] @@ -56,8 +56,8 @@ as.disk <- function(x, na.rm = FALSE) { # force it to be integer x <- suppressWarnings(as.integer(x)) - # disks can never be less than 9 mm (size of a disk) or more than 50 mm - x[x < 6 | x > 99] <- NA_integer_ + # disks can never be less than 6 mm (size of smallest disk) or more than 50 mm + x[x < 6 | x > 50] <- NA_integer_ na_after <- length(x[is.na(x)]) if (na_before != na_after) { diff --git a/R/mo.R b/R/mo.R index 8962fcf0..b2a11fa6 100755 --- a/R/mo.R +++ b/R/mo.R @@ -228,7 +228,7 @@ as.mo <- function(x, & isFALSE(Lancefield)) { # check previously found results y <- mo_hist - + } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary y <- mo_validate(x = x, property = "mo", @@ -273,7 +273,7 @@ exec_as.mo <- function(x, disable_mo_history = getOption("AMR_disable_mo_history", FALSE), debug = FALSE, reference_data_to_use = microorganismsDT) { - + load_AMR_package() # WHONET: xxx = no growth @@ -391,7 +391,9 @@ exec_as.mo <- function(x, } else if (all(x %in% reference_data_to_use$mo)) { # existing mo codes when not looking for property "mo", like mo_genus("B_ESCHR_COL") - y <- reference_data_to_use[prevalence == 1][data.table(mo = x), on = "mo", ..property][[1]] + y <- reference_data_to_use[prevalence == 1][data.table(mo = x), + on = "mo", + ..property][[1]] if (any(is.na(y))) { y[is.na(y)] <- reference_data_to_use[prevalence == 2][data.table(mo = x[is.na(y)]), on = "mo", @@ -420,21 +422,29 @@ exec_as.mo <- function(x, } else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") - x <- reference_data_to_use[data.table(fullname_lower = tolower(x)), on = "fullname_lower", ..property][[1]] + x <- reference_data_to_use[data.table(fullname_lower = tolower(x)), + on = "fullname_lower", + ..property][[1]] } else if (all(toupper(x) %in% AMR::microorganisms.codes$code)) { # commonly used MO codes - y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), on = "code", ] + y <- as.data.table(AMR::microorganisms.codes)[data.table(code = toupper(x)), + on = "code", ] # save them to history set_mo_history(x, y$mo, 0, force = force_mo_history, disable = disable_mo_history) - x <- reference_data_to_use[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]] + x <- reference_data_to_use[data.table(mo = y[["mo"]]), + on = "mo", + ..property][[1]] } else if (all(x %in% microorganisms.translation$mo_old)) { # is an old mo code, used in previous versions of this package old_mo_warning <- TRUE - y <- as.data.table(microorganisms.translation)[data.table(mo_old = x), on = "mo_old", "mo_new"][[1]] - y <- reference_data_to_use[data.table(mo = y), on = "mo", ..property][[1]] + y <- as.data.table(microorganisms.translation)[data.table(mo_old = x), + on = "mo_old", "mo_new"][[1]] + y <- reference_data_to_use[data.table(mo = y), + on = "mo", + ..property][[1]] # don't save to history, as all items are already in microorganisms.translation x <- y @@ -557,7 +567,7 @@ exec_as.mo <- function(x, } progress <- progress_estimated(n = length(x), min_time = 3) - + for (i in seq_len(length(x))) { progress$tick()$print() @@ -580,7 +590,8 @@ exec_as.mo <- function(x, next } - found <- reference_data_to_use[mo == toupper(x_backup[i]), ..property][[1]] + found <- reference_data_to_use[mo == toupper(x_backup[i]), + ..property][[1]] # is a valid MO code if (length(found) > 0) { x[i] <- found[1L] @@ -590,17 +601,19 @@ exec_as.mo <- function(x, if (x_backup[i] %in% microorganisms.translation$mo_old) { # is an old mo code, used in previous versions of this package old_mo_warning <- TRUE - found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"], ..property][[1]] + found <- reference_data_to_use[mo == microorganisms.translation[which(microorganisms.translation$mo_old == x_backup[i]), "mo_new"], + ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] # don't save to history, as all items are already in microorganisms.translation next } } - + if (toupper(x_backup_untouched[i]) %in% microorganisms.codes$code) { # is a WHONET code, like "HA-" - found <- microorganismsDT[mo == microorganisms.codes[which(microorganisms.codes$code == toupper(x_backup_untouched[i])), "mo"][1L], ..property][[1]] + found <- microorganismsDT[mo == microorganisms.codes[which(microorganisms.codes$code == toupper(x_backup_untouched[i])), "mo"][1L], + ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] # don't save to history, as all items are already in microorganisms.codes @@ -608,7 +621,8 @@ exec_as.mo <- function(x, } } - found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), ..property][[1]] + found <- reference_data_to_use[fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), + ..property][[1]] # most probable: is exact match in fullname if (length(found) > 0) { x[i] <- found[1L] @@ -618,7 +632,20 @@ exec_as.mo <- function(x, next } - found <- reference_data_to_use[col_id == x_backup[i], ..property][[1]] + found <- reference_data_to_use[g_species %in% gsub("[^a-z0-9/ \\-]+", "", + tolower(c(x_backup[i], x_backup_without_spp[i]))), + ..property][[1]] + # very probable: is G. species + if (length(found) > 0) { + x[i] <- found[1L] + if (initial_search == TRUE) { + set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) + } + next + } + + found <- reference_data_to_use[col_id == x_backup[i], + ..property][[1]] # is a valid Catalogue of Life ID if (NROW(found) > 0) { x[i] <- found[1L] @@ -632,19 +659,22 @@ exec_as.mo <- function(x, if (any(toupper(c(x_backup[i], x_backup_without_spp[i])) %in% AMR::microorganisms.codes$code)) { mo_found <- AMR::microorganisms.codes[which(AMR::microorganisms.codes$code %in% toupper(c(x_backup[i], x_backup_without_spp[i]))), "mo"][1L] if (length(mo_found) > 0) { - x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] + x[i] <- microorganismsDT[mo == mo_found, + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } next } } + if (!is.null(reference_df)) { # self-defined reference if (x_backup[i] %in% reference_df[, 1]) { ref_mo <- reference_df[reference_df[, 1] == x_backup[i], "mo"][[1L]] if (ref_mo %in% microorganismsDT[, mo]) { - x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L] + x[i] <- microorganismsDT[mo == ref_mo, + ..property][[1]][1L] next } else { warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE) @@ -660,7 +690,8 @@ exec_as.mo <- function(x, if (tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { # empty and nonsense values, ignore without warning - x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + x[i] <- microorganismsDT[mo == "UNKNOWN", + ..property][[1]] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -671,7 +702,8 @@ exec_as.mo <- function(x, if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 & !x_backup_without_spp[i] %like_case% "[Oo]?(26|103|104|104|111|121|145|157)") { # fewer than 3 chars and not looked for species, add as failure - x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + x[i] <- microorganismsDT[mo == "UNKNOWN", + ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) @@ -689,7 +721,8 @@ exec_as.mo <- function(x, if (!is.na(x_trimmed[i])) { if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA") | x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") { - x[i] <- microorganismsDT[mo == "B_STPHY_AURS", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STPHY_AURS", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -697,7 +730,8 @@ exec_as.mo <- function(x, } if (toupper(x_backup_without_spp[i]) %in% c("MRSE", "MSSE") | x_backup_without_spp[i] %like_case% " (mrse|msse) ") { - x[i] <- microorganismsDT[mo == "B_STPHY_EPDR", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STPHY_EPDR", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -706,7 +740,8 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) == "VRE" | x_backup_without_spp[i] %like_case% " vre " | x_backup_without_spp[i] %like_case% "(enterococci|enterokok|enterococo)[a-z]*?$") { - x[i] <- microorganismsDT[mo == "B_ENTRC", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_ENTRC", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -727,7 +762,8 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) %in% c("AIEC", "ATEC", "DAEC", "EAEC", "EHEC", "EIEC", "EPEC", "ETEC", "NMEC", "STEC", "UPEC") # also support O-antigens of E. coli: O26, O103, O104, O111, O121, O145, O157 | x_backup_without_spp[i] %like_case% "o?(26|103|104|111|121|145|157)") { - x[i] <- microorganismsDT[mo == "B_ESCHR_COLI", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_ESCHR_COLI", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -736,7 +772,8 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) == "MRPA" | x_backup_without_spp[i] %like_case% " mrpa ") { # multi resistant P. aeruginosa - x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_PSDMN_ARGN", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -744,7 +781,8 @@ exec_as.mo <- function(x, } if (toupper(x_backup_without_spp[i]) == "CRSM") { # co-trim resistant S. maltophilia - x[i] <- microorganismsDT[mo == "B_STNTR_MLTP", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STNTR_MLTP", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -753,7 +791,8 @@ exec_as.mo <- function(x, if (toupper(x_backup_without_spp[i]) %in% c("PISP", "PRSP", "VISP", "VRSP") | x_backup_without_spp[i] %like_case% " (pisp|prsp|visp|vrsp) ") { # peni I, peni R, vanco I, vanco R: S. pneumoniae - x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -761,7 +800,8 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "^g[abcdfghk]s$") { # Streptococci, like GBS = Group B Streptococci (B_STRPT_GRPB) - x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L] + x[i] <- microorganismsDT[mo == toupper(gsub("g([abcdfghk])s", "B_STRPT_GRP\\1", x_backup_without_spp[i])), + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -769,7 +809,8 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { # Streptococci in different languages, like "estreptococos grupo B" - x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), ..property][[1]][1L] + x[i] <- microorganismsDT[mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -777,7 +818,8 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { # Streptococci in different languages, like "Group A Streptococci" - x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), ..property][[1]][1L] + x[i] <- microorganismsDT[mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -785,7 +827,8 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "haemoly.*strept") { # Haemolytic streptococci in different languages - x[i] <- microorganismsDT[mo == "B_STRPT_HAEM", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STRPT_HAEM", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -796,7 +839,8 @@ exec_as.mo <- function(x, | x_trimmed[i] %like_case% "[ck]oagulas[ea] negatie?[vf]" | x_backup_without_spp[i] %like_case% "[ck]o?ns[^a-z]?$") { # coerce S. coagulase negative - x[i] <- microorganismsDT[mo == "B_STPHY_CONS", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STPHY_CONS", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -806,7 +850,8 @@ exec_as.mo <- function(x, | x_trimmed[i] %like_case% "[ck]oagulas[ea] positie?[vf]" | x_backup_without_spp[i] %like_case% "[ck]o?ps[^a-z]?$") { # coerce S. coagulase positive - x[i] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STPHY_COPS", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -817,7 +862,8 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "strepto.* mil+er+i" | x_backup_without_spp[i] %like_case% "mgs[^a-z]?$") { # Milleri Group Streptococcus (MGS) - x[i] <- microorganismsDT[mo == "B_STRPT_MILL", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STRPT_MILL", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -827,7 +873,8 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "strepto.* viridans" | x_backup_without_spp[i] %like_case% "vgs[^a-z]?$") { # Viridans Group Streptococcus (VGS) - x[i] <- microorganismsDT[mo == "B_STRPT_VIRI", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STRPT_VIRI", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -837,7 +884,8 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "negatie?[vf]" | x_trimmed[i] %like_case% "gram[ -]?neg.*") { # coerce Gram negatives - x[i] <- microorganismsDT[mo == "B_GRAMN", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_GRAMN", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -847,7 +895,8 @@ exec_as.mo <- function(x, | x_backup_without_spp[i] %like_case% "positie?[vf]" | x_trimmed[i] %like_case% "gram[ -]?pos.*") { # coerce Gram positives - x[i] <- microorganismsDT[mo == "B_GRAMP", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_GRAMP", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -855,7 +904,8 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "mycoba[ck]teri.[nm]?$") { # coerce Gram positives - x[i] <- microorganismsDT[mo == "B_MYCBC", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_MYCBC", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -865,14 +915,16 @@ exec_as.mo <- function(x, if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { if (x_backup_without_spp[i] %like_case% "salmonella group") { # Salmonella Group A to Z, just return S. species for now - x[i] <- microorganismsDT[mo == "B_SLMNL", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_SLMNL", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } next } else if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_backup[i], ignore.case = FALSE)) { # Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica - x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_SLMNL_ENTR", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -887,7 +939,8 @@ exec_as.mo <- function(x, # trivial names known to the field: if ("meningococcus" %like_case% x_trimmed[i]) { # coerce Neisseria meningitidis - x[i] <- microorganismsDT[mo == "B_NESSR_MNNG", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_NESSR_MNNG", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -895,7 +948,8 @@ exec_as.mo <- function(x, } if ("gonococcus" %like_case% x_trimmed[i]) { # coerce Neisseria gonorrhoeae - x[i] <- microorganismsDT[mo == "B_NESSR_GNRR", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_NESSR_GNRR", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -903,7 +957,8 @@ exec_as.mo <- function(x, } if ("pneumococcus" %like_case% x_trimmed[i]) { # coerce Streptococcus penumoniae - x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", ..property][[1]][1L] + x[i] <- microorganismsDT[mo == "B_STRPT_PNMN", + ..property][[1]][1L] if (initial_search == TRUE) { set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) } @@ -928,7 +983,8 @@ exec_as.mo <- function(x, # if only genus is available, return only genus if (all(!c(x[i], b.x_trimmed) %like_case% " ")) { - found <- data_to_check[fullname_lower %in% c(h.x_species, i.x_trimmed_species), ..property][[1]] + found <- data_to_check[fullname_lower %in% c(h.x_species, i.x_trimmed_species), + ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { @@ -937,7 +993,8 @@ exec_as.mo <- function(x, return(x[i]) } if (nchar(g.x_backup_without_spp) >= 6) { - found <- data_to_check[fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), ..property][[1]] + found <- data_to_check[fullname_lower %like_case% paste0("^", unregex(g.x_backup_without_spp), "[a-z]+"), + ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] if (initial_search == TRUE) { @@ -951,7 +1008,8 @@ exec_as.mo <- function(x, # allow no codes less than 4 characters long, was already checked for WHONET earlier if (nchar(g.x_backup_without_spp) < 4) { - x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + x[i] <- microorganismsDT[mo == "UNKNOWN", + ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, a.x_backup) set_mo_history(a.x_backup, get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) @@ -960,36 +1018,42 @@ exec_as.mo <- function(x, } # try probable: trimmed version of fullname ---- - found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), ..property][[1]] + found <- data_to_check[fullname_lower %in% tolower(g.x_backup_without_spp), + ..property][[1]] if (length(found) > 0) { return(found[1L]) } # try any match keeping spaces ---- - found <- data_to_check[fullname_lower %like_case% d.x_withspaces_start_end, ..property][[1]] + found <- data_to_check[fullname_lower %like_case% d.x_withspaces_start_end, + ..property][[1]] if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } # try any match keeping spaces, not ending with $ ---- - found <- data_to_check[fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), ..property][[1]] + found <- data_to_check[fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), + ..property][[1]] if (length(found) > 0) { return(found[1L]) } - found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, ..property][[1]] + found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, + ..property][[1]] if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } # try any match keeping spaces, not start with ^ ---- - found <- data_to_check[fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), ..property][[1]] + found <- data_to_check[fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), + ..property][[1]] if (length(found) > 0) { return(found[1L]) } # try a trimmed version found <- data_to_check[fullname_lower %like_case% b.x_trimmed - | fullname_lower %like_case% c.x_trimmed_without_group, ..property][[1]] + | fullname_lower %like_case% c.x_trimmed_without_group, + ..property][[1]] if (length(found) > 0 & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } @@ -1004,7 +1068,8 @@ exec_as.mo <- function(x, g.x_backup_without_spp %>% substr(1, x_length / 2), ".* ", g.x_backup_without_spp %>% substr((x_length / 2) + 1, x_length)) - found <- data_to_check[fullname_lower %like_case% x_split, ..property][[1]] + found <- data_to_check[fullname_lower %like_case% x_split, + ..property][[1]] if (length(found) > 0) { return(found[1L]) } @@ -1012,7 +1077,8 @@ exec_as.mo <- function(x, # try fullname without start and without nchar limit of >= 6 ---- # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, ..property][[1]] + found <- data_to_check[fullname_lower %like_case% e.x_withspaces_start_only, + ..property][[1]] if (length(found) > 0) { return(found[1L]) } @@ -1031,7 +1097,8 @@ exec_as.mo <- function(x, if (property == "ref") { x[i] <- found[1, ref] } else { - x[i] <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]] + x[i] <- microorganismsDT[col_id == found[1, col_id_new], + ..property][[1]] } options(mo_renamed_last_run = found[1, fullname]) was_renamed(name_old = found[1, fullname], @@ -1077,7 +1144,8 @@ exec_as.mo <- function(x, # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999" x <- found[1, ref] } else { - x <- microorganismsDT[col_id == found[1, col_id_new], ..property][[1]] + x <- microorganismsDT[col_id == found[1, col_id_new], + ..property][[1]] } was_renamed(name_old = found[1, fullname], name_new = microorganismsDT[col_id == found[1, col_id_new], fullname], @@ -1109,7 +1177,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1135,7 +1204,8 @@ exec_as.mo <- function(x, message("Running '", paste(b.x_trimmed, "species"), "'") } # not when input is like Genustext, because then Neospora would lead to Actinokineospora - found <- uncertain.reference_data_to_use[fullname_lower %like_case% paste(b.x_trimmed, "species"), ..property][[1]] + found <- uncertain.reference_data_to_use[fullname_lower %like_case% paste(b.x_trimmed, "species"), + ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] uncertainties <<- rbind(uncertainties, @@ -1167,7 +1237,8 @@ exec_as.mo <- function(x, } if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1194,7 +1265,8 @@ exec_as.mo <- function(x, } if (!empty_result(found) & nchar(g.x_backup_without_spp) >= 6) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1228,7 +1300,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1260,7 +1333,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1280,7 +1354,8 @@ exec_as.mo <- function(x, if (b.x_trimmed %like_case% "yeast") { found <- "F_YEAST" found_result <- found - found <- microorganismsDT[mo == found, ..property][[1]] + found <- microorganismsDT[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1293,7 +1368,8 @@ exec_as.mo <- function(x, if (b.x_trimmed %like_case% "(fungus|fungi)" & !b.x_trimmed %like_case% "fungiphrya") { found <- "F_FUNGUS" found_result <- found - found <- microorganismsDT[mo == found, ..property][[1]] + found <- microorganismsDT[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1322,7 +1398,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found_result[1L], ..property][[1]] + found <- reference_data_to_use[mo == found_result[1L], + ..property][[1]] # uncertainty level 2 only if searched part contains a space (otherwise it will be found with lvl 3) if (x_strip_collapsed %like_case% " ") { uncertainties <<- rbind(uncertainties, @@ -1362,7 +1439,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1393,7 +1471,8 @@ exec_as.mo <- function(x, } if (!empty_result(found)) { found_result <- found - found <- reference_data_to_use[mo == found, ..property][[1]] + found <- reference_data_to_use[mo == found, + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1417,7 +1496,8 @@ exec_as.mo <- function(x, if (nrow(found) > 0) { found_result <- found[["mo"]] if (!empty_result(found_result) & nchar(g.x_backup_without_spp) >= 6) { - found <- reference_data_to_use[mo == found_result[1L], ..property][[1]] + found <- reference_data_to_use[mo == found_result[1L], + ..property][[1]] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1525,7 +1605,8 @@ exec_as.mo <- function(x, } # no results found: make them UNKNOWN ---- - x[i] <- microorganismsDT[mo == "UNKNOWN", ..property][[1]] + x[i] <- microorganismsDT[mo == "UNKNOWN", + ..property][[1]] if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) set_mo_history(x_backup[i], get_mo_code(x[i], property), 0, force = force_mo_history, disable = disable_mo_history) @@ -1586,56 +1667,76 @@ exec_as.mo <- function(x, "saccharolyticus", "saprophyticus", "sciuri", "stepanovicii", "simulans", "succinus", "vitulinus", "warneri", "xylosus") - | (species == "schleiferi" & subspecies %in% c("schleiferi", "")), ..property][[1]] + | (species == "schleiferi" & subspecies %in% c("schleiferi", "")), + ..property][[1]] CoPS <- MOs_staph[species %in% c("simiae", "agnetis", "delphini", "lutrae", "hyicus", "intermedius", "pseudintermedius", "pseudointermedius", "schweitzeri", "argenteus") - | (species == "schleiferi" & subspecies == "coagulans"), ..property][[1]] + | (species == "schleiferi" & subspecies == "coagulans"), + ..property][[1]] # warn when species found that are not in Becker (2014, PMID 25278577) and Becker (2019, PMID 30872103) post_Becker <- c("argensis", "caeli", "cornubiensis", "edaphicus") - if (any(x %in% MOs_staph[species %in% post_Becker, ..property][[1]])) { + if (any(x %in% MOs_staph[species %in% post_Becker, + ..property][[1]])) { warning("Becker ", italic("et al."), " (2014, 2019) does not contain these species named after their publication: ", italic(paste("S.", - sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, ..property][[1]]]))), + sort(mo_species(unique(x[x %in% MOs_staph[species %in% post_Becker, + ..property][[1]]]))), collapse = ", ")), ".", call. = FALSE, immediate. = TRUE) } - x[x %in% CoNS] <- microorganismsDT[mo == "B_STPHY_CONS", ..property][[1]][1L] - x[x %in% CoPS] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L] + x[x %in% CoNS] <- microorganismsDT[mo == "B_STPHY_CONS", + ..property][[1]][1L] + x[x %in% CoPS] <- microorganismsDT[mo == "B_STPHY_COPS", + ..property][[1]][1L] if (Becker == "all") { - x[x %in% microorganismsDT[mo %like_case% "^B_STPHY_AURS", ..property][[1]]] <- microorganismsDT[mo == "B_STPHY_COPS", ..property][[1]][1L] + x[x %in% microorganismsDT[mo %like_case% "^B_STPHY_AURS", + ..property][[1]]] <- microorganismsDT[mo == "B_STPHY_COPS", + ..property][[1]][1L] } } # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A - S. pyogenes - x[x == microorganismsDT[mo == "B_STRPT_PYGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPA", ..property][[1]][1L] + x[x == microorganismsDT[mo == "B_STRPT_PYGN", + ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPA", + ..property][[1]][1L] # group B - S. agalactiae - x[x == microorganismsDT[mo == "B_STRPT_AGLC", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPB", ..property][[1]][1L] + x[x == microorganismsDT[mo == "B_STRPT_AGLC", + ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPB", + ..property][[1]][1L] # group C S_groupC <- microorganismsDT %>% filter(genus == "Streptococcus", species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae")) %>% pull(property) - x[x %in% S_groupC] <- microorganismsDT[mo == "B_STRPT_GRPC", ..property][[1]][1L] + x[x %in% S_groupC] <- microorganismsDT[mo == "B_STRPT_GRPC", + ..property][[1]][1L] if (Lancefield == "all") { # all Enterococci - x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == "B_STRPT_GRPD", ..property][[1]][1L] + x[x %like% "^(Enterococcus|B_ENTRC)"] <- microorganismsDT[mo == "B_STRPT_GRPD", + ..property][[1]][1L] } # group F - S. anginosus - x[x == microorganismsDT[mo == "B_STRPT_ANGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPF", ..property][[1]][1L] + x[x == microorganismsDT[mo == "B_STRPT_ANGN", + ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPF", + ..property][[1]][1L] # group H - S. sanguinis - x[x == microorganismsDT[mo == "B_STRPT_SNGN", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPH", ..property][[1]][1L] + x[x == microorganismsDT[mo == "B_STRPT_SNGN", + ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPH", + ..property][[1]][1L] # group K - S. salivarius - x[x == microorganismsDT[mo == "B_STRPT_SLVR", ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPK", ..property][[1]][1L] + x[x == microorganismsDT[mo == "B_STRPT_SLVR", + ..property][[1]][1L]] <- microorganismsDT[mo == "B_STRPT_GRPK", + ..property][[1]][1L] } # Wrap up ---------------------------------------------------------------- @@ -1805,7 +1906,8 @@ as.data.frame.mo <- function(x, ...) { "[<-.mo" <- function(i, j, ..., value) { y <- NextMethod() attributes(y) <- attributes(i) - class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old))) + class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), + as.character(microorganisms.translation$mo_old))) } #' @exportMethod [[<-.mo #' @export @@ -1813,7 +1915,8 @@ as.data.frame.mo <- function(x, ...) { "[[<-.mo" <- function(i, j, ..., value) { y <- NextMethod() attributes(y) <- attributes(i) - class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old))) + class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), + as.character(microorganisms.translation$mo_old))) } #' @exportMethod c.mo #' @export @@ -1821,7 +1924,8 @@ as.data.frame.mo <- function(x, ...) { c.mo <- function(x, ...) { y <- NextMethod() attributes(y) <- attributes(x) - class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), as.character(microorganisms.translation$mo_old))) + class_integrity_check(y, "microorganism code", c(as.character(AMR::microorganisms$mo), + as.character(microorganisms.translation$mo_old))) } #' @rdname as.mo diff --git a/R/zzz.R b/R/zzz.R index 8a2347e0..528b7e9d 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -21,21 +21,18 @@ #' @importFrom data.table as.data.table setkey .onLoad <- function(libname, pkgname) { + # packageStartupMessage("Loading taxonomic reference data") + # get new functions not available in older versions of R backports::import(pkgname) - + # register data - microorganisms.oldDT <- as.data.table(AMR::microorganisms.old) - # for fullname_lower: keep only dots, letters, numbers, slashes, spaces and dashes - microorganisms.oldDT$fullname_lower <- gsub("[^.a-z0-9/ \\-]+", "", tolower(microorganisms.oldDT$fullname)) - setkey(microorganisms.oldDT, prevalence, fullname) - assign(x = "microorganismsDT", value = make_DT(), envir = asNamespace("AMR")) assign(x = "microorganisms.oldDT", - value = microorganisms.oldDT, + value = make_oldDT(), envir = asNamespace("AMR")) assign(x = "mo_codes_v0.5.0", @@ -62,7 +59,9 @@ make_DT <- function() { # work with Viridans Group Streptococci, etc. tolower(trimws(ifelse(genus == "", fullname, - paste(genus, species, subspecies)))))) %>% + paste(genus, species, subspecies))))), + # add a column with only "e coli" like combinations + g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>% as.data.table() # so arrange data on prevalence first, then kingdom, then full name @@ -73,6 +72,25 @@ make_DT <- function() { microorganismsDT } +#' @importFrom data.table as.data.table setkey +#' @importFrom dplyr %>% mutate +make_oldDT <- function() { + microorganisms.oldDT <- AMR::microorganisms.old %>% + mutate( + # for fullname_lower: keep only dots, letters, + # numbers, slashes, spaces and dashes + fullname_lower = gsub("[^.a-z0-9/ \\-]+", "", tolower(fullname)), + # add a column with only "e coli" like combinations + g_species = gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", fullname_lower)) %>% + as.data.table() + + # so arrange data on prevalence first, then full name + setkey(microorganisms.oldDT, + prevalence, + fullname) + microorganisms.oldDT +} + make_trans_tbl <- function() { # conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life) c(B_ACHRMB = "B_ACHRM", B_ANNMA = "B_ACTNS", B_ACLLS = "B_ALCYC", diff --git a/docs/404.html b/docs/404.html index 6439ba43..21f7adb5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5be758d9..ea9b3b04 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index f2ca4101..ddceba46 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -41,7 +41,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 @@ -187,7 +187,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

20 December 2019

+

21 December 2019

@@ -196,19 +196,21 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 20 December 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 21 December 2019.

Introduction

Conducting antimicrobial resistance analysis unfortunately requires in-depth knowledge from different scientific fields, which makes it hard to do right. At least, it requires:

-

Of course, we cannot instantly provide you with knowledge and experience. But with this AMR pacakge, we aimed at providing (1) tools to simplify antimicrobial resistance data cleaning/analysis, (2) methods to easily incorporate international guidelines and (3) scientifically reliable reference data. The AMR package enables standardised and reproducible antimicrobial resistance analyses, including the application of evidence-based rules, determination of first isolates, translation of various codes for microorganisms and antimicrobial agents, determination of (multi-drug) resistant microorganisms, and calculation of antimicrobial resistance, prevalence and future trends.

+

Of course, we cannot instantly provide you with knowledge and experience. But with this AMR package, we aimed at providing (1) tools to simplify antimicrobial resistance data cleaning, transformation and analysis, (2) methods to easily incorporate international guidelines and (3) scientifically reliable reference data, including the requirements mentioned above.

+

The AMR package enables standardised and reproducible antimicrobial resistance analysis, with the application of evidence-based rules, determination of first isolates, translation of various codes for microorganisms and antimicrobial agents, determination of (multi-drug) resistant microorganisms, and calculation of antimicrobial resistance, prevalence and future trends.

@@ -225,21 +227,21 @@ -2019-12-20 +2019-12-21 abcd Escherichia coli S S -2019-12-20 +2019-12-21 abcd Escherichia coli S R -2019-12-20 +2019-12-21 efgh Escherichia coli R @@ -334,10 +336,10 @@ -2015-01-13 -Y8 +2011-10-17 +V10 Hospital D -Escherichia coli +Staphylococcus aureus S S S @@ -345,9 +347,31 @@ F -2010-04-08 -O8 +2017-05-17 +R1 Hospital B +Klebsiella pneumoniae +S +S +S +S +F + + +2015-07-05 +J5 +Hospital B +Escherichia coli +S +S +S +S +M + + +2012-01-01 +R10 +Hospital C Escherichia coli S S @@ -356,49 +380,27 @@ F -2010-05-11 -W2 +2014-07-18 +T6 Hospital A Escherichia coli -R S S -R +S +S F -2011-11-23 -I9 +2016-07-17 +K1 Hospital C -Klebsiella pneumoniae -R -S -S -S -M - - -2011-03-28 -D3 -Hospital B Escherichia coli -S -S +I +I S S M - -2013-11-16 -P8 -Hospital D -Streptococcus pneumoniae -R -R -S -S -F -

Now, let’s start the cleaning and the analysis!

@@ -419,8 +421,8 @@ # # Item Count Percent Cum. Count Cum. Percent # --- ----- ------- -------- ----------- ------------- -# 1 M 10,341 51.71% 10,341 51.71% -# 2 F 9,659 48.30% 20,000 100.00% +# 1 M 10,402 52.01% 10,402 52.01% +# 2 F 9,598 47.99% 20,000 100.00%

So, we can draw at least two conclusions immediately. From a data scientists perspective, the data looks clean: only values M and F. From a researchers perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

data <- data %>%
@@ -435,8 +437,8 @@
 # Other rules by this AMR package
 # Non-EUCAST: inherit amoxicillin results for unavailable ampicillin (no changes)
 # Non-EUCAST: inherit ampicillin results for unavailable amoxicillin (no changes)
-# Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S (2,942 values changed)
-# Non-EUCAST: set ampicillin = R where amoxicillin/clav acid = R (159 values changed)
+# Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S (2,972 values changed)
+# Non-EUCAST: set ampicillin = R where amoxicillin/clav acid = R (139 values changed)
 # Non-EUCAST: set piperacillin = R where piperacillin/tazobactam = R (no changes)
 # Non-EUCAST: set piperacillin/tazobactam = S where piperacillin = S (no changes)
 # Non-EUCAST: set trimethoprim = R where trimethoprim/sulfa = R (no changes)
@@ -461,14 +463,14 @@
 # Pasteurella multocida (no changes)
 # Staphylococcus (no changes)
 # Streptococcus groups A, B, C, G (no changes)
-# Streptococcus pneumoniae (942 values changed)
+# Streptococcus pneumoniae (959 values changed)
 # Viridans group streptococci (no changes)
 # 
 # EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-# Table 01: Intrinsic resistance in Enterobacteriaceae (1,222 values changed)
+# Table 01: Intrinsic resistance in Enterobacteriaceae (1,333 values changed)
 # Table 02: Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
 # Table 03: Intrinsic resistance in other Gram-negative bacteria (no changes)
-# Table 04: Intrinsic resistance in Gram-positive bacteria (2,709 values changed)
+# Table 04: Intrinsic resistance in Gram-positive bacteria (2,723 values changed)
 # Table 08: Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
 # Table 09: Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
 # Table 11: Interpretive rules for macrolides, lincosamides, and streptogramins (no changes)
@@ -476,15 +478,15 @@
 # Table 13: Interpretive rules for quinolones (no changes)
 # 
 # -------------------------------------------------------------------------------
-# EUCAST rules affected 6,398 out of 20,000 rows, making a total of 7,974 edits
+# EUCAST rules affected 6,551 out of 20,000 rows, making a total of 8,126 edits
 # => added 0 test results
 # 
-# => changed 7,974 test results
-#    - 95 test results changed from S to I
-#    - 4,603 test results changed from S to R
-#    - 1,165 test results changed from I to S
-#    - 334 test results changed from I to R
-#    - 1,777 test results changed from R to S
+# => changed 8,126 test results
+#    - 115 test results changed from S to I
+#    - 4,709 test results changed from S to R
+#    - 1,179 test results changed from I to S
+#    - 330 test results changed from I to R
+#    - 1,793 test results changed from R to S
 # -------------------------------------------------------------------------------
 # 
 # Use eucast_rules(..., verbose = TRUE) (on your original data) to get a data.frame with all specified edits instead.
@@ -512,8 +514,8 @@ # NOTE: Using column `bacteria` as input for `col_mo`. # NOTE: Using column `date` as input for `col_date`. # NOTE: Using column `patient_id` as input for `col_patient_id`. -# => Found 5,647 first isolates (28.2% of total)

-

So only 28.2% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

+# => Found 5,691 first isolates (28.5% of total) +

So only 28.5% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>% 
   filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

@@ -523,7 +525,7 @@

First weighted isolates

-

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient T5, sorted on date:

+

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient Y10, sorted on date:

@@ -539,54 +541,54 @@ - - + + - + - - + + - - + + - - + + - + - - + + - + - - + + - + @@ -594,30 +596,30 @@ - - + + - - + + - - + + - + - - + + @@ -627,29 +629,29 @@ - - - - - - - - - - - - - + + + + + + + + + + + + +
isolate
12010-02-25T52010-06-10Y10 B_ESCHR_COLI S SSR S TRUE
22010-03-02T52010-09-27Y10 B_ESCHR_COLIRISS S S FALSE
32010-04-02T52010-11-27Y10 B_ESCHR_COLI S SSR S FALSE
42010-06-08T52010-12-06Y10 B_ESCHR_COLI S SSR S FALSE
52010-06-20T52011-02-18Y10 B_ESCHR_COLISR S S S
62010-09-13T52011-04-05Y10 B_ESCHR_COLISSR R SR FALSE
72010-10-06T52011-05-29Y10 B_ESCHR_COLI R S SSR FALSE
82010-11-19T52011-06-04Y10 B_ESCHR_COLI S S
92010-12-06T5B_ESCHR_COLISSSSFALSE
102010-12-07T52011-06-14Y10 B_ESCHR_COLI S S R STRUE
102011-11-02Y10B_ESCHR_COLISSSS FALSE
-

Only 1 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

+

Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:

data <- data %>% 
   mutate(keyab = key_antibiotics(.)) %>% 
@@ -660,7 +662,7 @@
 # NOTE: Using column `patient_id` as input for `col_patient_id`.
 # NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.
 # [Criterion] Inclusion based on key antibiotics, ignoring I
-# => Found 15,050 first weighted isolates (75.3% of total)
+# => Found 15,020 first weighted isolates (75.1% of total)
@@ -677,23 +679,23 @@ - - + + - + - - + + - - + + @@ -701,68 +703,68 @@ - - + + - + - - + + - + - - + + - + - + - - + + - - + + - - + + - + - - + + @@ -773,35 +775,35 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + +
isolate
12010-02-25T52010-06-10Y10 B_ESCHR_COLI S SSR S TRUE TRUE
22010-03-02T52010-09-27Y10 B_ESCHR_COLIRISS S S FALSE
32010-04-02T52010-11-27Y10 B_ESCHR_COLI S SSR S FALSE TRUE
42010-06-08T52010-12-06Y10 B_ESCHR_COLI S SSR S FALSE FALSE
52010-06-20T52011-02-18Y10 B_ESCHR_COLISR S S S FALSEFALSETRUE
62010-09-13T52011-04-05Y10 B_ESCHR_COLISSR R SR FALSE TRUE
72010-10-06T52011-05-29Y10 B_ESCHR_COLI R S SSR FALSE TRUE
82010-11-19T52011-06-04Y10 B_ESCHR_COLI S S
92010-12-06T5B_ESCHR_COLISSSSFALSEFALSE
102010-12-07T52011-06-14Y10 B_ESCHR_COLI S S R STRUETRUE
102011-11-02Y10B_ESCHR_COLISSSS FALSE TRUE
-

Instead of 1, now 7 isolates are flagged. In total, 75.3% of all isolates are marked ‘first weighted’ - 47.0% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 2, now 9 isolates are flagged. In total, 75.1% of all isolates are marked ‘first weighted’ - 46.6% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,050 isolates for analysis.

+

So we end up with 15,020 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>% 
   select(-c(first, keyab))
@@ -827,24 +829,24 @@ 2 -2010-04-08 -O8 +2017-05-17 +R1 Hospital B -B_ESCHR_COLI -S +B_KLBSL_PNMN +R S S S F Gram-negative -Escherichia -coli +Klebsiella +pneumoniae TRUE -3 -2010-05-11 -W2 +9 +2016-05-07 +N8 Hospital A B_ESCHR_COLI R @@ -858,67 +860,67 @@ TRUE -4 -2011-11-23 -I9 -Hospital C -B_KLBSL_PNMN -R +11 +2015-11-13 +U7 +Hospital B +B_STPHY_AURS S S S -M -Gram-negative -Klebsiella -pneumoniae +S +F +Gram-positive +Staphylococcus +aureus TRUE -5 -2011-03-28 -D3 -Hospital B -B_ESCHR_COLI -S -S +14 +2011-08-27 +C10 +Hospital C +B_STPHY_AURS S S +R +R M -Gram-negative -Escherichia -coli +Gram-positive +Staphylococcus +aureus TRUE -6 -2013-11-16 -P8 -Hospital D -B_STRPT_PNMN -R +15 +2013-10-17 +R10 +Hospital B +B_STPHY_AURS R S -R +S +S F Gram-positive -Streptococcus -pneumoniae +Staphylococcus +aureus TRUE -7 -2014-12-19 -Q5 -Hospital B -B_ESCHR_COLI +16 +2011-10-01 +I6 +Hospital A +B_STRPT_PNMN S S -S -S -F -Gram-negative -Escherichia -coli +R +R +M +Gram-positive +Streptococcus +pneumoniae TRUE @@ -940,7 +942,7 @@
data_1st %>% freq(genus, species)

Frequency table

Class: character
-Length: 15,050 (of which NA: 0 = 0%)
+Length: 15,020 (of which NA: 0 = 0%)
Unique: 4

Shortest: 16
Longest: 24

@@ -957,33 +959,33 @@ Longest: 24

1 Escherichia coli -7,510 -49.90% -7,510 -49.90% +7,478 +49.79% +7,478 +49.79% 2 Staphylococcus aureus -3,786 -25.16% -11,296 -75.06% +3,691 +24.57% +11,169 +74.36% 3 Streptococcus pneumoniae -2,294 -15.24% -13,590 -90.30% +2,306 +15.35% +13,475 +89.71% 4 Klebsiella pneumoniae -1,460 -9.70% -15,050 +1,545 +10.29% +15,020 100.00% @@ -995,7 +997,7 @@ Longest: 24

The functions resistance() and susceptibility() can be used to calculate antimicrobial resistance or susceptibility. For more specific analyses, the functions proportion_S(), proportion_SI(), proportion_I(), proportion_IR() and proportion_R() can be used to determine the proportion of a specific antimicrobial outcome.

As per the EUCAST guideline of 2019, we calculate resistance as the proportion of R (proportion_R(), equal to resistance()) and susceptibility as the proportion of S and I (proportion_SI(), equal to susceptibility()). These functions can be used on their own:

data_1st %>% resistance(AMX)
-# [1] 0.4623256
+# [1] 0.4637816

Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

data_1st %>% 
   group_by(hospital) %>% 
@@ -1008,19 +1010,19 @@ Longest: 24

Hospital A -0.4637522 +0.4687360 Hospital B -0.4602655 +0.4664905 Hospital C -0.4760000 +0.4576649 Hospital D -0.4536218 +0.4561813 @@ -1038,23 +1040,23 @@ Longest: 24

Hospital A -0.4637522 -4552 +0.4687360 +4478 Hospital B -0.4602655 -5197 +0.4664905 +5297 Hospital C -0.4760000 -2250 +0.4576649 +2244 Hospital D -0.4536218 -3051 +0.4561813 +3001 @@ -1074,27 +1076,27 @@ Longest: 24

Escherichia -0.9227696 -0.8909454 -0.9920107 +0.9259160 +0.8915485 +0.9933137 Klebsiella -0.9191781 -0.9061644 -0.9917808 +0.9177994 +0.8990291 +0.9967638 Staphylococcus -0.9186476 -0.9199683 -0.9915478 +0.9244107 +0.9260363 +0.9943105 Streptococcus -0.6129032 +0.6183868 0.0000000 -0.6129032 +0.6183868 diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index f070ec08..7a459ed6 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index e5725a49..1e77ae7e 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index c33c760a..d3e24f2b 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index 8d5ad519..4b21fb11 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index c95f38bc..be934c10 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005
diff --git a/docs/authors.html b/docs/authors.html index f62a507d..aa58853e 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 diff --git a/docs/index.html b/docs/index.html index d83e946e..09b83360 100644 --- a/docs/index.html +++ b/docs/index.html @@ -45,7 +45,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 diff --git a/docs/news/index.html b/docs/news/index.html index 38947f08..247a584a 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -84,7 +84,7 @@ AMR (for R) - 0.9.0.9004 + 0.9.0.9005 @@ -231,19 +231,21 @@ -
+

-AMR 0.9.0.9004 Unreleased +AMR 0.9.0.9005 Unreleased

-
+

-Last updated: 20-Dec-2019 +Last updated: 21-Dec-2019

Changes

    -
  • Speed improvement for as.mo() (and consequently all mo_* functions that use as.mo() internally)
  • +
  • Speed improvement for as.mo() (and consequently all mo_* functions that use as.mo() internally), especially for the G. species format (G for genus), like E. coli and K penumoniae +
  • +
  • Input values for as.disk() limited to a maximum of 50 millimeters
@@ -1407,7 +1409,7 @@ Using as.mo(..., allow_uncertain = 3)

Contents

@@ -234,7 +234,7 @@
-

This transforms a vector to a new class disk, which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 99.

+

This transforms a vector to a new class disk, which is a growth zone size (around an antibiotic disk) in millimeters between 6 and 50.

as.disk(x, na.rm = FALSE)
@@ -256,7 +256,7 @@
 
     

Value

-

Ordered integer factor with new class disk

+

An integer with additional new class disk

Details

Interpret disk values as RSI values with as.rsi(). It supports guidelines from EUCAST and CLSI.

@@ -297,7 +297,7 @@