diff --git a/DESCRIPTION b/DESCRIPTION index f0eedddf..080dbdd5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.1.0.9020 -Date: 2020-05-27 +Version: 1.1.0.9021 +Date: 2020-05-28 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 0339b4f9..2249a670 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -254,7 +254,6 @@ importFrom(graphics,arrows) importFrom(graphics,axis) importFrom(graphics,barplot) importFrom(graphics,par) -importFrom(graphics,plot) importFrom(graphics,points) importFrom(graphics,text) importFrom(stats,complete.cases) diff --git a/NEWS.md b/NEWS.md index cdeaa3de..adcd3e51 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.1.0.9020 -## Last updated: 27-May-2020 +# AMR 1.1.0.9021 +## Last updated: 28-May-2020 ### Breaking * Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users. @@ -14,7 +14,7 @@ ### Changed * Taxonomy: - * Updated the taxonomy of microorganisms tot May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020) + * Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020). **Note:** a taxonomic update may always impact determination of first isolates (using `first_isolate()`), since some bacterial names might be renamed to other genera or other (sub)species. This is expected behaviour. 
* Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string) * EUCAST rules: * The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`. diff --git a/R/resistance_predict.R b/R/resistance_predict.R index b9f223d6..5d77df5a 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -304,7 +304,7 @@ rsi_predict <- resistance_predict #' @exportMethod plot.mic #' @export -#' @importFrom graphics plot axis arrows points +#' @importFrom graphics axis arrows points #' @rdname resistance_predict plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", x_name), ...) { x_name <- paste0(ab_name(attributes(x)$ab), " (", attributes(x)$ab, ")") @@ -314,6 +314,12 @@ plot.resistance_predict <- function(x, main = paste("Resistance Prediction of", } else { ylab <- "%IR" } + # get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0 + if (as.integer(R.Version()$major) >= 4) { + plot <- get("plot", envir = asNamespace("base")) + } else { + plot <- get("plot", envir = asNamespace("graphics")) + } plot(x = x$year, y = x$value, ylim = c(0, 1), diff --git a/R/rsi.R b/R/rsi.R index 8a99d1f4..9d16b432 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -563,25 +563,20 @@ summary.rsi <- function(object, ...) 
{ #' @exportMethod plot.rsi #' @export -#' @importFrom graphics plot text +#' @importFrom graphics text axis #' @noRd plot.rsi <- function(x, lwd = 2, ylim = NULL, ylab = "Percentage", xlab = "Antimicrobial Interpretation", - main = paste("Susceptibility Analysis of", deparse(substitute(x))), + main = paste("Resistance Overview of", deparse(substitute(x))), axes = FALSE, ...) { - suppressWarnings( - data <- data.frame(x = x, - y = 1, - stringsAsFactors = TRUE) %>% - group_by(x) %>% - summarise(n = sum(y)) %>% - filter(!is.na(x)) %>% - mutate(s = round((n / sum(n)) * 100, 1)) - ) + data <- as.data.frame(table(x), stringsAsFactors = FALSE) + colnames(data) <- c("x", "n") + data$s <- round((data$n / sum(data$n)) * 100, 1) + if (!"S" %in% data$x) { data <- rbind(data, data.frame(x = "S", n = 0, s = 0)) } @@ -592,10 +587,17 @@ plot.rsi <- function(x, data <- rbind(data, data.frame(x = "R", n = 0, s = 0)) } + # don't use as.rsi() here, it will confuse plot() data$x <- factor(data$x, levels = c("S", "I", "R"), ordered = TRUE) ymax <- if_else(max(data$s) > 95, 105, 100) + # get plot() generic; this was moved from the 'graphics' pkg to the 'base' pkg in R 4.0.0 + if (as.integer(R.Version()$major) >= 4) { + plot <- get("plot", envir = asNamespace("base")) + } else { + plot <- get("plot", envir = asNamespace("graphics")) + } plot(x = data$x, y = data$s, lwd = lwd, @@ -623,7 +625,7 @@ plot.rsi <- function(x, barplot.rsi <- function(height, col = c("chartreuse4", "chartreuse3", "brown3"), xlab = ifelse(beside, "Antimicrobial Interpretation", ""), - main = paste("Antimicrobial resistance of", deparse(substitute(height))), + main = paste("Resistance Overview of", deparse(substitute(height))), ylab = "Frequency", beside = TRUE, axes = beside, diff --git a/data-raw/country_analysis.R b/data-raw/country_analysis.R index 34cff779..44a22bf1 100644 --- a/data-raw/country_analysis.R +++ b/data-raw/country_analysis.R @@ -90,9 +90,29 @@ countries_geometry <- sf::st_as_sf(map('world', plot 
= FALSE, fill = TRUE)) %>% not_antarctica = as.integer(ID != "Antarctica"), countries_name = ifelse(included == 1, as.character(ID), NA)) +# add countries not in the list +countries_missing <- unique(ip_tbl$country[!ip_tbl$country %in% countries_geometry$countries_code]) +for (i in seq_len(length(countries_missing))) { + countries_geometry <- countries_geometry %>% + rbind(countries_geometry %>% + filter(ID == "Netherlands") %>% + mutate(ID = countrycode::countrycode(countries_missing[i], + origin = 'iso2c', + destination = 'country.name'), + countries_code = countries_missing[i], + included = 1, + not_antarctica = 1, + countries_name = countrycode::countrycode(countries_missing[i], + origin = 'iso2c', + destination = 'country.name'))) +} + # how many? countries_geometry %>% filter(included == 1) %>% nrow() +countries_geometry$countries_name <- gsub("UK", "United Kingdom", countries_geometry$countries_name, fixed = TRUE) +countries_geometry$countries_name <- gsub("USA", "United States", countries_geometry$countries_name, fixed = TRUE) + countries_plot <- ggplot(countries_geometry) + geom_sf(aes(fill = included, colour = not_antarctica), size = 0.25, @@ -101,9 +121,9 @@ countries_plot <- ggplot(countries_geometry) + theme(panel.grid = element_blank(), axis.title = element_blank(), axis.text = element_blank()) + - scale_fill_gradient(low = "white", high = "#CAD6EA", ) + + scale_fill_gradient(low = "white", high = "#128f7645") + # this makes the border Antarctica turn white (invisible): - scale_colour_gradient(low = "white", high = "#81899B") + scale_colour_gradient(low = "white", high = "#128f76") countries_plot_mini <- countries_plot countries_plot_mini$data <- countries_plot_mini$data %>% filter(ID != "Antarctica") diff --git a/data-raw/microorganisms.txt b/data-raw/microorganisms.txt index 6bc5e2c3..32009e55 100644 --- a/data-raw/microorganisms.txt +++ b/data-raw/microorganisms.txt @@ -37251,6 +37251,7 @@ "B_MYCBC_TKNS" "Mycobacterium tokaiense" "Bacteria" 
"Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tokaiense" "" "species" "Tsukamura, 1981" "c457ca4ae3a404100c8ce8c82a6100cc" "CoL" 2 "72477006" "B_MYCBC_TRPL" "Mycobacterium triplex" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "triplex" "" "species" "Floyd et al., 1997" "f23c2b6cad7a0e20374cdf3d3ff55dce" "CoL" 2 "113860005" "B_MYCBC_TRVL" "Mycobacterium triviale" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "triviale" "" "species" "Kubica, 1970" "9cb8b676cce27952821e173b12bfff3f" "CoL" 2 "40333002" +"B_MYCBC_TBRC" "Mycobacterium tuberculosis" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tuberculosis" "" "species" "Lehmann et al., 2018" "778540" "DSMZ" 2 "c(\"113861009\", \"113858008\")" "B_MYCBC_TUSC" "Mycobacterium tusciae" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "tusciae" "" "species" "Tortoli et al., 1999" "7a8ff8f5a2b16131366fe6e8dfb6b570" "CoL" 2 "B_MYCBC_ULCR" "Mycobacterium ulcerans" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "ulcerans" "" "species" "MacCallum et al., 1950" "96b3a2e207e76f4725132034d7d0bde1" "CoL" 2 "40713003" "B_MYCBC_VACC" "Mycobacterium vaccae" "Bacteria" "Actinobacteria" "(unknown class)" "Actinomycetales" "Mycobacteriaceae" "Mycobacterium" "vaccae" "" "species" "Bonicke et al., 1964" "adbc928aba39beadc25b2ba7e8214c91" "CoL" 2 "54925005" diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index a4529ebe..4b2cfa95 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -920,6 +920,22 @@ testthat::test_file("tests/testthat/test-data.R") testthat::test_file("tests/testthat/test-mo.R") 
testthat::test_file("tests/testthat/test-mo_property.R") +# edit 2020-05-28 +# Not sure why it now says M. tuberculosis was renamed to M. africanum (B_MYCBC_AFRC), but that's not true +microorganisms <- microorganisms %>% + bind_rows(microorganisms %>% + filter(mo == "B_MYCBC_AFRC") %>% + mutate(mo = "B_MYCBC_TBRC", snomed = list(c("113861009", "113858008")), + ref = "Lehmann et al., 2018",species_id = "778540", + source = "DSMZ", species = "tuberculosis", + fullname = "Mycobacterium tuberculosis")) %>% + arrange(fullname) +class(microorganisms$mo) <- c("mo", "character") +microorganisms.old <- microorganisms.old %>% filter(fullname != "Mycobacterium tuberculosis") + +usethis::use_data(microorganisms, overwrite = TRUE, version = 2) +usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2) + # OLD CODE ---------------------------------------------------------------- diff --git a/data-raw/reproduction_of_microorganisms_new.R b/data-raw/reproduction_of_microorganisms_new.R deleted file mode 100644 index 3d9b9140..00000000 --- a/data-raw/reproduction_of_microorganisms_new.R +++ /dev/null @@ -1,682 +0,0 @@ -# ==================================================================== # -# TITLE # -# Antimicrobial Resistance (AMR) Analysis # -# # -# SOURCE # -# https://gitlab.com/msberends/AMR # -# # -# LICENCE # -# (c) 2018-2020 Berends MS, Luz CF et al. # -# # -# This R package is free software; you can freely use and distribute # -# it for both personal and commercial purposes under the terms of the # -# GNU General Public License version 2.0 (GNU GPL-2), as published by # -# the Free Software Foundation. # -# # -# We created this package for both routine data analysis and academic # -# research and it was publicly released in the hope that it will be # -# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # -# Visit our website for more info: https://msberends.gitlab.io/AMR. 
# -# ==================================================================== # - -# --------------------------------------------------------------------------------- -# Reproduction of the `microorganisms` data set -# --------------------------------------------------------------------------------- -# Data retrieved from: -# -# [1] Catalogue of Life (CoL) through the Encyclopaedia of Life -# https://opendata.eol.org/dataset/catalogue-of-life/ -# * Download the resource file with a name like "Catalogue of Life yyyy-mm-dd" -# * Extract "taxon.tab" -# -# [2] Global Biodiversity Information Facility (GBIF) -# https://doi.org/10.15468/39omei -# * Extract "Taxon.tsv" -# -# [3] Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ) -# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html -# * Download the latest "Complete List" as xlsx file (DSMZ_bactnames.xlsx) -# --------------------------------------------------------------------------------- - -library(dplyr) -library(AMR) - -data_col <- data.table::fread("Documents/taxon.tab") -data_gbif <- data.table::fread("Documents/Taxon.tsv") - -# read the xlsx file from DSMZ (only around 2.5 MB): -data_dsmz <- readxl::read_xlsx("Downloads/DSMZ_bactnames.xlsx") - -# the CoL data is over 3.7M rows: -data_col %>% freq(kingdom) -# Item Count Percent Cum. Count Cum. Percent -# --- ---------- ---------- -------- ----------- ------------- -# 1 Animalia 2,225,627 59.1% 2,225,627 59.1% -# 2 Plantae 1,177,412 31.3% 3,403,039 90.4% -# 3 Fungi 290,145 7.7% 3,693,184 98.1% -# 4 Chromista 47,126 1.3% 3,740,310 99.3% -# 5 Bacteria 14,478 0.4% 3,754,788 99.7% -# 6 Protozoa 6,060 0.2% 3,760,848 99.9% -# 7 Viruses 3,827 0.1% 3,764,675 100.0% -# 8 Archaea 610 0.0% 3,765,285 100.0% - -# the GBIF data is over 5.8M rows: -data_gbif %>% freq(kingdom) -# Item Count Percent Cum. Count Cum. 
Percent -# --- --------------- ---------- -------- ----------- ------------- -# 1 Animalia 3,264,138 55.7% 3,264,138 55.7% -# 2 Plantae 1,814,962 31.0% 5,079,100 86.7% -# 3 Fungi 538,086 9.2% 5,617,186 95.9% -# 4 Chromista 181,374 3.1% 5,798,560 99.0% -# 5 Bacteria 24,048 0.4% 5,822,608 99.4% -# 6 Protozoa 15,138 0.3% 5,837,746 99.7% -# 7 incertae sedis 9,995 0.2% 5,847,741 99.8% -# 8 Viruses 9,630 0.2% 5,857,371 100.0% -# 9 Archaea 771 0.0% 5,858,142 100.0% - - -# Clean up helper function ------------------------------------------------ -clean_new <- function(new) { - new %>% - # only the ones that have no new ID to refer to a newer name - filter(is.na(col_id_new)) %>% - filter( - ( - # we only want all MICROorganisms and no viruses - !kingdom %in% c("Animalia", "Chromista", "Plantae", "Viruses") - # and not all fungi: Aspergillus, Candida, Trichphyton and Pneumocystis are the most important, - # so only keep these orders from the fungi: - & !(kingdom == "Fungi" - & !order %in% c("Eurotiales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales")) - ) - # or the family has to contain a genus we found in our hospitals last decades (Northern Netherlands, 2002-2018) - | genus %in% c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces", - "Arachnia", "Ascaris", "Aureobacterium", "Aureobasidium", "Balantidum", "Bilophilia", "Branhamella", "Brochontrix", - "Brugia", "Calymmatobacterium", "Catabacter", "Chilomastix", "Chryseomonas", "Cladophialophora", "Cladosporium", - "Clonorchis", "Cordylobia", "Curvularia", "Demodex", "Dermatobia", "Diphyllobothrium", "Dracunculus", "Echinococcus", - "Enterobius", "Euascomycetes", "Exophiala", "Fasciola", "Fusarium", "Hendersonula", "Hymenolepis", "Kloeckera", - "Koserella", "Larva", "Leishmania", "Lelliottia", "Loa", "Lumbricus", "Malassezia", "Metagonimus", "Molonomonas", - "Mucor", "Nattrassia", "Necator", "Novospingobium", 
"Onchocerca", "Opistorchis", "Paragonimus", "Paramyxovirus", - "Pediculus", "Phoma", "Phthirus", "Pityrosporum", "Pseudallescheria", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", - "Salinococcus", "Sanguibacteroides", "Schistosoma", "Scopulariopsis", "Scytalidium", "Sporobolomyces", "Stomatococcus", - "Strongyloides", "Syncephalastraceae", "Taenia", "Torulopsis", "Trichinella", "Trichobilharzia", "Trichomonas", - "Trichosporon", "Trichuris", "Trypanosoma", "Wuchereria")) %>% - mutate( - authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"), - # remove leading and trailing brackets - authors2 = gsub("^[(](.*)[)]$", "\\1", authors2), - # only take part after brackets if there's a name - authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2), - gsub(".*[)] (.*)", "\\1", authors2), - authors2), - # get year from last 4 digits - lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)), - # can never be later than now - lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), - NA, - lastyear), - # get authors without last year - authors = gsub("(.*)[0-9]{4}$", "\\1", authors2), - # remove nonsense characters from names - authors = gsub("[^a-zA-Z,'& -]", "", authors), - # remove trailing and leading spaces - authors = trimws(authors), - # only keep first author and replace all others by 'et al' - authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors), - # et al. always with ending dot - authors = gsub(" et al\\.?", " et al.", authors), - authors = gsub(" ?,$", "", authors), - # don't start with 'sensu' or 'ehrenb' - authors = gsub("^(sensu|Ehrenb.?) 
", "", authors, ignore.case = TRUE), - # no initials, only surname - authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE), - # combine author and year if year is available - ref = ifelse(!is.na(lastyear), - paste0(authors, ", ", lastyear), - authors), - # fix beginning and ending - ref = gsub(", $", "", ref), - ref = gsub("^, ", "", ref)) %>% - # remove text if it contains 'Not assigned' like phylum in viruses - mutate_all(~gsub("Not assigned", "", .)) %>% - # Remove non-ASCII characters (these are not allowed by CRAN) - lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% - as_tibble(stringsAsFactors = FALSE) %>% - mutate(fullname = trimws(case_when(rank == "family" ~ family, - rank == "order" ~ order, - rank == "class" ~ class, - rank == "phylum" ~ phylum, - rank == "kingdom" ~ kingdom, - TRUE ~ paste(genus, species, subspecies)))) -} -clean_old <- function(old, new) { - old %>% - # only the ones that exist in the new data set - filter(col_id_new %in% new$col_id) %>% - mutate( - authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"), - # remove leading and trailing brackets - authors2 = gsub("^[(](.*)[)]$", "\\1", authors2), - # only take part after brackets if there's a name - authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2), - gsub(".*[)] (.*)", "\\1", authors2), - authors2), - # get year from last 4 digits - lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)), - # can never be later than now - lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")), - NA, - lastyear), - # get authors without last year - authors = gsub("(.*)[0-9]{4}$", "\\1", authors2), - # remove nonsense characters from names - authors = gsub("[^a-zA-Z,'& -]", "", authors), - # remove trailing and leading spaces - authors = trimws(authors), - # only keep first author and replace all others by 'et al' - authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors), - # et al. 
always with ending dot - authors = gsub(" et al\\.?", " et al.", authors), - authors = gsub(" ?,$", "", authors), - # don't start with 'sensu' or 'ehrenb' - authors = gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE), - # no initials, only surname - authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE), - # combine author and year if year is available - ref = ifelse(!is.na(lastyear), - paste0(authors, ", ", lastyear), - authors), - # fix beginning and ending - ref = gsub(", $", "", ref), - ref = gsub("^, ", "", ref)) %>% - # remove text if it contains 'Not assigned' like phylum in viruses - mutate_all(~gsub("Not assigned", "", .)) %>% - # Remove non-ASCII characters (these are not allowed by CRAN) - lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% - as_tibble(stringsAsFactors = FALSE) %>% - select(col_id_new, fullname, ref, authors2) %>% - left_join(new %>% select(col_id, fullname_new = fullname), by = c(col_id_new = "col_id")) %>% - mutate(fullname = trimws( - gsub("(.*)[(].*", "\\1", - stringr::str_replace( - string = fullname, - pattern = stringr::fixed(authors2), - replacement = "")) %>% - gsub(" (var|f|subsp)[.]", "", .))) %>% - select(-c("col_id_new", "authors2")) %>% - filter(!is.na(fullname), !is.na(fullname_new)) %>% - filter(fullname != fullname_new, !fullname %like% "^[?]") -} - -# clean CoL and GBIF ---- -# clean data_col -data_col <- data_col %>% - as_tibble() %>% - select(col_id = taxonID, - col_id_new = acceptedNameUsageID, - fullname = scientificName, - kingdom, - phylum, - class, - order, - family, - genus, - species = specificEpithet, - subspecies = infraspecificEpithet, - rank = taxonRank, - ref = scientificNameAuthorship, - species_id = furtherInformationURL) %>% - mutate(source = "CoL") -# split into old and new -data_col.new <- data_col %>% clean_new() -data_col.old <- data_col %>% clean_old(new = data_col.new) -rm(data_col) - -# clean data_gbif -data_gbif <- data_gbif %>% - as_tibble() %>% - filter( - # no 
uncertain taxonomic placements - taxonRemarks != "doubtful", - kingdom != "incertae sedis", - taxonRank != "unranked") %>% - transmute(col_id = taxonID, - col_id_new = acceptedNameUsageID, - fullname = scientificName, - kingdom, - phylum, - class, - order, - family, - genus, - species = specificEpithet, - subspecies = infraspecificEpithet, - rank = taxonRank, - ref = scientificNameAuthorship, - species_id = as.character(parentNameUsageID)) %>% - mutate(source = "GBIF") -# split into old and new -data_gbif.new <- data_gbif %>% clean_new() -data_gbif.old <- data_gbif %>% clean_old(new = data_gbif.new) -rm(data_gbif) - -# put CoL and GBIF together ---- -MOs.new <- bind_rows(data_col.new, - data_gbif.new) %>% - mutate(taxonomic_tree_length = nchar(trimws(paste(kingdom, phylum, class, order, family, genus, species, subspecies)))) %>% - arrange(desc(taxonomic_tree_length)) %>% - distinct(fullname, .keep_all = TRUE) %>% - select(-c("col_id_new", "authors2", "authors", "lastyear", "taxonomic_tree_length")) %>% - arrange(fullname) -MOs.old <- bind_rows(data_col.old, - data_gbif.old) %>% - distinct(fullname, .keep_all = TRUE) %>% - arrange(fullname) - -# clean up DSMZ --- -data_dsmz <- data_dsmz %>% - as_tibble() %>% - transmute(col_id = NA_integer_, - col_id_new = NA_integer_, - fullname = "", - # kingdom = "", - # phylum = "", - # class = "", - # order = "", - # family = "", - genus = ifelse(is.na(GENUS), "", GENUS), - species = ifelse(is.na(SPECIES), "", SPECIES), - subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES), - rank = ifelse(species == "", "genus", "species"), - ref = AUTHORS, - species_id = as.character(RECORD_NO), - source = "DSMZ") - -# DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col -ref_taxonomy <- MOs.new %>% - distinct(genus, .keep_all = TRUE) %>% - filter(family != "") %>% - filter(genus %in% data_dsmz$genus) %>% - distinct(genus, .keep_all = TRUE) %>% - select(kingdom, phylum, class, order, family, 
genus) - -data_dsmz <- data_dsmz %>% - left_join(ref_taxonomy, by = "genus") %>% - mutate(kingdom = "Bacteria") - -data_dsmz.new <- data_dsmz %>% - clean_new() %>% - distinct(fullname, .keep_all = TRUE) %>% - select(colnames(MOs.new)) %>% - arrange(fullname) - -# combine everything ---- -MOs <- bind_rows(MOs.new, - data_dsmz.new) %>% - distinct(fullname, .keep_all = TRUE) %>% - # not the ones that are old - filter(!fullname %in% MOs.old$fullname) %>% - arrange(fullname) %>% - mutate(col_id = ifelse(source != "CoL", NA_integer_, col_id)) %>% - filter(fullname != "") - -rm(data_col.new) -rm(data_col.old) -rm(data_gbif.new) -rm(data_gbif.old) -rm(data_dsmz) -rm(data_dsmz.new) -rm(ref_taxonomy) -rm(MOs.new) - -MOs.bak <- MOs - -# Trichomonas trick ---- -# for species in Trypanosoma and Trichomonas we observe al lot of taxonomic info missing -MOs %>% filter(genus %in% c("Trypanosoma", "Trichomonas")) %>% View() -MOs[which(MOs$genus == "Trypanosoma"), "kingdom"] <- MOs[which(MOs$fullname == "Trypanosoma"),]$kingdom -MOs[which(MOs$genus == "Trypanosoma"), "phylum"] <- MOs[which(MOs$fullname == "Trypanosoma"),]$phylum -MOs[which(MOs$genus == "Trypanosoma"), "class"] <- MOs[which(MOs$fullname == "Trypanosoma"),]$class -MOs[which(MOs$genus == "Trypanosoma"), "order"] <- MOs[which(MOs$fullname == "Trypanosoma"),]$order -MOs[which(MOs$genus == "Trypanosoma"), "family"] <- MOs[which(MOs$fullname == "Trypanosoma"),]$family -MOs[which(MOs$genus == "Trichomonas"), "kingdom"] <- MOs[which(MOs$fullname == "Trichomonas"),]$kingdom -MOs[which(MOs$genus == "Trichomonas"), "phylum"] <- MOs[which(MOs$fullname == "Trichomonas"),]$phylum -MOs[which(MOs$genus == "Trichomonas"), "class"] <- MOs[which(MOs$fullname == "Trichomonas"),]$class -MOs[which(MOs$genus == "Trichomonas"), "order"] <- MOs[which(MOs$fullname == "Trichomonas"),]$order -MOs[which(MOs$genus == "Trichomonas"), "family"] <- MOs[which(MOs$fullname == "Trichomonas"),]$family - -# fill taxonomic properties that are missing -MOs 
<- MOs %>% - mutate(phylum = ifelse(phylum %in% c(NA, ""), "(unknown phylum)", phylum), - class = ifelse(class %in% c(NA, ""), "(unknown class)", class), - order = ifelse(order %in% c(NA, ""), "(unknown order)", order), - family = ifelse(family %in% c(NA, ""), "(unknown family)", family)) - -# Abbreviations ---- -# Add abbreviations so we can easily know which ones are which ones. -# These will become valid and unique microbial IDs for the AMR package. -MOs <- MOs %>% - arrange(kingdom, fullname) %>% - group_by(kingdom) %>% - mutate(abbr_other = case_when( - rank == "family" ~ paste0("[FAM]_", - abbreviate(family, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "order" ~ paste0("[ORD]_", - abbreviate(order, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "class" ~ paste0("[CLS]_", - abbreviate(class, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "phylum" ~ paste0("[PHL]_", - abbreviate(phylum, - minlength = 8, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)), - rank == "kingdom" ~ paste0("[KNG]_", kingdom), - TRUE ~ NA_character_ - )) %>% - # abbreviations determined per kingdom and family - # becuase they are part of the abbreviation - mutate(abbr_genus = abbreviate(genus, - minlength = 7, - use.classes = TRUE, - method = "both.sides", - strict = FALSE)) %>% - ungroup() %>% - group_by(genus) %>% - # species abbreviations may be the same between genera - # because the genus abbreviation is part of the abbreviation - mutate(abbr_species = abbreviate(stringr::str_to_title(species), - minlength = 3, - use.classes = FALSE, - method = "both.sides")) %>% - ungroup() %>% - group_by(genus, species) %>% - mutate(abbr_subspecies = abbreviate(stringr::str_to_title(subspecies), - minlength = 3, - use.classes = FALSE, - method = "both.sides")) %>% - ungroup() %>% - # remove trailing underscores - mutate(mo = gsub("_+$", "", 
- toupper(paste( - # first character: kingdom - ifelse(kingdom %in% c("Animalia", "Plantae"), - substr(kingdom, 1, 2), - substr(kingdom, 1, 1)), - # next: genus, species, subspecies - ifelse(is.na(abbr_other), - paste(abbr_genus, - abbr_species, - abbr_subspecies, - sep = "_"), - abbr_other), - sep = "_")))) %>% - mutate(mo = ifelse(duplicated(.$mo), - # these one or two must be unique too - paste0(mo, "1"), - mo), - fullname = ifelse(fullname == "", - trimws(paste(genus, species, subspecies)), - fullname)) %>% - # put `mo` in front, followed by the rest - select(mo, everything(), -abbr_other, -abbr_genus, -abbr_species, -abbr_subspecies) - -# add non-taxonomic entries -MOs <- MOs %>% - bind_rows( - # Unknowns - data.frame(mo = "UNKNOWN", - col_id = NA_integer_, - fullname = "(unknown name)", - kingdom = "(unknown kingdom)", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown genus)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "(unknown rank)", - ref = NA_character_, - species_id = "", - source = "manually added", - stringsAsFactors = FALSE), - data.frame(mo = "B_GRAMN", - col_id = NA_integer_, - fullname = "(unknown Gram-negatives)", - kingdom = "Bacteria", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown Gram-negatives)", - species = "(unknown species)", - subspecies = "(unknown subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - stringsAsFactors = FALSE), - data.frame(mo = "B_GRAMP", - col_id = NA_integer_, - fullname = "(unknown Gram-positives)", - kingdom = "Bacteria", - phylum = "(unknown phylum)", - class = "(unknown class)", - order = "(unknown order)", - family = "(unknown family)", - genus = "(unknown Gram-positives)", - species = "(unknown species)", - subspecies = "(unknown 
subspecies)", - rank = "species", - ref = NA_character_, - species_id = "", - source = "manually added", - stringsAsFactors = FALSE), - # CoNS - MOs %>% - filter(genus == "Staphylococcus", species == "") %>% .[1,] %>% - mutate(mo = paste(mo, "CNS", sep = "_"), - rank = "species", - col_id = NA_integer_, - species = "coagulase-negative", - fullname = "Coagulase-negative Staphylococcus (CoNS)", - ref = NA_character_, - species_id = "", - source = "manually added"), - # CoPS - MOs %>% - filter(genus == "Staphylococcus", species == "") %>% .[1,] %>% - mutate(mo = paste(mo, "CPS", sep = "_"), - rank = "species", - col_id = NA_integer_, - species = "coagulase-positive", - fullname = "Coagulase-positive Staphylococcus (CoPS)", - ref = NA_character_, - species_id = "", - source = "manually added"), - # Streptococci groups A, B, C, F, H, K - MOs %>% - filter(genus == "Streptococcus", species == "pyogenes") %>% .[1,] %>% - # we can keep all other details, since S. pyogenes is the only member of group A - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRA", sep = "_"), - species = "group A" , - fullname = "Streptococcus group A"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - # we can keep all other details, since S. 
agalactiae is the only member of group B - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRB", sep = "_"), - species = "group B" , - fullname = "Streptococcus group B"), - MOs %>% - filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRC", sep = "_"), - col_id = NA_integer_, - species = "group C" , - fullname = "Streptococcus group C", - ref = NA_character_, - species_id = "", - source = "manually added"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRD", sep = "_"), - col_id = NA_integer_, - species = "group D" , - fullname = "Streptococcus group D", - ref = NA_character_, - species_id = "", - source = "manually added"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRF", sep = "_"), - col_id = NA_integer_, - species = "group F" , - fullname = "Streptococcus group F", - ref = NA_character_, - species_id = "", - source = "manually added"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRG", sep = "_"), - col_id = NA_integer_, - species = "group G" , - fullname = "Streptococcus group G", - ref = NA_character_, - species_id = "", - source = "manually added"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRH", sep = "_"), - col_id = NA_integer_, - species = "group H" , - fullname = "Streptococcus group H", - ref = NA_character_, - species_id = "", - source = "manually added"), - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "GRK", sep = "_"), - col_id = NA_integer_, - 
species = "group K" , - fullname = "Streptococcus group K", - ref = NA_character_, - species_id = "", - source = "manually added"), - # Beta-haemolytic Streptococci - MOs %>% - filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>% - mutate(mo = paste(MOs[MOs$fullname == "Streptococcus",]$mo, "HAE", sep = "_"), - col_id = NA_integer_, - species = "beta-haemolytic" , - fullname = "Beta-haemolytic Streptococcus", - ref = NA_character_, - species_id = "", - source = "manually added") - ) - - -# everything distinct? -sum(duplicated(MOs$mo)) -colnames(MOs) - -# set prevalence per species -MOs <- MOs %>% - mutate(prevalence = case_when( - class == "Gammaproteobacteria" - | genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus") - | mo %in% c("UNKNOWN", "B_GRAMN", "B_GRAMP") - ~ 1, - phylum %in% c("Proteobacteria", - "Firmicutes", - "Actinobacteria", - "Sarcomastigophora") - | genus %in% c("Aspergillus", - "Bacteroides", - "Candida", - "Capnocytophaga", - "Chryseobacterium", - "Cryptococcus", - "Elisabethkingia", - "Flavobacterium", - "Fusobacterium", - "Giardia", - "Leptotrichia", - "Mycoplasma", - "Prevotella", - "Rhodotorula", - "Treponema", - "Trichophyton", - "Trichomonas", - "Ureaplasma") - | rank %in% c("kingdom", "phylum", "class", "order", "family") - ~ 2, - TRUE ~ 3 - )) - -# arrange -MOs <- MOs %>% arrange(fullname) - -# transform -MOs <- as.data.frame(MOs, stringsAsFactors = FALSE) -MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE) -class(MOs$mo) <- "mo" -MOs$col_id <- as.integer(MOs$col_id) - -# get differences in MO codes between this data and the package version -MO_diff <- AMR::microorganisms %>% - mutate(pastedtext = paste(mo, fullname)) %>% - filter(!pastedtext %in% (MOs %>% mutate(pastedtext = paste(mo, fullname)) %>% pull(pastedtext))) %>% - select(mo_old = mo, fullname, pastedtext) %>% - left_join(MOs %>% - transmute(mo_new = mo, fullname_new = fullname, pastedtext = paste(mo, fullname)), "pastedtext") %>% - 
select(mo_old, mo_new, fullname_new) - -mo_diff2 <- AMR::microorganisms %>% - select(mo, fullname) %>% - left_join(MOs %>% - select(mo, fullname), - by = "fullname", - suffix = c("_old", "_new")) %>% - filter(mo_old != mo_new, - #!mo_new %in% mo_old, - !mo_old %like% "\\[") - -mo_diff3 <- tibble(previous_old = names(AMR:::make_trans_tbl()), - previous_new = AMR:::make_trans_tbl()) %>% - left_join(AMR::microorganisms %>% select(mo, fullname), by = c(previous_new = "mo")) %>% - left_join(MOs %>% select(mo_new = mo, fullname), by = "fullname") - -# what did we win most? -MOs %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% freq(genus) -# what did we lose most? -AMR::microorganisms %>% - filter(kingdom != "Chromista" & !fullname %in% MOs$fullname & !fullname %in% MOs.old$fullname) %>% - freq(genus) - - -# save -saveRDS(MOs, "microorganisms.rds") -saveRDS(MOs.old, "microorganisms.old.rds") - -# on the server, do: -usethis::use_data(microorganisms, overwrite = TRUE, version = 2) -usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2) -rm(microorganisms) -rm(microorganisms.old) - -# TO DO AFTER THIS -# * Update the year and dim()s in R/data.R -# * Rerun data-raw/reproduction_of_rsi_translation.R -# * Run unit tests diff --git a/data/microorganisms.old.rda b/data/microorganisms.old.rda index 02aabf5a..f0910ad1 100644 Binary files a/data/microorganisms.old.rda and b/data/microorganisms.old.rda differ diff --git a/data/microorganisms.rda b/data/microorganisms.rda index cedc2d0d..db8f7a99 100644 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/docs/404.html b/docs/404.html index 44b54f27..40d22457 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 5aa5c5bf..070e9fa7 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 diff --git 
a/docs/articles/AMR.html b/docs/articles/AMR.html index 1d459a7a..fd671271 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -39,7 +39,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021 @@ -186,7 +186,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

25 May 2020

+

28 May 2020

Source: vignettes/AMR.Rmd @@ -195,7 +195,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 25 May 2020.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in R Markdown. However, the methodology remains unchanged. This page was generated on 28 May 2020.

Introduction

@@ -226,21 +226,21 @@ -2020-05-25 +2020-05-28 abcd Escherichia coli S S -2020-05-25 +2020-05-28 abcd Escherichia coli S R -2020-05-25 +2020-05-28 efgh Escherichia coli R @@ -336,71 +336,71 @@ -2015-03-17 -U1 -Hospital D -Escherichia coli -S -S -S -S -F - - -2017-08-02 -P6 +2015-07-23 +Z5 Hospital B Streptococcus pneumoniae -R +S S S S F - -2017-06-24 -E4 -Hospital C -Escherichia coli -S -S -S -S -M - -2011-02-12 -I10 -Hospital D -Streptococcus pneumoniae -R -S -S -R -M - - -2010-03-17 -Q3 +2017-01-21 +D6 Hospital C Staphylococcus aureus -R -S -S -S -F - - -2010-08-19 -A7 -Hospital D -Escherichia coli S S S S M + +2014-05-20 +Y10 +Hospital A +Escherichia coli +S +I +S +S +F + + +2017-11-02 +M10 +Hospital D +Staphylococcus aureus +S +S +R +S +M + + +2014-08-26 +B4 +Hospital C +Staphylococcus aureus +S +S +S +S +M + + +2013-05-29 +R3 +Hospital B +Escherichia coli +S +R +S +S +F +

Now, let’s start the cleaning and the analysis!

@@ -432,16 +432,16 @@ Longest: 1

1 M -10,403 -52.02% -10,403 -52.02% +10,518 +52.59% +10,518 +52.59% 2 F -9,597 -47.99% +9,482 +47.41% 20,000 100.00% @@ -481,7 +481,7 @@ Longest: 1

# NOTE: Using column `bacteria` as input for `col_mo`. # NOTE: Using column `date` as input for `col_date`. # NOTE: Using column `patient_id` as input for `col_patient_id`.
-

So only 28.2% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

+

So only 28.5% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>%
   filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

@@ -491,7 +491,7 @@ Longest: 1

First weighted isolates

-

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient K4, sorted on date:

+

We made a slight twist to the CLSI algorithm, to take into account the antimicrobial susceptibility profile. Have a look at all isolates of patient P10, sorted on date:

@@ -507,52 +507,52 @@ Longest: 1

- - + + - - + + - - + + - - + + - - + + - + - + - - + + + - - + - - + + @@ -562,62 +562,62 @@ Longest: 1

- - + + - + - - + + - - + + - - + + - + - - + + - - + + - - + + - +
isolate
12010-01-01K42010-01-22P10 B_ESCHR_COLIRRIS S S TRUE
22010-02-09K42010-02-24P10 B_ESCHR_COLI SIR SSR FALSE
32010-03-03K42010-03-10P10 B_ESCHR_COLIIS S RSR FALSE
42010-04-25K42010-03-25P10 B_ESCHR_COLIR S SSSR FALSE
52010-07-04K42010-04-30P10 B_ESCHR_COLI S S
62010-09-04K42010-05-05P10 B_ESCHR_COLI SI SR S FALSE
72010-10-01K42010-05-19P10 B_ESCHR_COLIIS R SSS FALSE
82011-03-20K42010-05-27P10 B_ESCHR_COLI S S S STRUEFALSE
92011-06-26K42010-10-01P10 B_ESCHR_COLI SRRSS S FALSE
102011-10-22K42010-11-19P10 B_ESCHR_COLI SSR S S FALSE
-

Only 2 isolates are marked as ‘first’ according to CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

+

Only 1 isolate is marked as ‘first’ according to the CLSI guideline. But when reviewing the antibiogram, it is obvious that some isolates are absolutely different strains and should be included too. This is why we weigh isolates, based on their antibiogram. The key_antibiotics() function adds a vector with 18 key antibiotics: 6 broad spectrum ones, 6 small spectrum for Gram negatives and 6 small spectrum for Gram positives. These can be defined by the user.

If a column exists with a name like ‘key(…)ab’ the first_isolate() function will automatically use it and determine the first weighted isolates. Mind the NOTEs in below output:

data <- data %>%
   mutate(keyab = key_antibiotics(.)) %>%
@@ -643,11 +643,11 @@ Longest: 1

1 -2010-01-01 -K4 +2010-01-22 +P10 B_ESCHR_COLI -R -R +I +S S S TRUE @@ -655,32 +655,44 @@ Longest: 1

2 -2010-02-09 -K4 +2010-02-24 +P10 B_ESCHR_COLI S -I -R S +S +R FALSE TRUE 3 -2010-03-03 -K4 +2010-03-10 +P10 B_ESCHR_COLI -I +S S R -S -FALSE +R FALSE +TRUE 4 -2010-04-25 -K4 +2010-03-25 +P10 +B_ESCHR_COLI +R +S +S +R +FALSE +TRUE + + +5 +2010-04-30 +P10 B_ESCHR_COLI S S @@ -689,85 +701,73 @@ Longest: 1

FALSE TRUE - -5 -2010-07-04 -K4 -B_ESCHR_COLI -S -S -S -S -FALSE -FALSE - 6 -2010-09-04 -K4 +2010-05-05 +P10 B_ESCHR_COLI S -I S +R S FALSE -FALSE +TRUE 7 -2010-10-01 -K4 +2010-05-19 +P10 B_ESCHR_COLI -I -S R S +S +S FALSE TRUE 8 -2011-03-20 -K4 +2010-05-27 +P10 B_ESCHR_COLI S S S S -TRUE -TRUE - - -9 -2011-06-26 -K4 -B_ESCHR_COLI -S -R -R -S FALSE TRUE - -10 -2011-10-22 -K4 + +9 +2010-10-01 +P10 B_ESCHR_COLI S S S S FALSE +FALSE + + +10 +2010-11-19 +P10 +B_ESCHR_COLI +S +R +S +S +FALSE TRUE -

Instead of 2, now 7 isolates are flagged. In total, 78.4% of all isolates are marked ‘first weighted’ - 50.1% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 1, now 9 isolates are flagged. In total, 78.3% of all isolates are marked ‘first weighted’ - 49.8% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>%
   filter_first_weighted_isolate()
-

So we end up with 15,673 isolates for analysis.

+

So we end up with 15,654 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>%
   select(-c(first, keyab))
@@ -793,75 +793,27 @@ Longest: 1

1 -2015-03-17 -U1 -Hospital D -B_ESCHR_COLI -S +2015-07-23 +Z5 +Hospital B +B_STRPT_PNMN S S S +R F -Gram-negative -Escherichia -coli +Gram-positive +Streptococcus +pneumoniae TRUE 2 -2017-08-02 -P6 -Hospital B -B_STRPT_PNMN -R -R -S -R -F -Gram-positive -Streptococcus -pneumoniae -TRUE - - -4 -2011-02-12 -I10 -Hospital D -B_STRPT_PNMN -R -R -S -R -M -Gram-positive -Streptococcus -pneumoniae -TRUE - - -6 -2010-08-19 -A7 -Hospital D -B_ESCHR_COLI -S -S -S -S -M -Gram-negative -Escherichia -coli -TRUE - - -7 -2013-04-06 -H5 -Hospital A +2017-01-21 +D6 +Hospital C B_STPHY_AURS -R +S S S S @@ -871,20 +823,68 @@ Longest: 1

aureus TRUE - -8 -2013-12-11 -J8 -Hospital C -B_KLBSL_PNMN + +4 +2017-11-02 +M10 +Hospital D +B_STPHY_AURS +S +S R -I +S +M +Gram-positive +Staphylococcus +aureus +TRUE + + +5 +2014-08-26 +B4 +Hospital C +B_STPHY_AURS +S +S S S M +Gram-positive +Staphylococcus +aureus +TRUE + + +6 +2013-05-29 +R3 +Hospital B +B_ESCHR_COLI +S +R +S +S +F Gram-negative -Klebsiella -pneumoniae +Escherichia +coli +TRUE + + +7 +2013-12-10 +T3 +Hospital B +B_ESCHR_COLI +S +S +S +S +F +Gram-negative +Escherichia +coli TRUE @@ -906,8 +906,8 @@ Longest: 1

data_1st %>% freq(genus, species)

Frequency table

Class: character
-Length: 15,673
-Available: 15,673 (100%, NA: 0 = 0%)
+Length: 15,654
+Available: 15,654 (100%, NA: 0 = 0%)
Unique: 4

Shortest: 16
Longest: 24

@@ -924,33 +924,33 @@ Longest: 24

1 Escherichia coli -7,843 -50.04% -7,843 -50.04% +7,778 +49.69% +7,778 +49.69% 2 Staphylococcus aureus -3,949 -25.20% -11,792 -75.24% +3,999 +25.55% +11,777 +75.23% 3 Streptococcus pneumoniae -2,320 -14.80% -14,112 -90.04% +2,300 +14.69% +14,077 +89.93% 4 Klebsiella pneumoniae -1,561 -9.96% -15,673 +1,577 +10.07% +15,654 100.00% @@ -962,7 +962,7 @@ Longest: 24

The functions resistance() and susceptibility() can be used to calculate antimicrobial resistance or susceptibility. For more specific analyses, the functions proportion_S(), proportion_SI(), proportion_I(), proportion_IR() and proportion_R() can be used to determine the proportion of a specific antimicrobial outcome.

As per the EUCAST guideline of 2019, we calculate resistance as the proportion of R (proportion_R(), equal to resistance()) and susceptibility as the proportion of S and I (proportion_SI(), equal to susceptibility()). These functions can be used on their own:

data_1st %>% resistance(AMX)
-# [1] 0.441396
+# [1] 0.4435288

Or can be used in conjunction with group_by() and summarise(), both from the dplyr package:

data_1st %>%
   group_by(hospital) %>%
@@ -975,19 +975,19 @@ Longest: 24

Hospital A -0.4339461 +0.4461239 Hospital B -0.4463033 +0.4404762 Hospital C -0.4511013 +0.4549763 Hospital D -0.4368288 +0.4366688 @@ -1005,23 +1005,23 @@ Longest: 24

Hospital A -0.4339461 -4678 +0.4461239 +4631 Hospital B -0.4463033 -5559 +0.4404762 +5544 Hospital C -0.4511013 -2270 +0.4549763 +2321 Hospital D -0.4368288 -3166 +0.4366688 +3158 @@ -1041,27 +1041,27 @@ Longest: 24

Escherichia -0.8236644 -0.8999107 -0.9832972 +0.8215480 +0.8976601 +0.9835433 Klebsiella -0.8315183 -0.8923767 -0.9820628 +0.8300571 +0.8972733 +0.9866836 Staphylococcus -0.8219802 -0.9161813 -0.9853127 +0.8202051 +0.9229807 +0.9832458 Streptococcus -0.6224138 +0.6204348 0.0000000 -0.6224138 +0.6204348 diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index c99032eb..6c7b7504 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index f0a36370..b603f607 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index bc55a496..fd031cf1 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index 50ac57dd..958d0f1a 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/MDR.html b/docs/articles/MDR.html index b0e346dc..cc06cb1f 100644 --- a/docs/articles/MDR.html +++ b/docs/articles/MDR.html @@ -39,7 +39,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021
@@ -186,7 +186,7 @@

How to determine multi-drug resistance (MDR)

Matthijs S. Berends

-

25 May 2020

+

28 May 2020

Source: vignettes/MDR.Rmd @@ -259,16 +259,16 @@ Unique: 2

1 Negative -1596 -93.28% -1596 -93.28% +1595 +93.22% +1595 +93.22% 2 Multi-drug-resistant (MDR) -115 -6.72% +116 +6.78% 1711 100.00% @@ -302,19 +302,19 @@ Unique: 2

The data set now looks like this:

head(my_TB_data)
 #   rifampicin isoniazid gatifloxacin ethambutol pyrazinamide moxifloxacin
-# 1          S         S            R          R            S            S
-# 2          R         R            S          R            S            R
-# 3          R         S            S          R            S            S
-# 4          R         S            S          S            S            R
-# 5          S         R            R          S            R            R
-# 6          R         R            R          R            R            R
+# 1          R         S            R          I            I            S
+# 2          S         R            S          I            R            I
+# 3          S         S            I          S            R            R
+# 4          I         R            S          S            I            S
+# 5          S         S            R          R            R            S
+# 6          I         R            R          R            R            S
 #   kanamycin
 # 1         R
 # 2         S
-# 3         S
+# 3         I
 # 4         S
 # 5         R
-# 6         R
+# 6 I

We can now add the interpretation of MDR-TB to our data set. You can use:

mdro(my_TB_data, guideline = "TB")

or its shortcut mdr_tb():

@@ -343,40 +343,40 @@ Unique: 5

1 Mono-resistant -3239 -64.78% -3239 -64.78% +3192 +63.84% +3192 +63.84% 2 Negative -655 -13.10% -3894 -77.88% +667 +13.34% +3859 +77.18% 3 Multi-drug-resistant -593 -11.86% -4487 -89.74% +637 +12.74% +4496 +89.92% 4 Poly-resistant -304 -6.08% -4791 -95.82% +287 +5.74% +4783 +95.66% 5 Extensively drug-resistant -209 -4.18% +217 +4.34% 5000 100.00% diff --git a/docs/articles/PCA.html b/docs/articles/PCA.html index 0eb49c22..e65fca7d 100644 --- a/docs/articles/PCA.html +++ b/docs/articles/PCA.html @@ -39,7 +39,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021 @@ -186,7 +186,7 @@

How to conduct principal component analysis (PCA) for AMR

Matthijs S. Berends

-

25 May 2020

+

28 May 2020

Source: vignettes/PCA.Rmd @@ -269,14 +269,14 @@ head(resistance_data) # # A tibble: 6 x 10 # # Groups: order [2] -# order genus AMC CXM CTX CAZ GEN TOB TMP SXT -# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -# 1 (unknown orde… Micrococcoides NA NA NA NA NA NA NA NA -# 2 Actinomycetal… Actinomyces NA NA NA NA NA NA NA NA -# 3 Actinomycetal… Corynebacterium NA NA NA NA NA NA NA NA -# 4 Actinomycetal… Dermabacter NA NA NA NA NA NA NA NA -# 5 Actinomycetal… Micrococcus NA NA NA NA NA NA NA NA -# 6 Actinomycetal… Propionibacter… NA NA NA NA NA NA NA NA +# order genus AMC CXM CTX CAZ GEN TOB TMP SXT +# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> +# 1 (unknown order) (unknown genu… NA NA NA NA NA NA NA NA +# 2 Actinomycetales Corynebacteri… NA NA NA NA NA NA NA NA +# 3 Actinomycetales Cutibacterium NA NA NA NA NA NA NA NA +# 4 Actinomycetales Dermabacter NA NA NA NA NA NA NA NA +# 5 Actinomycetales Micrococcus NA NA NA NA NA NA NA NA +# 6 Actinomycetales Rothia NA NA NA NA NA NA NA NA

@@ -288,11 +288,11 @@

The result can be reviewed with the good old summary() function:

summary(pca_result)
 # Importance of components:
-#                           PC1    PC2     PC3     PC4     PC5     PC6       PC7
-# Standard deviation     2.1580 1.6783 0.61282 0.33017 0.20150 0.03190 2.123e-16
-# Proportion of Variance 0.5821 0.3521 0.04694 0.01363 0.00508 0.00013 0.000e+00
-# Cumulative Proportion  0.5821 0.9342 0.98117 0.99480 0.99987 1.00000 1.000e+00
-

Good news. The first two components explain a total of 93.4% of the variance (see the PC1 and PC2 values of the Proportion of Variance. We can create a so-called biplot with the base R biplot() function, to see which antimicrobial resistance per drug explain the difference per microorganism.

+# PC1 PC2 PC3 PC4 PC5 PC6 PC7 +# Standard deviation 2.154 1.6809 0.61305 0.33882 0.20755 0.03137 1.602e-16 +# Proportion of Variance 0.580 0.3532 0.04698 0.01435 0.00538 0.00012 0.000e+00 +# Cumulative Proportion 0.580 0.9332 0.98014 0.99449 0.99988 1.00000 1.000e+00

+

Good news. The first two components explain a total of 93.3% of the variance (see the PC1 and PC2 values of the Proportion of Variance. We can create a so-called biplot with the base R biplot() function, to see which antimicrobial resistance per drug explain the difference per microorganism.

diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png index eaaafe4d..dd2f5d11 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png index db8eb744..111fcb5b 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png index 5fca6abd..077965a4 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 7cf15ca7..6485137b 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -39,7 +39,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021

@@ -186,7 +186,7 @@

Benchmarks

Matthijs S. Berends

-

25 May 2020

+

28 May 2020

Source: vignettes/benchmarks.Rmd @@ -221,21 +221,36 @@ times = 10) print(S.aureus, unit = "ms", signif = 2) # Unit: milliseconds -# expr min lq mean median uq max neval -# as.mo("sau") 9.7 11.0 15 11.0 12.0 49 10 -# as.mo("stau") 130.0 150.0 180 170.0 190.0 270 10 -# as.mo("STAU") 130.0 130.0 140 140.0 150.0 170 10 -# as.mo("staaur") 7.9 9.6 15 11.0 12.0 40 10 -# as.mo("STAAUR") 9.1 9.4 16 11.0 12.0 41 10 -# as.mo("S. aureus") 8.5 11.0 12 12.0 13.0 16 10 -# as.mo("S aureus") 8.4 11.0 17 12.0 14.0 47 10 -# as.mo("Staphylococcus aureus") 6.4 8.6 12 8.8 9.8 40 10 -# as.mo("Staphylococcus aureus (MRSA)") 830.0 860.0 910 900.0 930.0 1100 10 -# as.mo("Sthafilokkockus aaureuz") 370.0 390.0 410 400.0 430.0 440 10 -# as.mo("MRSA") 9.2 9.5 16 11.0 11.0 63 10 -# as.mo("VISA") 22.0 22.0 36 26.0 54.0 59 10 -# as.mo("VRSA") 21.0 23.0 48 27.0 55.0 180 10 -# as.mo(22242419) 150.0 160.0 160 160.0 170.0 190 10 +# expr min lq mean median uq max +# as.mo("sau") 8.2 9.7 22.0 14.0 38.0 43 +# as.mo("stau") 130.0 130.0 160.0 170.0 170.0 190 +# as.mo("STAU") 120.0 130.0 150.0 150.0 180.0 190 +# as.mo("staaur") 8.3 9.1 13.0 9.4 10.0 40 +# as.mo("STAAUR") 8.0 9.3 15.0 10.0 13.0 35 +# as.mo("S. aureus") 9.3 11.0 26.0 14.0 15.0 120 +# as.mo("S aureus") 10.0 12.0 22.0 13.0 38.0 49 +# as.mo("Staphylococcus aureus") 6.6 7.7 8.5 8.6 9.1 10 +# as.mo("Staphylococcus aureus (MRSA)") 820.0 860.0 890.0 880.0 930.0 1000 +# as.mo("Sthafilokkockus aaureuz") 350.0 350.0 370.0 370.0 370.0 380 +# as.mo("MRSA") 7.8 9.1 13.0 10.0 11.0 42 +# as.mo("VISA") 11.0 12.0 19.0 13.0 15.0 47 +# as.mo("VRSA") 11.0 12.0 21.0 14.0 39.0 42 +# as.mo(22242419) 130.0 140.0 150.0 140.0 150.0 190 +# neval +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10

In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second.

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside of this is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Methanosarcina semesiae (B_MTHNSR_SEMS), a bug probably never found before in humans:

@@ -247,12 +262,12 @@ times = 10) print(M.semesiae, unit = "ms", signif = 4) # Unit: milliseconds -# expr min lq mean median uq max -# as.mo("metsem") 152.700 163.100 172.10 171.40 176.20 213.80 -# as.mo("METSEM") 148.300 167.900 181.50 185.30 196.70 204.60 -# as.mo("M. semesiae") 8.610 9.468 13.88 10.05 15.28 35.20 -# as.mo("M. semesiae") 9.164 9.530 21.29 11.58 42.98 50.57 -# as.mo("Methanosarcina semesiae") 6.625 7.229 14.25 7.98 11.32 42.18 +# expr min lq mean median uq max +# as.mo("metsem") 135.600 143.500 157.800 153.400 175.70 191.40 +# as.mo("METSEM") 139.700 140.800 158.100 148.300 176.10 188.50 +# as.mo("M. semesiae") 9.010 9.317 13.280 9.802 12.51 40.46 +# as.mo("M. semesiae") 9.155 9.321 9.665 9.557 10.07 10.37 +# as.mo("Methanosarcina semesiae") 6.737 7.160 16.020 8.413 33.21 37.10 # neval # 10 # 10 @@ -292,8 +307,8 @@ print(run_it, unit = "ms", signif = 3) # Unit: milliseconds # expr min lq mean median uq max neval -# mo_name(x) 1700 1760 1780 1770 1800 1880 10 -

So transforming 500,000 values (!!) of 50 unique values only takes 1.77 seconds. You only lose time on your unique input values.

+# mo_name(x) 1670 1740 1800 1790 1880 1900 10 +

So transforming 500,000 values (!!) of 50 unique values only takes 1.79 seconds. You only lose time on your unique input values.

@@ -305,10 +320,10 @@ times = 10) print(run_it, unit = "ms", signif = 3) # Unit: milliseconds -# expr min lq mean median uq max neval -# A 5.640 5.890 10.300 6.580 6.850 44.400 10 -# B 11.000 11.300 11.500 11.400 11.500 12.400 10 -# C 0.217 0.238 0.271 0.267 0.298 0.383 10

+# expr min lq mean median uq max neval +# A 5.630 5.900 6.420 6.480 6.910 7.120 10 +# B 9.940 11.300 15.000 11.800 12.300 45.300 10 +# C 0.247 0.277 0.315 0.295 0.358 0.386 10

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0003 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
@@ -322,14 +337,14 @@
 print(run_it, unit = "ms", signif = 3)
 # Unit: milliseconds
 #  expr   min    lq  mean median    uq   max neval
-#     A 0.205 0.208 0.230  0.219 0.240 0.307    10
-#     B 0.201 0.216 0.227  0.223 0.229 0.293    10
-#     C 0.209 0.210 0.224  0.218 0.241 0.255    10
-#     D 0.200 0.211 0.222  0.217 0.224 0.279    10
-#     E 0.199 0.209 0.220  0.211 0.219 0.280    10
-#     F 0.201 0.205 0.228  0.212 0.217 0.346    10
-#     G 0.193 0.208 0.218  0.217 0.220 0.272    10
-#     H 0.190 0.195 0.206  0.200 0.203 0.263    10
+# A 0.207 0.214 0.241 0.218 0.240 0.412 10 +# B 0.178 0.207 0.215 0.211 0.219 0.272 10 +# C 0.218 0.221 0.234 0.233 0.240 0.264 10 +# D 0.203 0.211 0.221 0.214 0.218 0.290 10 +# E 0.208 0.209 0.223 0.214 0.227 0.279 10 +# F 0.172 0.203 0.222 0.212 0.222 0.302 10 +# G 0.200 0.202 0.212 0.208 0.213 0.253 10 +# H 0.193 0.194 0.208 0.200 0.215 0.261 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -356,13 +371,13 @@ print(run_it, unit = "ms", signif = 4) # Unit: milliseconds # expr min lq mean median uq max neval -# en 20.45 21.03 25.78 21.63 27.29 71.19 100 -# de 21.48 22.06 28.50 22.81 28.56 73.54 100 -# nl 25.11 26.21 33.48 27.31 35.01 81.28 100 -# es 21.59 22.20 29.02 23.05 30.33 73.83 100 -# it 21.46 22.05 28.45 22.79 29.45 64.36 100 -# fr 21.47 22.12 31.87 22.83 31.24 174.10 100 -# pt 21.53 22.22 27.19 22.91 25.67 71.87 100
+# en 11.24 11.97 16.09 13.31 13.71 48.40 100 +# de 12.18 12.84 19.63 14.33 15.00 55.14 100 +# nl 16.24 17.07 24.36 18.61 19.83 55.70 100 +# es 12.23 12.77 17.38 13.71 14.72 51.57 100 +# it 12.15 13.01 18.55 14.03 14.81 146.90 100 +# fr 12.23 13.08 16.81 14.30 14.86 49.30 100 +# pt 12.30 12.85 18.13 14.24 14.88 49.70 100

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png index 3548c4a0..78c0b610 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png index 3bb56f1d..c48d8145 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index dae70693..77f6f0c5 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html index 35e84e5d..234dd590 100644 --- a/docs/articles/resistance_predict.html +++ b/docs/articles/resistance_predict.html @@ -39,7 +39,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021 @@ -186,7 +186,7 @@

How to predict antimicrobial resistance

Matthijs S. Berends

-

25 May 2020

+

28 May 2020

Source: vignettes/resistance_predict.Rmd diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png index f8a6b159..4439d9bf 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png index abe8c085..7960642c 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 6cdb4dca..94e69a1f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 diff --git a/docs/countries.png b/docs/countries.png index 3429e4ca..445ee879 100644 Binary files a/docs/countries.png and b/docs/countries.png differ diff --git a/docs/countries_large.png b/docs/countries_large.png index db9aff9e..edf17cf7 100644 Binary files a/docs/countries_large.png and b/docs/countries_large.png differ diff --git a/docs/index.html b/docs/index.html index c0528341..5070eb9c 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 @@ -200,7 +200,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/

(To find out how to conduct AMR analysis, please continue reading here to get started.)

AMR is a free, open-source and independent R package to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. Our aim is to provide a standard for clean and reproducible antimicrobial resistance data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting.

After installing this package, R knows ~70,000 distinct microbial species and all ~550 antibiotic, antimycotic and antiviral drugs by name and code (including ATC, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data.

-

This package was created for both routine data analysis and academic research, at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology & Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is actively maintained and is free software (see Copyright). It is fully independent of any other R package, can be used with all versions of R since R-3.0.0 (April 2013) and has a total file size of only 5 MB. It was designed to work in any setting, including those with very limited resources.

+

This package was created for both routine data analysis and academic research, at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology & Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is actively maintained and is free software (see Copyright). It is fully independent of any other R package, works with all versions of R since R-3.0.0 (April 2013) and has a total file size of only 5 MB. It was designed to work in any setting, including those with very limited resources.

Used in more than 100 countries
Since its first public release in early 2018, this package has been downloaded from more than 100 countries (source: CRAN logs). Click the map to enlarge, to see the names of the countries. diff --git a/docs/news/index.html b/docs/news/index.html index 19a5bd93..619e5378 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021

@@ -229,13 +229,13 @@ Source: NEWS.md -
-

-AMR 1.1.0.9020 Unreleased +
+

+AMR 1.1.0.9021 Unreleased

-
+

-Last updated: 27-May-2020 +Last updated: 28-May-2020

@@ -260,7 +260,7 @@ Negative effects of this change are:
  • Taxonomy:
      -
    • Updated the taxonomy of microorganisms tot May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)
    • +
    • Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020). Note: a taxonomic update may always impact determination of first isolates (using first_isolate()), since some bacterial names might be renamed to other genera or other (sub)species. This is expected behaviour.
    • Removed the Catalogue of Life IDs (like 776351), since they now work with a species ID (hexadecimal string)
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 2c7066e3..0ea0b550 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -10,7 +10,7 @@ articles: WHONET: WHONET.html benchmarks: benchmarks.html resistance_predict: resistance_predict.html -last_built: 2020-05-27T14:37Z +last_built: 2020-05-28T08:48Z urls: reference: https://msberends.gitlab.io/AMR/reference article: https://msberends.gitlab.io/AMR/articles diff --git a/docs/reference/ab_property.html b/docs/reference/ab_property.html index d2d79529..2c263ed7 100644 --- a/docs/reference/ab_property.html +++ b/docs/reference/ab_property.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9019 + 1.1.0.9021

diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 834dec24..7e05b9d6 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021
diff --git a/docs/reference/catalogue_of_life.html b/docs/reference/catalogue_of_life.html index e58aed9e..5b70de3b 100644 --- a/docs/reference/catalogue_of_life.html +++ b/docs/reference/catalogue_of_life.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021
diff --git a/docs/reference/index.html b/docs/reference/index.html index df855040..dba6713c 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021
@@ -472,7 +472,7 @@

microorganisms

-

Data set with 67,107 microorganisms

+

Data set with 67,108 microorganisms

diff --git a/docs/reference/microorganisms.codes.html b/docs/reference/microorganisms.codes.html index b595f329..4dca81bf 100644 --- a/docs/reference/microorganisms.codes.html +++ b/docs/reference/microorganisms.codes.html @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021

diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 2439de91..ffe572ef 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -6,7 +6,7 @@ -Data set with 67,107 microorganisms — microorganisms • AMR (for R) +Data set with 67,108 microorganisms — microorganisms • AMR (for R) @@ -48,7 +48,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.1.0.9020 + 1.1.0.9021 @@ -226,7 +226,7 @@
@@ -240,7 +240,7 @@

Format

-

A data.frame with 67,107 observations and 16 variables:

@@ -240,7 +240,7 @@

Format

-

A data.frame with 12,709 observations and 4 variables:

diff --git a/docs/reference/mo_source.html b/docs/reference/mo_source.html index 075dfc5d..fa90545c 100644 --- a/docs/reference/mo_source.html +++ b/docs/reference/mo_source.html @@ -83,7 +83,7 @@ This is the fastest way to have your organisation (or analysis) specific codes p AMR (for R) - 1.1.0.9019 + 1.1.0.9021 diff --git a/index.md b/index.md index f63bb4c0..60f25924 100644 --- a/index.md +++ b/index.md @@ -11,7 +11,7 @@ After installing this package, R knows [**~70,000 distinct microbial species**](./reference/microorganisms.html) and all [**~550 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. It supports any data format, including WHONET/EARS-Net data. -This package was created for both routine data analysis and academic research, at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology & Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is [actively maintained](./news) and is free software (see [Copyright](#copyright)). It is fully independent of any other R package, can be used with all versions of R since R-3.0.0 (April 2013) and has a total file size of only 5 MB. It was designed to work in any setting, including those with very limited resources. +This package was created for both routine data analysis and academic research, at the Faculty of Medical Sciences of the University of Groningen, the Netherlands, and the Medical Microbiology & Infection Prevention (MMBI) department of the University Medical Center Groningen (UMCG). This R package is [actively maintained](./news) and is free software (see [Copyright](#copyright)). It is fully independent of any other R package, works with all versions of R since R-3.0.0 (April 2013) and has a total file size of only 5 MB. 
It was designed to work in any setting, including those with very limited resources.

diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 690c06c1..84e5dc4c 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -3,9 +3,9 @@ \docType{data} \name{microorganisms} \alias{microorganisms} -\title{Data set with 67,107 microorganisms} +\title{Data set with 67,108 microorganisms} \format{ -A \code{\link{data.frame}} with 67,107 observations and 16 variables: +A \code{\link{data.frame}} with 67,108 observations and 16 variables: \itemize{ \item \code{mo}\cr ID of microorganism as used by this package \item \code{fullname}\cr Full name, like \code{"Escherichia coli"} @@ -40,7 +40,7 @@ Manually added were: \item 1 entry of \emph{Blastocystis} (\emph{Blastocystis hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993) \item 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus) \item 6 families under the Enterobacterales order, according to Adeolu \emph{et al.} (2016, PMID 27620848), that are not (yet) in the Catalogue of Life -\item 7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications +\item 7,369 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications } \subsection{Direct download}{ diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd index 01bb7979..f62305fd 100644 --- a/man/microorganisms.old.Rd +++ b/man/microorganisms.old.Rd @@ -5,7 +5,7 @@ \alias{microorganisms.old} \title{Data set with previously accepted taxonomic names} \format{ -A \code{\link{data.frame}} with 12,709 observations and 4 variables: +A \code{\link{data.frame}} with 12,708 observations and 4 variables: \itemize{ \item \code{fullname}\cr Old full taxonomic name of the microorganism \item \code{fullname_new}\cr New full taxonomic name 
of the microorganism diff --git a/pkgdown/logos/countries.png b/pkgdown/logos/countries.png index 3429e4ca..445ee879 100644 Binary files a/pkgdown/logos/countries.png and b/pkgdown/logos/countries.png differ diff --git a/pkgdown/logos/countries_large.png b/pkgdown/logos/countries_large.png index db9aff9e..edf17cf7 100644 Binary files a/pkgdown/logos/countries_large.png and b/pkgdown/logos/countries_large.png differ diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index edb67752..99b08efe 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -34,7 +34,7 @@ test_that("first isolates work", { col_mo = "mo", info = TRUE), na.rm = TRUE), - 1317) + 1300) # first weighted isolates ex_iso_with_keyab <- example_isolates @@ -47,7 +47,7 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1413) + 1396) # when not ignoring I expect_equal( @@ -62,7 +62,7 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1436) + 1419) # when using points expect_equal( suppressWarnings( @@ -75,7 +75,7 @@ test_that("first isolates work", { type = "points", info = TRUE), na.rm = TRUE)), - 1417) + 1400) # first non-ICU isolates expect_equal( @@ -88,7 +88,7 @@ test_that("first isolates work", { info = TRUE, icu_exclude = TRUE), na.rm = TRUE), - 891) + 881) # set 1500 random observations to be of specimen type 'Urine' random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE) @@ -175,19 +175,19 @@ test_that("first isolates work", { col_mo = "mo", info = TRUE), na.rm = TRUE), - 1322) + 1305) # unknown MOs test_unknown <- example_isolates test_unknown$mo <- ifelse(test_unknown$mo == "B_ESCHR_COLI", "UNKNOWN", test_unknown$mo) expect_equal(sum(first_isolate(test_unknown, include_unknown = FALSE)), - 1062) + 1045) expect_equal(sum(first_isolate(test_unknown, include_unknown = TRUE)), - 1529) + 1528) test_unknown$mo <- 
ifelse(test_unknown$mo == "UNKNOWN", NA, test_unknown$mo) expect_equal(sum(first_isolate(test_unknown)), - 1062) + 1045) # shortcuts expect_identical(filter_first_isolate(example_isolates),