diff --git a/NEWS b/NEWS index ca90d540..f4452ba6 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,11 @@ ## 0.1.2 -- Added full support for Windows, Linux and macOS; this package now works everywhere :) -- New function `guess_bactid` to determine the ID of a microorganism based on genus/species +- Added full support for Windows, Linux and macOS +- New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA - New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate` - Renamed dataset `ablist` to `antibiotics` - Renamed dataset `bactlist` to `microorganisms` +- Added more microorganisms to `bactlist` - Added analysis examples on help page of dataset `septic_patients` - Added support for character vector in join functions - Added warnings when applying a join results in more rows after than before the join diff --git a/R/atc.R b/R/atc.R index bcc4bff8..cdfe4386 100644 --- a/R/atc.R +++ b/R/atc.R @@ -54,7 +54,7 @@ #' @importFrom xml2 read_html #' @importFrom rvest html_nodes html_table #' @source \url{https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/} -#' @examples +#' @examples #' \donttest{ #' atc_property("J01CA04", "DDD", "O") # oral DDD (Defined Daily Dose) of amoxicillin #' atc_property("J01CA04", "DDD", "P") # parenteral DDD (Defined Daily Dose) of amoxicillin @@ -63,50 +63,50 @@ atc_property <- function(atc_code, property, administration = 'O', url = 'https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no') { - + # property <- property %>% tolower() # if (property %like% 'unit') { property <- 'U' } - + # validation of properties valid_properties.bak <- c("ATC code", "Name", "DDD", "U", "Adm.R", "Note") valid_properties <- valid_properties.bak #%>% tolower() if (!property %in% valid_properties) { stop('Invalid `property`, use one of ', paste(valid_properties, collapse = ", "), '.') } - + returnvalue <- rep(NA_character_, length(atc_code)) if (property == 'DDD') { returnvalue <- rep(NA_real_, length(atc_code)) } - + progress <- progress_estimated(n = length(atc_code)) - + for (i in 1:length(atc_code)) { - + progress$tick()$print() - + atc_url <- sub('%s', atc_code[i], url, fixed = TRUE) tbl <- xml2::read_html(atc_url) %>% rvest::html_nodes('table') %>% rvest::html_table(header = TRUE) - + if (length(tbl) == 0) { warning('ATC not found: ', atc_code[i], '. Please check ', atc_url, '.', call. = FALSE) returnvalue[i] <- NA next } - + tbl <- tbl[[1]] - + if (property == 'Name') { returnvalue[i] <- tbl[1, 2] } else { - + names(returnvalue)[i] <- tbl[1, 2] %>% as.character() - + if (!'Adm.R' %in% colnames(tbl) | is.na(tbl[1, 'Adm.R'])) { returnvalue[i] <- NA next @@ -119,10 +119,10 @@ atc_property <- function(atc_code, } } } - + cat('\n') returnvalue - + } #' Name of an antibiotic @@ -135,7 +135,7 @@ atc_property <- function(atc_code, #' @keywords ab antibiotics #' @source \code{\link{antibiotics}} #' @export -#' @importFrom dplyr %>% filter select slice +#' @importFrom dplyr %>% filter select slice #' @examples #' abname("AMCL") #' # "amoxicillin and enzyme inhibitor" @@ -144,7 +144,7 @@ atc_property <- function(atc_code, #' # "amoxicillin and enzyme inhibitor + gentamicin" #' #' abname(c("AMCL", "GENT")) -#' # "amoxicillin and enzyme inhibitor" "gentamicin" +#' # "amoxicillin and enzyme inhibitor" "gentamicin" #' #' abname("AMCL", to = "trivial_nl") #' # "Amoxicilline/clavulaanzuur" @@ -155,9 +155,9 @@ atc_property <- function(atc_code, #' abname("J01CR02", from = "atc", to = "umcg") #' # "AMCL" abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'official', textbetween = ' + ', tolower = FALSE) { - + antibiotics <- AMR::antibiotics - + from <- from[1] if (from == "guess") { for (i in 1:3) { @@ -169,19 +169,19 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi from <- "umcg" } } - + colnames(antibiotics) <- colnames(antibiotics) %>% tolower() from <- from %>% tolower() to <- to %>% tolower() - + if (!from %in% colnames(antibiotics) | !to %in% colnames(antibiotics)) { - stop(paste0('Invalid `from` or `to`. Choose one of ', + stop(paste0('Invalid `from` or `to`. Choose one of ', colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE) } - + abcode <- as.character(abcode) - + for (i in 1:length(abcode)) { drug <- abcode[i] if (!grepl('+', drug, fixed = TRUE) & !grepl(' en ', drug, fixed = TRUE)) { @@ -215,7 +215,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi abcode[i] <- NA next } - + for (j in 1:length(drug.group)) { drug.group[j] <- antibiotics %>% @@ -230,10 +230,140 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi abcode[i] <- paste(drug.group, collapse = textbetween) } } - + if (tolower == TRUE) { abcode <- abcode %>% tolower() } - + abcode } + +#' Find bacteria ID based on genus/species +#' +#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe. +#' @param x character vector to determine \code{bactid} +#' @export +#' @importFrom dplyr %>% filter slice pull +#' @return Character (vector). +#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. +#' @examples +#' # These examples all return "STAAUR", the ID of S. aureus: +#' guess_bactid("stau") +#' guess_bactid("STAU") +#' guess_bactid("staaur") +#' guess_bactid("S. aureus") +#' guess_bactid("S aureus") +#' guess_bactid("Staphylococcus aureus") +#' guess_bactid("MRSA") # Methicillin-resistant S. aureus +#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus +guess_bactid <- function(x) { + # remove dots and other non-text in case of "E. coli" except spaces + x <- gsub("[^a-zA-Z ]+", "", x) + # but spaces before and after should be omitted + x <- trimws(x, which = "both") + x.bak <- x + # replace space by regex sign + x <- gsub(" ", ".*", x, fixed = TRUE) + # add start and stop + x_species <- paste(x, 'species') + x <- paste0('^', x, '$') + + for (i in 1:length(x)) { + if (tolower(x[i]) == '^e.*coli$') { + # avoid detection of Entamoeba coli in case of E. coli + x[i] <- 'Escherichia coli' + } + if (tolower(x[i]) == '^h.*influenzae$') { + # avoid detection of Haematobacter influenzae in case of H. influenzae + x[i] <- 'Haemophilus influenzae' + } + if (tolower(x[i]) == '^st.*au$' + | tolower(x[i]) == '^stau$' + | tolower(x[i]) == '^staaur$') { + # avoid detection of Staphylococcus auricularis in case of S. aureus + x[i] <- 'Staphylococcus aureus' + } + if (tolower(x[i]) == '^p.*aer$') { + # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa + x[i] <- 'Pseudomonas aeruginosa' + } + + # translate known trivial names to genus+species + if (toupper(x.bak[i]) == 'MRSA' + | toupper(x.bak[i]) == 'VISA' + | toupper(x.bak[i]) == 'VRSA') { + x[i] <- 'Staphylococcus aureus' + } + if (toupper(x.bak[i]) == 'MRSE') { + x[i] <- 'Staphylococcus epidermidis' + } + if (toupper(x.bak[i]) == 'VRE') { + x[i] <- 'Enterococcus' + } + if (toupper(x.bak[i]) == 'MRPA') { + # multi resistant P. aeruginosa + x[i] <- 'Pseudomonas aeruginosa' + } + if (toupper(x.bak[i]) == 'PISP' + | toupper(x.bak[i]) == 'PRSP') { + # peni resistant S. pneumoniae + x[i] <- 'Streptococcus pneumoniae' + } + if (toupper(x.bak[i]) == 'VISP' + | toupper(x.bak[i]) == 'VRSP') { + # vanco resistant S. pneumoniae + x[i] <- 'Streptococcus pneumoniae' + } + + # let's try the ID's first + found <- AMR::microorganisms %>% filter(bactid == x.bak[i]) + + if (nrow(found) == 0) { + # now try exact match + found <- AMR::microorganisms %>% filter(fullname == x[i]) + } + if (nrow(found) == 0) { + # try any match + found <- AMR::microorganisms %>% filter(fullname %like% x[i]) + } + if (nrow(found) == 0) { + # try only genus, with 'species' attached + found <- AMR::microorganisms %>% filter(fullname %like% x_species[i]) + } + if (nrow(found) == 0) { + # search for GLIMS code + if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) { + found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i])) + } + } + if (nrow(found) == 0) { + # try splitting of characters and then find ID + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus + x_split <- x + x_length <- nchar(x.bak[i]) + x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i])) + } + if (nrow(found) == 0) { + # try any match with text before and after original search string + # so "negative rods" will be "GNR" + if (x.bak[i] %like% "^Gram") { + x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE) + # remove leading and trailing spaces again + x.bak[i] <- trimws(x.bak[i], which = "both") + } + found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i]) + } + + if (nrow(found) != 0) { + x[i] <- found %>% + slice(1) %>% + pull(bactid) + } else { + x[i] <- "" + } + } + x +} diff --git a/R/classes.R b/R/classes.R index eb806a45..feed7b1d 100644 --- a/R/classes.R +++ b/R/classes.R @@ -361,26 +361,19 @@ print.mic <- function(x, ...) { #' @exportMethod summary.mic #' @export -#' @importFrom dplyr %>% tibble group_by summarise pull +#' @importFrom dplyr %>% #' @noRd summary.mic <- function(object, ...) { x <- object n_total <- x %>% length() x <- x[!is.na(x)] n <- x %>% length() - return(c("Mode" = 'mic', - "" = n_total - n, - "Min." = sort(x)[1] %>% as.character(), - "Max." = sort(x)[n] %>% as.character() - )) - cat("Class 'mic': ", n, " isolates\n", sep = '') - cat('\n') - cat(' ', n_total - n, '\n') - cat('\n') - tbl <- tibble(x = x, y = 1) %>% group_by(x) %>% summarise(y = sum(y)) - cnt <- tbl %>% pull(y) - names(cnt) <- tbl %>% pull(x) - print(cnt) + lst <- c('mic', + n_total - n, + sort(x)[1] %>% as.character(), + sort(x)[n] %>% as.character()) + names(lst) <- c("Mode", "", "Min.", "Max.") + lst } #' @exportMethod plot.mic diff --git a/R/clipboard.R b/R/clipboard.R index e41c2842..03ec6f43 100644 --- a/R/clipboard.R +++ b/R/clipboard.R @@ -1,17 +1,35 @@ #' Import/export from clipboard #' -#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. +#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example. #' @rdname clipboard #' @name clipboard #' @inheritParams utils::read.table #' @inheritParams utils::write.table -#' @param startrow \emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header. +#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header. #' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header. #' @param info print info about copying #' @keywords clipboard clipboard_import clipboard_export import export #' @importFrom dplyr %>% pull as_tibble #' @importFrom utils read.delim write.table object.size -#' @details For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM. +#' @details For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM. +#' +#' Example for copying from Excel: +#' \if{html}{ +#' \out{
}\figure{Excel_copy.png}\out{
} +#' } +#' \if{latex}{ +#' \out{\begin{left}}\figure{Excel_copy.png}\out{\end{left}} +#' } +#' \cr +#' And pasting in R: \cr \cr +#' \code{> data <- clipboard_import()} \cr +#' \code{> data} \cr +#' \if{html}{ +#' \out{
}\figure{Excel_paste.png}\out{
} +#' } +#' \if{latex}{ +#' \out{\begin{left}}\figure{Excel_paste.png}\out{\end{left}} +#' } #' @export #' @return data.frame clipboard_import <- function(sep = '\t', diff --git a/R/first_isolates.R b/R/first_isolates.R index 54a2ca5e..862b9800 100644 --- a/R/first_isolates.R +++ b/R/first_isolates.R @@ -610,105 +610,3 @@ key_antibiotics_equal <- function(x, } result } - -#' Find bacteria ID based on genus/species -#' -#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe. -#' @param x character vector to determine \code{bactid} -#' @export -#' @importFrom dplyr %>% filter slice pull -#' @return Character (vector). -#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. -#' @examples -#' # These examples all return "STAAUR", the ID of S. aureus: -#' guess_bactid("stau") -#' guess_bactid("STAU") -#' guess_bactid("staaur") -#' guess_bactid("S. aureus") -#' guess_bactid("S aureus") -#' guess_bactid("Staphylococcus aureus") -#' guess_bactid("MRSA") # Methicillin-resistant S. aureus -#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus -guess_bactid <- function(x) { - # remove dots and other non-text in case of "E. coli" except spaces - x <- gsub("[^a-zA-Z ]+", "", x) - x.bak <- x - # replace space by regex sign - x <- gsub(" ", ".*", x, fixed = TRUE) - # add start and stop - x_species <- paste(x, 'species') - x <- paste0('^', x, '$') - - for (i in 1:length(x)) { - if (tolower(x[i]) == '^e.*coli$') { - # avoid detection of Entamoeba coli in case of E. coli - x[i] <- 'Escherichia coli' - } - if (tolower(x[i]) == '^h.*influenzae$') { - # avoid detection of Haematobacter influenzae in case of H. influenzae - x[i] <- 'Haemophilus influenzae' - } - if (tolower(x[i]) == '^st.*au$' - | tolower(x[i]) == '^stau$' - | tolower(x[i]) == '^staaur$') { - # avoid detection of Staphylococcus auricularis in case of S. aureus - x[i] <- 'Staphylococcus aureus' - } - if (tolower(x[i]) == '^p.*aer$') { - # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa - x[i] <- 'Pseudomonas aeruginosa' - } - # translate known trivial names to genus+species - if (toupper(x.bak[i]) == 'MRSA' - | toupper(x.bak[i]) == 'VISA' - | toupper(x.bak[i]) == 'VRSA') { - x[i] <- 'Staphylococcus aureus' - } - if (toupper(x.bak[i]) == 'MRSE') { - x[i] <- 'Staphylococcus epidermidis' - } - if (toupper(x.bak[i]) == 'VRE') { - x[i] <- 'Enterococcus' - } - - # let's try the ID's first - found <- AMR::microorganisms %>% filter(bactid == x.bak[i]) - - if (nrow(found) == 0) { - # now try exact match - found <- AMR::microorganisms %>% filter(fullname == x[i]) - } - if (nrow(found) == 0) { - # try any match - found <- AMR::microorganisms %>% filter(fullname %like% x[i]) - } - if (nrow(found) == 0) { - # try only genus, with 'species' attached - found <- AMR::microorganisms %>% filter(fullname %like% x_species[i]) - } - if (nrow(found) == 0) { - # search for GLIMS code - if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) { - found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i])) - } - } - if (nrow(found) == 0) { - # try splitting of characters and then find ID - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_length <- nchar(x.bak[i]) - x[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x[i])) - } - - if (nrow(found) != 0) { - x[i] <- found %>% - slice(1) %>% - pull(bactid) - } else { - x[i] <- "" - } - } - x -} diff --git a/data/microorganisms.rda b/data/microorganisms.rda index 9db19073..76cf6470 100644 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/man/abname.Rd b/man/abname.Rd index 9ea36200..6163e2ca 100644 --- a/man/abname.Rd +++ b/man/abname.Rd @@ -30,7 +30,7 @@ abname("AMCL+GENT") # "amoxicillin and enzyme inhibitor + gentamicin" abname(c("AMCL", "GENT")) -# "amoxicillin and enzyme inhibitor" "gentamicin" +# "amoxicillin and enzyme inhibitor" "gentamicin" abname("AMCL", to = "trivial_nl") # "Amoxicilline/clavulaanzuur" diff --git a/man/clipboard.Rd b/man/clipboard.Rd index 387a1a3e..347da4f0 100644 --- a/man/clipboard.Rd +++ b/man/clipboard.Rd @@ -28,7 +28,7 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE, \item{na}{the string to use for missing values in the data.} -\item{startrow}{\emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header.} +\item{startrow}{\emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.} \item{as_vector}{a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.} @@ -41,10 +41,28 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE, data.frame } \description{ -These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. +These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example. } \details{ -For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM. +For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM. + + Example for copying from Excel: + \if{html}{ + \out{
}\figure{Excel_copy.png}\out{
} + } + \if{latex}{ + \out{\begin{left}}\figure{Excel_copy.png}\out{\end{left}} + } + \cr + And pasting in R: \cr \cr + \code{> data <- clipboard_import()} \cr + \code{> data} \cr + \if{html}{ + \out{
}\figure{Excel_paste.png}\out{
} + } + \if{latex}{ + \out{\begin{left}}\figure{Excel_paste.png}\out{\end{left}} + } } \keyword{clipboard} \keyword{clipboard_export} diff --git a/man/figures/Excel_copy.png b/man/figures/Excel_copy.png new file mode 100644 index 00000000..8f2ed9cd Binary files /dev/null and b/man/figures/Excel_copy.png differ diff --git a/man/figures/Excel_paste.png b/man/figures/Excel_paste.png new file mode 100644 index 00000000..802e040f Binary files /dev/null and b/man/figures/Excel_paste.png differ diff --git a/man/guess_bactid.Rd b/man/guess_bactid.Rd index 32bbbb27..9dc99719 100644 --- a/man/guess_bactid.Rd +++ b/man/guess_bactid.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/first_isolates.R +% Please edit documentation in R/atc.R \name{guess_bactid} \alias{guess_bactid} \title{Find bacteria ID based on genus/species} diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index 06183f61..2946446e 100644 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -14,3 +14,18 @@ test_that("abname works", { expect_equal(abname("amox", from = 'molis'), "Amoxicillin") expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin") }) + +test_that("guess_bactid works", { + expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF")) + expect_equal(guess_bactid("Escherichia coli"), "ESCCOL") + expect_equal(guess_bactid("Negative rods"), "GNR") + expect_equal(guess_bactid(c("stau", + "STAU", + "staaur", + "S. aureus", + "S aureus", + "Staphylococcus aureus", + "MRSA", + "VISA")), + rep("STAAUR", 8)) +}) diff --git a/tests/testthat/test-first_isolates.R b/tests/testthat/test-first_isolates.R index ce85cd6b..6a1627e1 100644 --- a/tests/testthat/test-first_isolates.R +++ b/tests/testthat/test-first_isolates.R @@ -7,11 +7,6 @@ test_that("keyantibiotics work", { expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE)) }) -test_that("guess_bactid works", { - expect_equal(guess_bactid("E. coli"), "ESCCOL") - expect_equal(guess_bactid("Escherichia coli"), "ESCCOL") -}) - test_that("first isolates work", { # septic_patients contains 1960 out of 2000 first isolates #septic_ptns <- septic_patients