more MOs

2026-06-30 08:16:20 +02:00 · 2018-04-03 16:07:32 +02:00
parent 4a47e59e6f
commit 3165c50d06
13 changed files with 226 additions and 158 deletions
--- a/5
+++ b/5
@@ -1,10 +1,11 @@
 ## 0.1.2
- Added full support for Windows, Linux and macOS; this package now works everywhere :)
+- Added full support for Windows, Linux and macOS
- New function `guess_bactid` to determine the ID of a microorganism based on genus/species
+- New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
 - New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
 - New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
 - Renamed dataset `ablist` to `antibiotics`
 - Renamed dataset `bactlist` to `microorganisms`
 - Added more microorganisms to `bactlist`
 - Added analysis examples on help page of dataset `septic_patients`
 - Added support for character vector in join functions
 - Added warnings when applying a join results in more rows after than before the join
--- a/R/atc.R
+++ b/R/atc.R
@@ -237,3 +237,133 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
  abcode
 }
 #' Find bacteria ID based on genus/species
 #'
 #' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
 #' @param x character vector to determine \code{bactid}
 #' @export
 #' @importFrom dplyr %>% filter slice pull
 #' @return Character vector with one ID per element of \code{x}. Elements that cannot be matched, including \code{NA} and blank input, are returned as \code{""}. Zero-length input returns \code{character(0)}.
 #' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
 #' @examples
 #' # These examples all return "STAAUR", the ID of S. aureus:
 #' guess_bactid("stau")
 #' guess_bactid("STAU")
 #' guess_bactid("staaur")
 #' guess_bactid("S. aureus")
 #' guess_bactid("S aureus")
 #' guess_bactid("Staphylococcus aureus")
 #' guess_bactid("MRSA") # Methicillin-resistant S. aureus
 #' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
 guess_bactid <- function(x) {
   # paste0() further down turns zero-length input into a single "^$" pattern,
   # so an empty vector has to be answered before any pattern is built
   if (length(x) == 0) {
     return(character(0))
   }

   # remove dots and other non-text in case of "E. coli" except spaces,
   # but spaces before and after should be omitted
   x_clean <- trimws(gsub("[^a-zA-Z ]+", "", x), which = "both")
   # replace space by regex sign, so "S aureus" can match a full name
   x_wild <- gsub(" ", ".*", x_clean, fixed = TRUE)
   # genus-only fallback pattern and the anchored pattern
   x_species <- paste(x_wild, "species")
   x_pattern <- paste0("^", x_wild, "$")

   # unmatched elements keep the "" they are initialised with
   result <- character(length(x_clean))

   for (i in seq_along(x_clean)) {
     # NA would make the scalar comparisons below fail, and a blank term would
     # reach the " species" fallback with an empty genus; both are unmatched
     if (is.na(x_clean[i]) || x_clean[i] == "") {
       next
     }

     # abbreviations that would otherwise hit the wrong organism:
     # E. coli (not Entamoeba coli), H. influenzae (not Haematobacter
     # influenzae), S. aureus (not Staphylococcus auricularis),
     # P. aeruginosa (not Pasteurella aerogenes)
     x_target <- switch(tolower(x_pattern[i]),
       "^e.*coli$" = "Escherichia coli",
       "^h.*influenzae$" = "Haemophilus influenzae",
       "^st.*au$" = ,
       "^stau$" = ,
       "^staaur$" = "Staphylococcus aureus",
       "^p.*aer$" = "Pseudomonas aeruginosa",
       x_pattern[i]
     )
     # translate known trivial names to genus+species
     x_target <- switch(toupper(x_clean[i]),
       "MRSA" = ,
       "VISA" = ,
       "VRSA" = "Staphylococcus aureus",
       "MRSE" = "Staphylococcus epidermidis",
       "VRE" = "Enterococcus",
       # multi resistant P. aeruginosa
       "MRPA" = "Pseudomonas aeruginosa",
       # peni resistant S. pneumoniae
       "PISP" = ,
       "PRSP" = ,
       # vanco resistant S. pneumoniae
       "VISP" = ,
       "VRSP" = "Streptococcus pneumoniae",
       x_target
     )

     # let's try the ID's first
     found <- AMR::microorganisms %>% filter(bactid == x_clean[i])
     if (nrow(found) == 0) {
       # now try exact match
       found <- AMR::microorganisms %>% filter(fullname == x_target)
     }
     if (nrow(found) == 0) {
       # try any match
       found <- AMR::microorganisms %>% filter(fullname %like% x_target)
     }
     if (nrow(found) == 0) {
       # try only genus, with 'species' attached
       found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
     }
     if (nrow(found) == 0) {
       # search for GLIMS code
       if (toupper(x_clean[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
         found <- AMR::microorganisms.umcg %>%
           filter(toupper(mocode) == toupper(x_clean[i]))
       }
     }
     if (nrow(found) == 0) {
       # try splitting of characters and then find ID
       # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
       n_chars <- nchar(x_clean[i])
       # integer halving; odd lengths give the extra character to the second part
       n_half <- n_chars %/% 2
       x_split <- paste0(trimws(substr(x_clean[i], 1, n_half)),
                         ".* ",
                         trimws(substr(x_clean[i], n_half + 1, n_chars)))
       found <- AMR::microorganisms %>%
         filter(fullname %like% paste0("^", x_split))
     }
     if (nrow(found) == 0) {
       # try any match with text before and after original search string
       # so "negative rods" will be "GNR"
       x_free <- x_clean[i]
       if (x_free %like% "^Gram") {
         # remove the prefix, then leading and trailing spaces again
         x_free <- trimws(gsub("^Gram", "", x_free, ignore.case = TRUE),
                          which = "both")
       }
       found <- AMR::microorganisms %>% filter(fullname %like% x_free)
     }

     if (nrow(found) != 0) {
       # several rows may match; the first one wins
       result[i] <- found %>%
         slice(1) %>%
         pull(bactid)
     }
   }
   result
 }
--- a/R/classes.R
+++ b/R/classes.R
@@ -361,26 +361,19 @@ print.mic <- function(x, ...) {
 #' @exportMethod summary.mic
 #' @export
-#' @importFrom dplyr %>% tibble group_by summarise pull
+#' @importFrom dplyr %>%
 #' @noRd
 summary.mic <- function(object, ...) {
  x <- object
  n_total <- x %>% length()
  x <- x[!is.na(x)]
  n <- x %>% length()
-  return(c("Mode" = 'mic',
+  lst <- c('mic',
-           "<NA>" = n_total - n,
+           n_total - n,
-           "Min." = sort(x)[1] %>% as.character(),
+           sort(x)[1] %>% as.character(),
-           "Max." = sort(x)[n] %>% as.character()
+           sort(x)[n] %>% as.character())
-  ))
+  names(lst) <- c("Mode", "<NA>", "Min.", "Max.")
-  cat("Class 'mic': ", n, " isolates\n", sep = '')
+  lst
  cat('\n')
  cat('<NA> ', n_total - n, '\n')
  cat('\n')
  tbl <- tibble(x = x, y = 1) %>% group_by(x) %>% summarise(y = sum(y))
  cnt <- tbl %>% pull(y)
  names(cnt) <- tbl %>% pull(x)
  print(cnt)
 }
 #' @exportMethod plot.mic
--- a/R/clipboard.R
+++ b/R/clipboard.R
@@ -1,17 +1,35 @@
 #' Import/export from clipboard
 #'
-#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
+#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example.
 #' @rdname clipboard
 #' @name clipboard
 #' @inheritParams utils::read.table
 #' @inheritParams utils::write.table
-#' @param startrow \emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header.
+#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
 #' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
 #' @param info print info about copying
 #' @keywords clipboard clipboard_import clipboard_export import export
 #' @importFrom dplyr %>% pull as_tibble
 #' @importFrom utils read.delim write.table object.size
-#' @details For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM.
+#' @details For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your system's RAM.
 #'
 #'   Example for copying from Excel:
 #'   \if{html}{
 #'     \out{<div style="text-align: left">}\figure{Excel_copy.png}\out{</div>}
 #'   }
 #'   \if{latex}{
 #'     \out{\begin{flushleft}}\figure{Excel_copy.png}\out{\end{flushleft}}
 #'   }
 #'   \cr
 #'   And pasting in R: \cr \cr
 #'   \code{> data <- clipboard_import()} \cr
 #'   \code{> data} \cr
 #'   \if{html}{
 #'     \out{<div style="text-align: left">}\figure{Excel_paste.png}\out{</div>}
 #'   }
 #'   \if{latex}{
 #'     \out{\begin{flushleft}}\figure{Excel_paste.png}\out{\end{flushleft}}
 #'   }
 #' @export
 #' @return data.frame
 clipboard_import <- function(sep = '\t',
--- a/R/first_isolates.R
+++ b/R/first_isolates.R
@@ -610,105 +610,3 @@ key_antibiotics_equal <- function(x,
  }
  result
 }
 #' Find bacteria ID based on genus/species
 #'
 #' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
 #' @param x character vector to determine \code{bactid}
 #' @export
 #' @importFrom dplyr %>% filter slice pull
 #' @return Character vector with one ID per element of \code{x}. Elements that cannot be matched, including \code{NA} and blank input, are returned as \code{""}. Zero-length input returns \code{character(0)}.
 #' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
 #' @examples
 #' # These examples all return "STAAUR", the ID of S. aureus:
 #' guess_bactid("stau")
 #' guess_bactid("STAU")
 #' guess_bactid("staaur")
 #' guess_bactid("S. aureus")
 #' guess_bactid("S aureus")
 #' guess_bactid("Staphylococcus aureus")
 #' guess_bactid("MRSA") # Methicillin-resistant S. aureus
 #' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
 guess_bactid <- function(x) {
   # paste0() further down turns zero-length input into a single "^$" pattern,
   # so an empty vector has to be answered before any pattern is built
   if (length(x) == 0) {
     return(character(0))
   }

   # remove dots and other non-text in case of "E. coli" except spaces
   x_clean <- gsub("[^a-zA-Z ]+", "", x)
   # replace space by regex sign, so "S aureus" can match a full name
   x_wild <- gsub(" ", ".*", x_clean, fixed = TRUE)
   # genus-only fallback pattern and the anchored pattern
   x_species <- paste(x_wild, "species")
   x_pattern <- paste0("^", x_wild, "$")

   # unmatched elements keep the "" they are initialised with
   result <- character(length(x_clean))

   for (i in seq_along(x_clean)) {
     # NA would make the scalar comparisons below fail, and a blank term would
     # reach the " species" fallback with an empty genus; both are unmatched
     if (is.na(x_clean[i]) || !nzchar(trimws(x_clean[i]))) {
       next
     }

     # abbreviations that would otherwise hit the wrong organism:
     # E. coli (not Entamoeba coli), H. influenzae (not Haematobacter
     # influenzae), S. aureus (not Staphylococcus auricularis),
     # P. aeruginosa (not Pasteurella aerogenes)
     x_target <- switch(tolower(x_pattern[i]),
       "^e.*coli$" = "Escherichia coli",
       "^h.*influenzae$" = "Haemophilus influenzae",
       "^st.*au$" = ,
       "^stau$" = ,
       "^staaur$" = "Staphylococcus aureus",
       "^p.*aer$" = "Pseudomonas aeruginosa",
       x_pattern[i]
     )
     # translate known trivial names to genus+species
     x_target <- switch(toupper(x_clean[i]),
       "MRSA" = ,
       "VISA" = ,
       "VRSA" = "Staphylococcus aureus",
       "MRSE" = "Staphylococcus epidermidis",
       "VRE" = "Enterococcus",
       x_target
     )

     # let's try the ID's first
     found <- AMR::microorganisms %>% filter(bactid == x_clean[i])
     if (nrow(found) == 0) {
       # now try exact match
       found <- AMR::microorganisms %>% filter(fullname == x_target)
     }
     if (nrow(found) == 0) {
       # try any match
       found <- AMR::microorganisms %>% filter(fullname %like% x_target)
     }
     if (nrow(found) == 0) {
       # try only genus, with 'species' attached
       found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
     }
     if (nrow(found) == 0) {
       # search for GLIMS code
       if (toupper(x_clean[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
         found <- AMR::microorganisms.umcg %>%
           filter(toupper(mocode) == toupper(x_clean[i]))
       }
     }
     if (nrow(found) == 0) {
       # try splitting of characters and then find ID
       # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
       n_chars <- nchar(x_clean[i])
       # integer halving; odd lengths give the extra character to the second part
       n_half <- n_chars %/% 2
       x_split <- paste0(trimws(substr(x_clean[i], 1, n_half)),
                         ".* ",
                         trimws(substr(x_clean[i], n_half + 1, n_chars)))
       found <- AMR::microorganisms %>%
         filter(fullname %like% paste0("^", x_split))
     }

     if (nrow(found) != 0) {
       # several rows may match; the first one wins
       result[i] <- found %>%
         slice(1) %>%
         pull(bactid)
     }
   }
   result
 }
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/man/clipboard.Rd
+++ b/man/clipboard.Rd
@@ -28,7 +28,7 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE,
 \item{na}{the string to use for missing values in the data.}
-\item{startrow}{\emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header.}
+\item{startrow}{\emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.}
 \item{as_vector}{a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.}
@@ -41,10 +41,28 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE,
 data.frame
 }
 \description{
-These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
+These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example.
 }
 \details{
-For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM.
+For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your system's RAM.
  Example for copying from Excel:
  \if{html}{
    \out{<div style="text-align: left">}\figure{Excel_copy.png}\out{</div>}
  }
  \if{latex}{
    \out{\begin{flushleft}}\figure{Excel_copy.png}\out{\end{flushleft}}
  }
  \cr
  And pasting in R: \cr \cr
  \code{> data <- clipboard_import()} \cr
  \code{> data} \cr
  \if{html}{
    \out{<div style="text-align: left">}\figure{Excel_paste.png}\out{</div>}
  }
  \if{latex}{
    \out{\begin{flushleft}}\figure{Excel_paste.png}\out{\end{flushleft}}
  }
 }
 \keyword{clipboard}
 \keyword{clipboard_export}
--- a/man/figures/Excel_copy.png
+++ b/man/figures/Excel_copy.png
--- a/man/figures/Excel_paste.png
+++ b/man/figures/Excel_paste.png
--- a/man/guess_bactid.Rd
+++ b/man/guess_bactid.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/first_isolates.R
+% Please edit documentation in R/atc.R
 \name{guess_bactid}
 \alias{guess_bactid}
 \title{Find bacteria ID based on genus/species}
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@@ -14,3 +14,18 @@ test_that("abname works", {
  expect_equal(abname("amox", from = 'molis'), "Amoxicillin")
  expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
 })
 test_that("guess_bactid works", {
   # vector input with abbreviated genus names is resolved element by element
   abbreviated_ids <- guess_bactid(c("E. coli", "H. influenzae"))
   expect_identical(abbreviated_ids, c("ESCCOL", "HAEINF"))

   # a full name resolves to the same ID as its abbreviation
   full_name_id <- guess_bactid("Escherichia coli")
   expect_equal(full_name_id, "ESCCOL")

   # a descriptive term without the "Gram" prefix is expected to give GNR
   descriptive_id <- guess_bactid("Negative rods")
   expect_equal(descriptive_id, "GNR")

   # every spelling and trivial name below is expected to give S. aureus
   s_aureus_inputs <- c(
     "stau",
     "STAU",
     "staaur",
     "S. aureus",
     "S aureus",
     "Staphylococcus aureus",
     "MRSA",
     "VISA"
   )
   expect_equal(guess_bactid(s_aureus_inputs), rep("STAAUR", 8))
 })
--- a/tests/testthat/test-first_isolates.R
+++ b/tests/testthat/test-first_isolates.R
@@ -7,11 +7,6 @@ test_that("keyantibiotics work", {
  expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE))
 })
 test_that("guess_bactid works", {
   # abbreviated and full name are both expected to give the E. coli ID
   abbreviated_id <- guess_bactid("E. coli")
   full_name_id <- guess_bactid("Escherichia coli")
   expect_equal(abbreviated_id, "ESCCOL")
   expect_equal(full_name_id, "ESCCOL")
 })
 test_that("first isolates work", {
  # septic_patients contains 1960 out of 2000 first isolates
  #septic_ptns <- septic_patients