mirror of https://github.com/msberends/AMR.git
more MOs
This commit is contained in:
parent
4a47e59e6f
commit
3165c50d06
5
NEWS
5
NEWS
|
@ -1,10 +1,11 @@
|
|||
## 0.1.2
|
||||
- Added full support for Windows, Linux and macOS; this package now works everywhere :)
|
||||
- New function `guess_bactid` to determine the ID of a microorganism based on genus/species
|
||||
- Added full support for Windows, Linux and macOS
|
||||
- New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
|
||||
- New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
|
||||
- New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
|
||||
- Renamed dataset `ablist` to `antibiotics`
|
||||
- Renamed dataset `bactlist` to `microorganisms`
|
||||
- Added more microorganisms to dataset `microorganisms` (previously `bactlist`)
|
||||
- Added analysis examples on help page of dataset `septic_patients`
|
||||
- Added support for character vector in join functions
|
||||
- Added warnings when applying a join results in more rows after than before the join
|
||||
|
|
130
R/atc.R
130
R/atc.R
|
@ -237,3 +237,133 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
|
|||
|
||||
abcode
|
||||
}
|
||||
|
||||
#' Find bacteria ID based on genus/species
#'
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
#' @param x character vector to determine \code{bactid}
#' @export
#' @importFrom dplyr %>% filter slice pull
#' @return Character vector with one element per element of \code{x}. Elements that could not be matched, including \code{NA} and input without any letters, are returned as \code{""}.
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' guess_bactid("stau")
#' guess_bactid("STAU")
#' guess_bactid("staaur")
#' guess_bactid("S. aureus")
#' guess_bactid("S aureus")
#' guess_bactid("Staphylococcus aureus")
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
guess_bactid <- function(x) {
  # remove dots and other non-text in case of "E. coli" except spaces
  x <- gsub("[^a-zA-Z ]+", "", x)
  # but spaces before and after should be omitted
  x <- trimws(x, which = "both")

  # one result per input element; "" means 'nothing found'
  result <- character(length(x))

  # abbreviations that would otherwise hit another organism first, keyed by
  # the lower case regex that is built from the input
  ambiguous <- c(
    # avoid detection of Entamoeba coli in case of E. coli
    "^e.*coli$" = "Escherichia coli",
    # avoid detection of Haematobacter influenzae in case of H. influenzae
    "^h.*influenzae$" = "Haemophilus influenzae",
    # avoid detection of Staphylococcus auricularis in case of S. aureus
    "^st.*au$" = "Staphylococcus aureus",
    "^stau$" = "Staphylococcus aureus",
    "^staaur$" = "Staphylococcus aureus",
    # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
    "^p.*aer$" = "Pseudomonas aeruginosa"
  )

  # known trivial names, keyed by the upper case input
  trivial <- c(
    MRSA = "Staphylococcus aureus",
    VISA = "Staphylococcus aureus",
    VRSA = "Staphylococcus aureus",
    MRSE = "Staphylococcus epidermidis",
    VRE = "Enterococcus",
    # multi resistant P. aeruginosa
    MRPA = "Pseudomonas aeruginosa",
    # peni resistant S. pneumoniae
    PISP = "Streptococcus pneumoniae",
    PRSP = "Streptococcus pneumoniae",
    # vanco resistant S. pneumoniae
    VISP = "Streptococcus pneumoniae",
    VRSP = "Streptococcus pneumoniae"
  )

  # seq_along() instead of 1:length(), so empty input yields character(0)
  for (i in seq_along(x)) {
    term_i <- x[i]

    # NA or input without any letters can never be identified; without this
    # guard NA breaks the comparisons below and "" would match on " species"
    if (is.na(term_i) || term_i == "") {
      next
    }

    # replace space by regex sign
    regex_i <- gsub(" ", ".*", term_i, fixed = TRUE)
    genus_i <- paste(regex_i, "species")
    # add start and stop
    pattern_i <- paste0("^", regex_i, "$")

    if (tolower(pattern_i) %in% names(ambiguous)) {
      pattern_i <- unname(ambiguous[tolower(pattern_i)])
    }
    # translate known trivial names to genus+species
    if (toupper(term_i) %in% names(trivial)) {
      pattern_i <- unname(trivial[toupper(term_i)])
    }

    # NOTE(review): `%like%` is defined elsewhere in this package; presumably a
    # case-insensitive regex match - confirm

    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == term_i)

    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == pattern_i)
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% pattern_i)
    }
    if (nrow(found) == 0) {
      # try only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% genus_i)
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(term_i) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>%
          filter(toupper(mocode) == toupper(term_i))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      n_chars <- nchar(term_i)
      # integer division makes the rounding for odd lengths explicit
      half <- n_chars %/% 2
      split_i <- paste0("^",
                        trimws(substr(term_i, 1, half)),
                        ".* ",
                        trimws(substr(term_i, half + 1, n_chars)))
      found <- AMR::microorganisms %>% filter(fullname %like% split_i)
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (term_i %like% "^Gram") {
        term_i <- gsub("^Gram", "", term_i, ignore.case = TRUE)
        # remove leading and trailing spaces again
        term_i <- trimws(term_i, which = "both")
      }
      # an empty pattern would match every microorganism
      if (term_i != "") {
        found <- AMR::microorganisms %>% filter(fullname %like% term_i)
      }
    }

    if (nrow(found) != 0) {
      result[i] <- found %>%
        slice(1) %>%
        pull(bactid)
    }
  }
  result
}
|
||||
|
|
21
R/classes.R
21
R/classes.R
|
@ -361,26 +361,19 @@ print.mic <- function(x, ...) {
|
|||
|
||||
#' @exportMethod summary.mic
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% tibble group_by summarise pull
|
||||
#' @importFrom dplyr %>%
|
||||
#' @noRd
|
||||
summary.mic <- function(object, ...) {
|
||||
x <- object
|
||||
n_total <- x %>% length()
|
||||
x <- x[!is.na(x)]
|
||||
n <- x %>% length()
|
||||
return(c("Mode" = 'mic',
|
||||
"<NA>" = n_total - n,
|
||||
"Min." = sort(x)[1] %>% as.character(),
|
||||
"Max." = sort(x)[n] %>% as.character()
|
||||
))
|
||||
cat("Class 'mic': ", n, " isolates\n", sep = '')
|
||||
cat('\n')
|
||||
cat('<NA> ', n_total - n, '\n')
|
||||
cat('\n')
|
||||
tbl <- tibble(x = x, y = 1) %>% group_by(x) %>% summarise(y = sum(y))
|
||||
cnt <- tbl %>% pull(y)
|
||||
names(cnt) <- tbl %>% pull(x)
|
||||
print(cnt)
|
||||
lst <- c('mic',
|
||||
n_total - n,
|
||||
sort(x)[1] %>% as.character(),
|
||||
sort(x)[n] %>% as.character())
|
||||
names(lst) <- c("Mode", "<NA>", "Min.", "Max.")
|
||||
lst
|
||||
}
|
||||
|
||||
#' @exportMethod plot.mic
|
||||
|
|
|
@ -1,17 +1,35 @@
|
|||
#' Import/export from clipboard
|
||||
#'
|
||||
#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
|
||||
#' These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example.
|
||||
#' @rdname clipboard
|
||||
#' @name clipboard
|
||||
#' @inheritParams utils::read.table
|
||||
#' @inheritParams utils::write.table
|
||||
#' @param startrow \emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header.
|
||||
#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
|
||||
#' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
|
||||
#' @param info print info about copying
|
||||
#' @keywords clipboard clipboard_import clipboard_export import export
|
||||
#' @importFrom dplyr %>% pull as_tibble
|
||||
#' @importFrom utils read.delim write.table object.size
|
||||
#' @details For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM.
|
||||
#' @details For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your system's RAM.
|
||||
#'
|
||||
#' Example for copying from Excel:
|
||||
#' \if{html}{
|
||||
#' \out{<div style="text-align: left">}\figure{Excel_copy.png}\out{</div>}
|
||||
#' }
|
||||
#' \if{latex}{
|
||||
#'     \out{\begin{flushleft}}\figure{Excel_copy.png}\out{\end{flushleft}}
|
||||
#' }
|
||||
#' \cr
|
||||
#' And pasting in R: \cr \cr
|
||||
#' \code{> data <- clipboard_import()} \cr
|
||||
#' \code{> data} \cr
|
||||
#' \if{html}{
|
||||
#' \out{<div style="text-align: left">}\figure{Excel_paste.png}\out{</div>}
|
||||
#' }
|
||||
#' \if{latex}{
|
||||
#'     \out{\begin{flushleft}}\figure{Excel_paste.png}\out{\end{flushleft}}
|
||||
#' }
|
||||
#' @export
|
||||
#' @return data.frame
|
||||
clipboard_import <- function(sep = '\t',
|
||||
|
|
|
@ -610,105 +610,3 @@ key_antibiotics_equal <- function(x,
|
|||
}
|
||||
result
|
||||
}
|
||||
|
||||
#' Find bacteria ID based on genus/species
|
||||
#'
|
||||
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
||||
#' @param x character vector to determine \code{bactid}
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% filter slice pull
|
||||
#' @return Character (vector).
|
||||
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||
#' @examples
|
||||
#' # These examples all return "STAAUR", the ID of S. aureus:
|
||||
#' guess_bactid("stau")
|
||||
#' guess_bactid("STAU")
|
||||
#' guess_bactid("staaur")
|
||||
#' guess_bactid("S. aureus")
|
||||
#' guess_bactid("S aureus")
|
||||
#' guess_bactid("Staphylococcus aureus")
|
||||
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||
guess_bactid <- function(x) {
|
||||
# remove dots and other non-text in case of "E. coli" except spaces
|
||||
x <- gsub("[^a-zA-Z ]+", "", x)
|
||||
x.bak <- x
|
||||
# replace space by regex sign
|
||||
x <- gsub(" ", ".*", x, fixed = TRUE)
|
||||
# add start and stop
|
||||
x_species <- paste(x, 'species')
|
||||
x <- paste0('^', x, '$')
|
||||
|
||||
for (i in 1:length(x)) {
|
||||
if (tolower(x[i]) == '^e.*coli$') {
|
||||
# avoid detection of Entamoeba coli in case of E. coli
|
||||
x[i] <- 'Escherichia coli'
|
||||
}
|
||||
if (tolower(x[i]) == '^h.*influenzae$') {
|
||||
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
||||
x[i] <- 'Haemophilus influenzae'
|
||||
}
|
||||
if (tolower(x[i]) == '^st.*au$'
|
||||
| tolower(x[i]) == '^stau$'
|
||||
| tolower(x[i]) == '^staaur$') {
|
||||
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (tolower(x[i]) == '^p.*aer$') {
|
||||
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
||||
x[i] <- 'Pseudomonas aeruginosa'
|
||||
}
|
||||
# translate known trivial names to genus+species
|
||||
if (toupper(x.bak[i]) == 'MRSA'
|
||||
| toupper(x.bak[i]) == 'VISA'
|
||||
| toupper(x.bak[i]) == 'VRSA') {
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'MRSE') {
|
||||
x[i] <- 'Staphylococcus epidermidis'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'VRE') {
|
||||
x[i] <- 'Enterococcus'
|
||||
}
|
||||
|
||||
# let's try the ID's first
|
||||
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
||||
|
||||
if (nrow(found) == 0) {
|
||||
# now try exact match
|
||||
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try only genus, with 'species' attached
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# search for GLIMS code
|
||||
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
||||
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
||||
}
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try splitting of characters and then find ID
|
||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
||||
x_length <- nchar(x.bak[i])
|
||||
x[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
||||
'.* ',
|
||||
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x[i]))
|
||||
}
|
||||
|
||||
if (nrow(found) != 0) {
|
||||
x[i] <- found %>%
|
||||
slice(1) %>%
|
||||
pull(bactid)
|
||||
} else {
|
||||
x[i] <- ""
|
||||
}
|
||||
}
|
||||
x
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -28,7 +28,7 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE,
|
|||
|
||||
\item{na}{the string to use for missing values in the data.}
|
||||
|
||||
\item{startrow}{\emph{n}th row to start importing from. For \code{clipboard_import}, when \code{header = TRUE} the import will start on row \code{startrow} \emph{below} the header.}
|
||||
\item{startrow}{\emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.}
|
||||
|
||||
\item{as_vector}{a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.}
|
||||
|
||||
|
@ -41,10 +41,28 @@ clipboard_export(x, sep = "\\t", dec = ".", na = "", header = TRUE,
|
|||
data.frame
|
||||
}
|
||||
\description{
|
||||
These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard, with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
|
||||
These are helper functions around \code{\link{read.table}} and \code{\link{write.table}} to import from and export to clipboard with support for Windows, Linux and macOS. The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. See Details for an example.
|
||||
}
|
||||
\details{
|
||||
For \code{clipboard_export}, the reserved clipboard size for exporting will be set automatically to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your systems RAM.
|
||||
For \code{clipboard_export()}, the reserved clipboard size for exporting will be set to 125\% of the object size of \code{x}. This way, it is possible to export data with thousands of rows as the only limit will be your system's RAM.
|
||||
|
||||
Example for copying from Excel:
|
||||
\if{html}{
|
||||
\out{<div style="text-align: left">}\figure{Excel_copy.png}\out{</div>}
|
||||
}
|
||||
\if{latex}{
|
||||
    \out{\begin{flushleft}}\figure{Excel_copy.png}\out{\end{flushleft}}
|
||||
}
|
||||
\cr
|
||||
And pasting in R: \cr \cr
|
||||
\code{> data <- clipboard_import()} \cr
|
||||
\code{> data} \cr
|
||||
\if{html}{
|
||||
\out{<div style="text-align: left">}\figure{Excel_paste.png}\out{</div>}
|
||||
}
|
||||
\if{latex}{
|
||||
    \out{\begin{flushleft}}\figure{Excel_paste.png}\out{\end{flushleft}}
|
||||
}
|
||||
}
|
||||
\keyword{clipboard}
|
||||
\keyword{clipboard_export}
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 2.2 KiB |
Binary file not shown.
After Width: | Height: | Size: 3.6 KiB |
|
@ -1,5 +1,5 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/first_isolates.R
|
||||
% Please edit documentation in R/atc.R
|
||||
\name{guess_bactid}
|
||||
\alias{guess_bactid}
|
||||
\title{Find bacteria ID based on genus/species}
|
||||
|
|
|
@ -14,3 +14,18 @@ test_that("abname works", {
|
|||
expect_equal(abname("amox", from = 'molis'), "Amoxicillin")
|
||||
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
|
||||
})
|
||||
|
||||
test_that("guess_bactid works", {
|
||||
expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF"))
|
||||
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
||||
expect_equal(guess_bactid("Negative rods"), "GNR")
|
||||
expect_equal(guess_bactid(c("stau",
|
||||
"STAU",
|
||||
"staaur",
|
||||
"S. aureus",
|
||||
"S aureus",
|
||||
"Staphylococcus aureus",
|
||||
"MRSA",
|
||||
"VISA")),
|
||||
rep("STAAUR", 8))
|
||||
})
|
||||
|
|
|
@ -7,11 +7,6 @@ test_that("keyantibiotics work", {
|
|||
expect_false(key_antibiotics_equal("SSS", "SIS", ignore_I = FALSE))
|
||||
})
|
||||
|
||||
test_that("guess_bactid works", {
|
||||
expect_equal(guess_bactid("E. coli"), "ESCCOL")
|
||||
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
||||
})
|
||||
|
||||
test_that("first isolates work", {
|
||||
# septic_patients contains 1960 out of 2000 first isolates
|
||||
#septic_ptns <- septic_patients
|
||||
|
|
Loading…
Reference in New Issue