mirror of
https://github.com/msberends/AMR.git
synced 2024-12-26 08:06:12 +01:00
fix for printing tibbles, improve guess_bactid
This commit is contained in:
parent
efdf5a3dc5
commit
0a5898b17d
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 0.2.0.9002
|
Version: 0.2.0.9003
|
||||||
Date: 2018-05-31
|
Date: 2018-06-08
|
||||||
Title: Antimicrobial Resistance Analysis
|
Title: Antimicrobial Resistance Analysis
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person(
|
person(
|
||||||
|
8
NEWS.md
8
NEWS.md
@ -1,13 +1,13 @@
|
|||||||
# 0.2.0.90xx (development version)
|
# 0.2.0.90xx (development version)
|
||||||
#### New
|
|
||||||
* Vignettes about frequency tables
|
* Vignettes about frequency tables
|
||||||
* Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
|
* Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
|
||||||
|
|
||||||
#### Changed
|
|
||||||
* Renamed `toConsole` parameter of `freq` to `as.data.frame`
|
* Renamed `toConsole` parameter of `freq` to `as.data.frame`
|
||||||
* Small translational improvements to the `septic_patients` dataset
|
* Small translational improvements to the `septic_patients` dataset
|
||||||
* Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
|
* Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
|
||||||
* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above.
|
* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
|
||||||
|
* Support for 1 or 2 columns as input for `guess_bactid`
|
||||||
|
* Fix for printing tibbles where characters would be accidentally transformed to factors
|
||||||
|
|
||||||
# 0.2.0 (latest stable version)
|
# 0.2.0 (latest stable version)
|
||||||
#### New
|
#### New
|
||||||
|
128
R/atc.R
128
R/atc.R
@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
|
|||||||
abcode
|
abcode
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Find bacteria ID based on genus/species
|
|
||||||
#'
|
|
||||||
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
|
||||||
#' @param x character vector to determine \code{bactid}
|
|
||||||
#' @export
|
|
||||||
#' @importFrom dplyr %>% filter slice pull
|
|
||||||
#' @return Character (vector).
|
|
||||||
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
|
||||||
#' @examples
|
|
||||||
#' # These examples all return "STAAUR", the ID of S. aureus:
|
|
||||||
#' guess_bactid("stau")
|
|
||||||
#' guess_bactid("STAU")
|
|
||||||
#' guess_bactid("staaur")
|
|
||||||
#' guess_bactid("S. aureus")
|
|
||||||
#' guess_bactid("S aureus")
|
|
||||||
#' guess_bactid("Staphylococcus aureus")
|
|
||||||
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
|
||||||
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
|
||||||
guess_bactid <- function(x) {
|
|
||||||
# remove dots and other non-text in case of "E. coli" except spaces
|
|
||||||
x <- gsub("[^a-zA-Z ]+", "", x)
|
|
||||||
# but spaces before and after should be omitted
|
|
||||||
x <- trimws(x, which = "both")
|
|
||||||
x.bak <- x
|
|
||||||
# replace space by regex sign
|
|
||||||
x <- gsub(" ", ".*", x, fixed = TRUE)
|
|
||||||
# add start and stop
|
|
||||||
x_species <- paste(x, 'species')
|
|
||||||
x <- paste0('^', x, '$')
|
|
||||||
|
|
||||||
for (i in 1:length(x)) {
|
|
||||||
if (tolower(x[i]) == '^e.*coli$') {
|
|
||||||
# avoid detection of Entamoeba coli in case of E. coli
|
|
||||||
x[i] <- 'Escherichia coli'
|
|
||||||
}
|
|
||||||
if (tolower(x[i]) == '^h.*influenzae$') {
|
|
||||||
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
|
||||||
x[i] <- 'Haemophilus influenzae'
|
|
||||||
}
|
|
||||||
if (tolower(x[i]) == '^st.*au$'
|
|
||||||
| tolower(x[i]) == '^stau$'
|
|
||||||
| tolower(x[i]) == '^staaur$') {
|
|
||||||
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
|
||||||
x[i] <- 'Staphylococcus aureus'
|
|
||||||
}
|
|
||||||
if (tolower(x[i]) == '^p.*aer$') {
|
|
||||||
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
|
||||||
x[i] <- 'Pseudomonas aeruginosa'
|
|
||||||
}
|
|
||||||
|
|
||||||
# translate known trivial names to genus+species
|
|
||||||
if (toupper(x.bak[i]) == 'MRSA'
|
|
||||||
| toupper(x.bak[i]) == 'VISA'
|
|
||||||
| toupper(x.bak[i]) == 'VRSA') {
|
|
||||||
x[i] <- 'Staphylococcus aureus'
|
|
||||||
}
|
|
||||||
if (toupper(x.bak[i]) == 'MRSE') {
|
|
||||||
x[i] <- 'Staphylococcus epidermidis'
|
|
||||||
}
|
|
||||||
if (toupper(x.bak[i]) == 'VRE') {
|
|
||||||
x[i] <- 'Enterococcus'
|
|
||||||
}
|
|
||||||
if (toupper(x.bak[i]) == 'MRPA') {
|
|
||||||
# multi resistant P. aeruginosa
|
|
||||||
x[i] <- 'Pseudomonas aeruginosa'
|
|
||||||
}
|
|
||||||
if (toupper(x.bak[i]) == 'PISP'
|
|
||||||
| toupper(x.bak[i]) == 'PRSP') {
|
|
||||||
# peni resistant S. pneumoniae
|
|
||||||
x[i] <- 'Streptococcus pneumoniae'
|
|
||||||
}
|
|
||||||
if (toupper(x.bak[i]) == 'VISP'
|
|
||||||
| toupper(x.bak[i]) == 'VRSP') {
|
|
||||||
# vanco resistant S. pneumoniae
|
|
||||||
x[i] <- 'Streptococcus pneumoniae'
|
|
||||||
}
|
|
||||||
|
|
||||||
# let's try the ID's first
|
|
||||||
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
|
||||||
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# now try exact match
|
|
||||||
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
|
||||||
}
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# try any match
|
|
||||||
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
|
||||||
}
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# try only genus, with 'species' attached
|
|
||||||
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
|
||||||
}
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# search for GLIMS code
|
|
||||||
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
|
||||||
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# try splitting of characters and then find ID
|
|
||||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
|
||||||
x_split <- x
|
|
||||||
x_length <- nchar(x.bak[i])
|
|
||||||
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
|
||||||
'.* ',
|
|
||||||
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
|
||||||
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
|
|
||||||
}
|
|
||||||
if (nrow(found) == 0) {
|
|
||||||
# try any match with text before and after original search string
|
|
||||||
# so "negative rods" will be "GNR"
|
|
||||||
if (x.bak[i] %like% "^Gram") {
|
|
||||||
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
|
|
||||||
# remove leading and trailing spaces again
|
|
||||||
x.bak[i] <- trimws(x.bak[i], which = "both")
|
|
||||||
}
|
|
||||||
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nrow(found) != 0) {
|
|
||||||
x[i] <- found %>%
|
|
||||||
slice(1) %>%
|
|
||||||
pull(bactid)
|
|
||||||
} else {
|
|
||||||
x[i] <- ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
x
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Find ATC code based on antibiotic property
|
#' Find ATC code based on antibiotic property
|
||||||
#'
|
#'
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
#' @param tbl a \code{data.frame} containing isolates.
|
#' @param tbl a \code{data.frame} containing isolates.
|
||||||
#' @param col_date column name of the result date (or date that is was received on the lab)
|
#' @param col_date column name of the result date (or date that is was received on the lab)
|
||||||
#' @param col_patient_id column name of the unique IDs of the patients
|
#' @param col_patient_id column name of the unique IDs of the patients
|
||||||
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)
|
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
|
||||||
#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
|
#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
|
||||||
#' @param col_specimen column name of the specimen type or group
|
#' @param col_specimen column name of the specimen type or group
|
||||||
#' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
|
#' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
|
||||||
@ -291,15 +291,15 @@ first_isolate <- function(tbl,
|
|||||||
return(tbl %>% pull(real_first_isolate))
|
return(tbl %>% pull(real_first_isolate))
|
||||||
}
|
}
|
||||||
|
|
||||||
scope.size <- tbl %>%
|
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||||
filter(
|
suppressWarnings(
|
||||||
suppressWarnings(
|
scope.size <- tbl %>%
|
||||||
|
filter(
|
||||||
row_number() %>% between(row.start,
|
row_number() %>% between(row.start,
|
||||||
row.end)
|
row.end),
|
||||||
),
|
genus != '') %>%
|
||||||
genus != '') %>%
|
nrow()
|
||||||
nrow()
|
)
|
||||||
|
|
||||||
|
|
||||||
# Analysis of first isolate ----
|
# Analysis of first isolate ----
|
||||||
all_first <- tbl %>%
|
all_first <- tbl %>%
|
||||||
@ -328,39 +328,44 @@ first_isolate <- function(tbl,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
type_param <- type
|
type_param <- type
|
||||||
all_first <- all_first %>%
|
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||||
mutate(key_ab_lag = lag(key_ab)) %>%
|
suppressWarnings(
|
||||||
mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
|
all_first <- all_first %>%
|
||||||
y = key_ab_lag,
|
mutate(key_ab_lag = lag(key_ab)) %>%
|
||||||
type = type_param,
|
mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
|
||||||
ignore_I = ignore_I,
|
y = key_ab_lag,
|
||||||
points_threshold = points_threshold,
|
type = type_param,
|
||||||
info = info)) %>%
|
ignore_I = ignore_I,
|
||||||
mutate(
|
points_threshold = points_threshold,
|
||||||
real_first_isolate =
|
info = info)) %>%
|
||||||
if_else(
|
mutate(
|
||||||
suppressWarnings(between(row_number(), row.start, row.end))
|
real_first_isolate =
|
||||||
& genus != ''
|
if_else(
|
||||||
& (other_pat_or_mo
|
between(row_number(), row.start, row.end)
|
||||||
| days_diff >= episode_days
|
& genus != ''
|
||||||
| key_ab_other),
|
& (other_pat_or_mo
|
||||||
TRUE,
|
| days_diff >= episode_days
|
||||||
FALSE))
|
| key_ab_other),
|
||||||
|
TRUE,
|
||||||
|
FALSE))
|
||||||
|
)
|
||||||
if (info == TRUE) {
|
if (info == TRUE) {
|
||||||
cat('\n')
|
cat('\n')
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
all_first <- all_first %>%
|
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||||
mutate(
|
suppressWarnings(
|
||||||
real_first_isolate =
|
all_first <- all_first %>%
|
||||||
if_else(
|
mutate(
|
||||||
suppressWarnings(between(row_number(), row.start, row.end))
|
real_first_isolate =
|
||||||
& genus != ''
|
if_else(
|
||||||
& (other_pat_or_mo
|
between(row_number(), row.start, row.end)
|
||||||
| days_diff >= episode_days),
|
& genus != ''
|
||||||
TRUE,
|
& (other_pat_or_mo
|
||||||
FALSE))
|
| days_diff >= episode_days),
|
||||||
|
TRUE,
|
||||||
|
FALSE))
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# first one as TRUE
|
# first one as TRUE
|
||||||
@ -402,8 +407,7 @@ first_isolate <- function(tbl,
|
|||||||
#' Key antibiotics based on bacteria ID
|
#' Key antibiotics based on bacteria ID
|
||||||
#'
|
#'
|
||||||
#' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}.
|
#' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}.
|
||||||
#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
|
#' @inheritParams first_isolate
|
||||||
#' @param info print warnings
|
|
||||||
#' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
|
#' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
|
||||||
#' @export
|
#' @export
|
||||||
#' @importFrom dplyr %>% mutate if_else
|
#' @importFrom dplyr %>% mutate if_else
|
||||||
|
185
R/guess_bactid.R
Normal file
185
R/guess_bactid.R
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
# ==================================================================== #
|
||||||
|
# TITLE #
|
||||||
|
# Antimicrobial Resistance (AMR) Analysis #
|
||||||
|
# #
|
||||||
|
# AUTHORS #
|
||||||
|
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
|
||||||
|
# #
|
||||||
|
# LICENCE #
|
||||||
|
# This program is free software; you can redistribute it and/or modify #
|
||||||
|
# it under the terms of the GNU General Public License version 2.0, #
|
||||||
|
# as published by the Free Software Foundation. #
|
||||||
|
# #
|
||||||
|
# This program is distributed in the hope that it will be useful, #
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
||||||
|
# GNU General Public License for more details. #
|
||||||
|
# ==================================================================== #
|
||||||
|
|
||||||
|
#' Find bacteria ID based on genus/species
|
||||||
|
#'
|
||||||
|
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
|
||||||
|
#' @param x character vector or a dataframe with one or two columns
|
||||||
|
#' @export
|
||||||
|
#' @importFrom dplyr %>% filter pull
|
||||||
|
#' @return Character (vector).
|
||||||
|
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||||
|
#' @examples
|
||||||
|
#' # These examples all return "STAAUR", the ID of S. aureus:
|
||||||
|
#' guess_bactid("stau")
|
||||||
|
#' guess_bactid("STAU")
|
||||||
|
#' guess_bactid("staaur")
|
||||||
|
#' guess_bactid("S. aureus")
|
||||||
|
#' guess_bactid("S aureus")
|
||||||
|
#' guess_bactid("Staphylococcus aureus")
|
||||||
|
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||||
|
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||||
|
#'
|
||||||
|
#' \dontrun{
|
||||||
|
#' df$bactid <- guess_bactid(df$microorganism_name)
|
||||||
|
#'
|
||||||
|
#' # the select function of tidyverse is also supported:
|
||||||
|
#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
|
||||||
|
#'
|
||||||
|
#' # and can even contain 2 columns, which is convenient for genus/species combinations:
|
||||||
|
#' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
|
||||||
|
#' # same result:
|
||||||
|
#' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid())
|
||||||
|
#' }
|
||||||
|
guess_bactid <- function(x) {
|
||||||
|
|
||||||
|
if (NCOL(x) == 2) {
|
||||||
|
# support tidyverse selection like: df %>% select(colA, colB)
|
||||||
|
# paste these columns together
|
||||||
|
x_vector <- vector("character", NROW(x))
|
||||||
|
for (i in 1:NROW(x)) {
|
||||||
|
x_vector[i] <- paste(pull(x[i,], 1), pull(x[i,], 2), sep = " ")
|
||||||
|
}
|
||||||
|
x <- x_vector
|
||||||
|
} else {
|
||||||
|
if (NCOL(x) > 2) {
|
||||||
|
stop('`x` can be 2 columns at most', call. = FALSE)
|
||||||
|
}
|
||||||
|
# support tidyverse selection like: df %>% select(colA)
|
||||||
|
if (!is.vector(x)) {
|
||||||
|
x <- pull(x, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# remove dots and other non-text in case of "E. coli" except spaces
|
||||||
|
x <- gsub("[^a-zA-Z ]+", "", x)
|
||||||
|
# but spaces before and after should be omitted
|
||||||
|
x <- trimws(x, which = "both")
|
||||||
|
x.bak <- x
|
||||||
|
# replace space by regex sign
|
||||||
|
x <- gsub(" ", ".*", x, fixed = TRUE)
|
||||||
|
# add start and stop
|
||||||
|
x_species <- paste(x, 'species')
|
||||||
|
x <- paste0('^', x, '$')
|
||||||
|
|
||||||
|
for (i in 1:length(x)) {
|
||||||
|
if (tolower(x[i]) == '^e.*coli$') {
|
||||||
|
# avoid detection of Entamoeba coli in case of E. coli
|
||||||
|
x[i] <- 'Escherichia coli'
|
||||||
|
}
|
||||||
|
if (tolower(x[i]) == '^h.*influenzae$') {
|
||||||
|
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
||||||
|
x[i] <- 'Haemophilus influenzae'
|
||||||
|
}
|
||||||
|
if (tolower(x[i]) == '^st.*au$'
|
||||||
|
| tolower(x[i]) == '^stau$'
|
||||||
|
| tolower(x[i]) == '^staaur$') {
|
||||||
|
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
||||||
|
x[i] <- 'Staphylococcus aureus'
|
||||||
|
}
|
||||||
|
if (tolower(x[i]) == '^p.*aer$') {
|
||||||
|
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
||||||
|
x[i] <- 'Pseudomonas aeruginosa'
|
||||||
|
}
|
||||||
|
|
||||||
|
# translate known trivial names to genus+species
|
||||||
|
if (!is.na(x.bak[i])) {
|
||||||
|
if (toupper(x.bak[i]) == 'MRSA'
|
||||||
|
| toupper(x.bak[i]) == 'VISA'
|
||||||
|
| toupper(x.bak[i]) == 'VRSA') {
|
||||||
|
x[i] <- 'Staphylococcus aureus'
|
||||||
|
}
|
||||||
|
if (toupper(x.bak[i]) == 'MRSE') {
|
||||||
|
x[i] <- 'Staphylococcus epidermidis'
|
||||||
|
}
|
||||||
|
if (toupper(x.bak[i]) == 'VRE') {
|
||||||
|
x[i] <- 'Enterococcus'
|
||||||
|
}
|
||||||
|
if (toupper(x.bak[i]) == 'MRPA') {
|
||||||
|
# multi resistant P. aeruginosa
|
||||||
|
x[i] <- 'Pseudomonas aeruginosa'
|
||||||
|
}
|
||||||
|
if (toupper(x.bak[i]) == 'PISP'
|
||||||
|
| toupper(x.bak[i]) == 'PRSP') {
|
||||||
|
# peni resistant S. pneumoniae
|
||||||
|
x[i] <- 'Streptococcus pneumoniae'
|
||||||
|
}
|
||||||
|
if (toupper(x.bak[i]) == 'VISP'
|
||||||
|
| toupper(x.bak[i]) == 'VRSP') {
|
||||||
|
# vanco resistant S. pneumoniae
|
||||||
|
x[i] <- 'Streptococcus pneumoniae'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# let's try the ID's first
|
||||||
|
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
||||||
|
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# now try exact match
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# try any match
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# try exact match of only genus, with 'species' attached
|
||||||
|
# (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname == x_species[i])
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# try any match of only genus, with 'species' attached
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# search for GLIMS code
|
||||||
|
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
||||||
|
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# try splitting of characters and then find ID
|
||||||
|
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
||||||
|
x_split <- x
|
||||||
|
x_length <- nchar(x.bak[i])
|
||||||
|
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
||||||
|
'.* ',
|
||||||
|
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
|
||||||
|
}
|
||||||
|
if (nrow(found) == 0) {
|
||||||
|
# try any match with text before and after original search string
|
||||||
|
# so "negative rods" will be "GNR"
|
||||||
|
if (x.bak[i] %like% "^Gram") {
|
||||||
|
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
|
||||||
|
# remove leading and trailing spaces again
|
||||||
|
x.bak[i] <- trimws(x.bak[i], which = "both")
|
||||||
|
}
|
||||||
|
if (!is.na(x.bak[i])) {
|
||||||
|
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nrow(found) != 0) {
|
||||||
|
x[i] <- as.character(found[1, 'bactid'])
|
||||||
|
} else {
|
||||||
|
x[i] <- ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x
|
||||||
|
}
|
26
R/print.R
26
R/print.R
@ -192,7 +192,7 @@ prettyprint_df <- function(x,
|
|||||||
if (n + 1 < nrow(x)) {
|
if (n + 1 < nrow(x)) {
|
||||||
# remove in between part, 1 extra for ~~~~ between first and last part
|
# remove in between part, 1 extra for ~~~~ between first and last part
|
||||||
rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
|
rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
|
||||||
x <- as.data.frame(x.bak[rows_list,])
|
x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE)
|
||||||
colnames(x) <- colnames(x.bak)
|
colnames(x) <- colnames(x.bak)
|
||||||
rownames(x) <- rownames(x.bak)[rows_list]
|
rownames(x) <- rownames(x.bak)[rows_list]
|
||||||
# set inbetweener between parts
|
# set inbetweener between parts
|
||||||
@ -204,12 +204,12 @@ prettyprint_df <- function(x,
|
|||||||
# class will be marked up per column
|
# class will be marked up per column
|
||||||
if (NROW(x.bak) > 0) {
|
if (NROW(x.bak) > 0) {
|
||||||
rownames.x <- rownames(x)
|
rownames.x <- rownames(x)
|
||||||
x <- x %>%
|
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||||
filter(
|
suppressWarnings(
|
||||||
suppressWarnings(
|
x <- x %>%
|
||||||
row_number() == 1)
|
filter(row_number() == 1) %>%
|
||||||
) %>%
|
rbind(x, stringsAsFactors = FALSE)
|
||||||
rbind(x, stringsAsFactors = FALSE)
|
)
|
||||||
rownames(x) <- c('*', rownames.x)
|
rownames(x) <- c('*', rownames.x)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,12 +252,12 @@ prettyprint_df <- function(x,
|
|||||||
}
|
}
|
||||||
|
|
||||||
# markup cols
|
# markup cols
|
||||||
|
|
||||||
for (i in 1:ncol(x)) {
|
for (i in 1:ncol(x)) {
|
||||||
if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
|
if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
|
||||||
class(x[, i]) <- class(x.bak[, i])
|
class(x[, i]) <- class(x.bak[, i])
|
||||||
}
|
}
|
||||||
try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
|
try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
|
||||||
|
|
||||||
# replace NAs
|
# replace NAs
|
||||||
if (nchar(na) < 2) {
|
if (nchar(na) < 2) {
|
||||||
# make as long as the text "NA"
|
# make as long as the text "NA"
|
||||||
@ -297,16 +297,16 @@ prettyprint_df <- function(x,
|
|||||||
colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
|
colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
# strip columns that do not fit (3 chars as margin)
|
# strip columns that do not fit (width + 2 extra chars as margin)
|
||||||
width_console <- options()$width
|
width_console <- options()$width
|
||||||
width_until_col <- x %>%
|
width_until_col <- x %>%
|
||||||
select(1:i) %>%
|
select(1:i) %>%
|
||||||
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
|
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
|
||||||
nchar() %>%
|
nchar() %>%
|
||||||
max()
|
max()
|
||||||
width_until_col_before <- x %>%
|
width_until_col_before <- x %>%
|
||||||
select(1:(max(i, 2) - 1)) %>%
|
select(1:(max(i, 2) - 1)) %>%
|
||||||
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
|
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
|
||||||
nchar() %>%
|
nchar() %>%
|
||||||
max()
|
max()
|
||||||
extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
|
extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
|
||||||
@ -340,9 +340,9 @@ prettyprint_df <- function(x,
|
|||||||
if (ncol(x) < ncol(x.bak)) {
|
if (ncol(x) < ncol(x.bak)) {
|
||||||
x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
|
x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
|
||||||
if (ncol(x.notshown) == 1) {
|
if (ncol(x.notshown) == 1) {
|
||||||
cat('...and 1 more column: ')
|
cat('... and 1 more column: ')
|
||||||
} else {
|
} else {
|
||||||
cat('...and', ncol(x.notshown), 'more columns: ')
|
cat('... and', ncol(x.notshown), 'more columns: ')
|
||||||
}
|
}
|
||||||
cat(x.notshown %>%
|
cat(x.notshown %>%
|
||||||
colnames() %>%
|
colnames() %>%
|
||||||
|
Binary file not shown.
@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,
|
|||||||
|
|
||||||
\item{col_patient_id}{column name of the unique IDs of the patients}
|
\item{col_patient_id}{column name of the unique IDs of the patients}
|
||||||
|
|
||||||
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)}
|
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
|
||||||
|
|
||||||
\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}
|
\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/atc.R
|
% Please edit documentation in R/guess_bactid.R
|
||||||
\name{guess_bactid}
|
\name{guess_bactid}
|
||||||
\alias{guess_bactid}
|
\alias{guess_bactid}
|
||||||
\title{Find bacteria ID based on genus/species}
|
\title{Find bacteria ID based on genus/species}
|
||||||
@ -7,13 +7,13 @@
|
|||||||
guess_bactid(x)
|
guess_bactid(x)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{x}{character vector to determine \code{bactid}}
|
\item{x}{character vector or a dataframe with one or two columns}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
Character (vector).
|
Character (vector).
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# These examples all return "STAAUR", the ID of S. aureus:
|
# These examples all return "STAAUR", the ID of S. aureus:
|
||||||
@ -25,6 +25,18 @@ guess_bactid("S aureus")
|
|||||||
guess_bactid("Staphylococcus aureus")
|
guess_bactid("Staphylococcus aureus")
|
||||||
guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||||
guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||||
|
|
||||||
|
\dontrun{
|
||||||
|
df$bactid <- guess_bactid(df$microorganism_name)
|
||||||
|
|
||||||
|
# the select function of tidyverse is also supported:
|
||||||
|
df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid()
|
||||||
|
|
||||||
|
# and can even contain 2 columns, which is convenient for genus/species combinations:
|
||||||
|
df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid()
|
||||||
|
# same result:
|
||||||
|
df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
\code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
\code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||||
|
@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl",
|
|||||||
\arguments{
|
\arguments{
|
||||||
\item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.}
|
\item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.}
|
||||||
|
|
||||||
\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
|
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
|
||||||
|
|
||||||
\item{info}{print warnings}
|
\item{info}{print progress}
|
||||||
|
|
||||||
\item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
|
\item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
context("atc.R")
|
context("atc.R")
|
||||||
|
|
||||||
|
|
||||||
test_that("atc_property works", {
|
test_that("atc_property works", {
|
||||||
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
|
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
|
||||||
expect_equivalent(atc_property("J01CA04", "DDD"), 1)
|
expect_equivalent(atc_property("J01CA04", "DDD"), 1)
|
||||||
@ -15,21 +14,6 @@ test_that("abname works", {
|
|||||||
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
|
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("guess_bactid works", {
|
|
||||||
expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF"))
|
|
||||||
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
|
||||||
expect_equal(guess_bactid("Negative rods"), "GNR")
|
|
||||||
expect_equal(guess_bactid(c("stau",
|
|
||||||
"STAU",
|
|
||||||
"staaur",
|
|
||||||
"S. aureus",
|
|
||||||
"S aureus",
|
|
||||||
"Staphylococcus aureus",
|
|
||||||
"MRSA",
|
|
||||||
"VISA")),
|
|
||||||
rep("STAAUR", 8))
|
|
||||||
})
|
|
||||||
|
|
||||||
test_that("guess_atc works", {
|
test_that("guess_atc works", {
|
||||||
expect_equal(guess_atc(c("J01FA01",
|
expect_equal(guess_atc(c("J01FA01",
|
||||||
"Erythromycin",
|
"Erythromycin",
|
||||||
|
40
tests/testthat/test-guess_bactid.R
Normal file
40
tests/testthat/test-guess_bactid.R
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
context("guess_bactid.R")
|
||||||
|
|
||||||
|
test_that("guess_bactid works", {
|
||||||
|
expect_identical(
|
||||||
|
guess_bactid(c("E. coli", "H. influenzae")),
|
||||||
|
c("ESCCOL", "HAEINF"))
|
||||||
|
|
||||||
|
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
||||||
|
|
||||||
|
expect_equal(guess_bactid("Negative rods"), "GNR")
|
||||||
|
|
||||||
|
expect_identical(
|
||||||
|
guess_bactid(c("stau",
|
||||||
|
"STAU",
|
||||||
|
"staaur",
|
||||||
|
"S. aureus",
|
||||||
|
"S aureus",
|
||||||
|
"Staphylococcus aureus",
|
||||||
|
"MRSA",
|
||||||
|
"VISA")),
|
||||||
|
rep("STAAUR", 8))
|
||||||
|
|
||||||
|
# select with one column
|
||||||
|
expect_identical(
|
||||||
|
septic_patients[1:10,] %>%
|
||||||
|
left_join_microorganisms() %>%
|
||||||
|
select(genus) %>%
|
||||||
|
guess_bactid(),
|
||||||
|
c("STC", "STC", "NEI", "STA", "STA",
|
||||||
|
"NEI", "ENT", "ENT", "ESC", "KLE"))
|
||||||
|
|
||||||
|
# select with two columns
|
||||||
|
expect_identical(
|
||||||
|
septic_patients[1:10,] %>%
|
||||||
|
pull(bactid),
|
||||||
|
septic_patients[1:10,] %>%
|
||||||
|
left_join_microorganisms() %>%
|
||||||
|
select(genus, species) %>%
|
||||||
|
guess_bactid())
|
||||||
|
})
|
Loading…
Reference in New Issue
Block a user