mirror of
https://github.com/msberends/AMR.git
synced 2025-01-24 23:44:35 +01:00
fix for printing tibbles, improve guess_bactid
This commit is contained in:
parent
efdf5a3dc5
commit
0a5898b17d
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 0.2.0.9002
|
||||
Date: 2018-05-31
|
||||
Version: 0.2.0.9003
|
||||
Date: 2018-06-08
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
person(
|
||||
|
8
NEWS.md
8
NEWS.md
@ -1,13 +1,13 @@
|
||||
# 0.2.0.90xx (development version)
|
||||
#### New
|
||||
|
||||
* Vignettes about frequency tables
|
||||
* Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
|
||||
|
||||
#### Changed
|
||||
* Renamed `toConsole` parameter of `freq` to `as.data.frame`
|
||||
* Small translational improvements to the `septic_patients` dataset
|
||||
* Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
|
||||
* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above.
|
||||
* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
|
||||
* Support for 1 or 2 columns as input for `guess_bactid`
|
||||
* Fix for printing tibbles where characters would be accidentally transformed to factors
|
||||
|
||||
# 0.2.0 (latest stable version)
|
||||
#### New
|
||||
|
128
R/atc.R
128
R/atc.R
@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
|
||||
abcode
|
||||
}
|
||||
|
||||
#' Find bacteria ID based on genus/species
|
||||
#'
|
||||
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
||||
#' @param x character vector to determine \code{bactid}
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% filter slice pull
|
||||
#' @return Character (vector).
|
||||
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||
#' @examples
|
||||
#' # These examples all return "STAAUR", the ID of S. aureus:
|
||||
#' guess_bactid("stau")
|
||||
#' guess_bactid("STAU")
|
||||
#' guess_bactid("staaur")
|
||||
#' guess_bactid("S. aureus")
|
||||
#' guess_bactid("S aureus")
|
||||
#' guess_bactid("Staphylococcus aureus")
|
||||
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||
guess_bactid <- function(x) {
|
||||
# remove dots and other non-text in case of "E. coli" except spaces
|
||||
x <- gsub("[^a-zA-Z ]+", "", x)
|
||||
# but spaces before and after should be omitted
|
||||
x <- trimws(x, which = "both")
|
||||
x.bak <- x
|
||||
# replace space by regex sign
|
||||
x <- gsub(" ", ".*", x, fixed = TRUE)
|
||||
# add start and stop
|
||||
x_species <- paste(x, 'species')
|
||||
x <- paste0('^', x, '$')
|
||||
|
||||
for (i in 1:length(x)) {
|
||||
if (tolower(x[i]) == '^e.*coli$') {
|
||||
# avoid detection of Entamoeba coli in case of E. coli
|
||||
x[i] <- 'Escherichia coli'
|
||||
}
|
||||
if (tolower(x[i]) == '^h.*influenzae$') {
|
||||
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
||||
x[i] <- 'Haemophilus influenzae'
|
||||
}
|
||||
if (tolower(x[i]) == '^st.*au$'
|
||||
| tolower(x[i]) == '^stau$'
|
||||
| tolower(x[i]) == '^staaur$') {
|
||||
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (tolower(x[i]) == '^p.*aer$') {
|
||||
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
||||
x[i] <- 'Pseudomonas aeruginosa'
|
||||
}
|
||||
|
||||
# translate known trivial names to genus+species
|
||||
if (toupper(x.bak[i]) == 'MRSA'
|
||||
| toupper(x.bak[i]) == 'VISA'
|
||||
| toupper(x.bak[i]) == 'VRSA') {
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'MRSE') {
|
||||
x[i] <- 'Staphylococcus epidermidis'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'VRE') {
|
||||
x[i] <- 'Enterococcus'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'MRPA') {
|
||||
# multi resistant P. aeruginosa
|
||||
x[i] <- 'Pseudomonas aeruginosa'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'PISP'
|
||||
| toupper(x.bak[i]) == 'PRSP') {
|
||||
# peni resistant S. pneumoniae
|
||||
x[i] <- 'Streptococcus pneumoniae'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'VISP'
|
||||
| toupper(x.bak[i]) == 'VRSP') {
|
||||
# vanco resistant S. pneumoniae
|
||||
x[i] <- 'Streptococcus pneumoniae'
|
||||
}
|
||||
|
||||
# let's try the ID's first
|
||||
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
||||
|
||||
if (nrow(found) == 0) {
|
||||
# now try exact match
|
||||
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try only genus, with 'species' attached
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# search for GLIMS code
|
||||
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
||||
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
||||
}
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try splitting of characters and then find ID
|
||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
||||
x_split <- x
|
||||
x_length <- nchar(x.bak[i])
|
||||
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
||||
'.* ',
|
||||
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match with text before and after original search string
|
||||
# so "negative rods" will be "GNR"
|
||||
if (x.bak[i] %like% "^Gram") {
|
||||
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
|
||||
# remove leading and trailing spaces again
|
||||
x.bak[i] <- trimws(x.bak[i], which = "both")
|
||||
}
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
|
||||
}
|
||||
|
||||
if (nrow(found) != 0) {
|
||||
x[i] <- found %>%
|
||||
slice(1) %>%
|
||||
pull(bactid)
|
||||
} else {
|
||||
x[i] <- ""
|
||||
}
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
#' Find ATC code based on antibiotic property
|
||||
#'
|
||||
|
@ -22,7 +22,7 @@
|
||||
#' @param tbl a \code{data.frame} containing isolates.
|
||||
#' @param col_date column name of the result date (or date that is was received on the lab)
|
||||
#' @param col_patient_id column name of the unique IDs of the patients
|
||||
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)
|
||||
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
|
||||
#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
|
||||
#' @param col_specimen column name of the specimen type or group
|
||||
#' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
|
||||
@ -291,15 +291,15 @@ first_isolate <- function(tbl,
|
||||
return(tbl %>% pull(real_first_isolate))
|
||||
}
|
||||
|
||||
scope.size <- tbl %>%
|
||||
filter(
|
||||
suppressWarnings(
|
||||
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||
suppressWarnings(
|
||||
scope.size <- tbl %>%
|
||||
filter(
|
||||
row_number() %>% between(row.start,
|
||||
row.end)
|
||||
),
|
||||
genus != '') %>%
|
||||
nrow()
|
||||
|
||||
row.end),
|
||||
genus != '') %>%
|
||||
nrow()
|
||||
)
|
||||
|
||||
# Analysis of first isolate ----
|
||||
all_first <- tbl %>%
|
||||
@ -328,39 +328,44 @@ first_isolate <- function(tbl,
|
||||
}
|
||||
}
|
||||
type_param <- type
|
||||
all_first <- all_first %>%
|
||||
mutate(key_ab_lag = lag(key_ab)) %>%
|
||||
mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
|
||||
y = key_ab_lag,
|
||||
type = type_param,
|
||||
ignore_I = ignore_I,
|
||||
points_threshold = points_threshold,
|
||||
info = info)) %>%
|
||||
mutate(
|
||||
real_first_isolate =
|
||||
if_else(
|
||||
suppressWarnings(between(row_number(), row.start, row.end))
|
||||
& genus != ''
|
||||
& (other_pat_or_mo
|
||||
| days_diff >= episode_days
|
||||
| key_ab_other),
|
||||
TRUE,
|
||||
FALSE))
|
||||
|
||||
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||
suppressWarnings(
|
||||
all_first <- all_first %>%
|
||||
mutate(key_ab_lag = lag(key_ab)) %>%
|
||||
mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
|
||||
y = key_ab_lag,
|
||||
type = type_param,
|
||||
ignore_I = ignore_I,
|
||||
points_threshold = points_threshold,
|
||||
info = info)) %>%
|
||||
mutate(
|
||||
real_first_isolate =
|
||||
if_else(
|
||||
between(row_number(), row.start, row.end)
|
||||
& genus != ''
|
||||
& (other_pat_or_mo
|
||||
| days_diff >= episode_days
|
||||
| key_ab_other),
|
||||
TRUE,
|
||||
FALSE))
|
||||
)
|
||||
if (info == TRUE) {
|
||||
cat('\n')
|
||||
}
|
||||
} else {
|
||||
all_first <- all_first %>%
|
||||
mutate(
|
||||
real_first_isolate =
|
||||
if_else(
|
||||
suppressWarnings(between(row_number(), row.start, row.end))
|
||||
& genus != ''
|
||||
& (other_pat_or_mo
|
||||
| days_diff >= episode_days),
|
||||
TRUE,
|
||||
FALSE))
|
||||
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||
suppressWarnings(
|
||||
all_first <- all_first %>%
|
||||
mutate(
|
||||
real_first_isolate =
|
||||
if_else(
|
||||
between(row_number(), row.start, row.end)
|
||||
& genus != ''
|
||||
& (other_pat_or_mo
|
||||
| days_diff >= episode_days),
|
||||
TRUE,
|
||||
FALSE))
|
||||
)
|
||||
}
|
||||
|
||||
# first one as TRUE
|
||||
@ -402,8 +407,7 @@ first_isolate <- function(tbl,
|
||||
#' Key antibiotics based on bacteria ID
|
||||
#'
|
||||
#' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}.
|
||||
#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
|
||||
#' @param info print warnings
|
||||
#' @inheritParams first_isolate
|
||||
#' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% mutate if_else
|
||||
|
185
R/guess_bactid.R
Normal file
185
R/guess_bactid.R
Normal file
@ -0,0 +1,185 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis #
|
||||
# #
|
||||
# AUTHORS #
|
||||
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
|
||||
# #
|
||||
# LICENCE #
|
||||
# This program is free software; you can redistribute it and/or modify #
|
||||
# it under the terms of the GNU General Public License version 2.0, #
|
||||
# as published by the Free Software Foundation. #
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, #
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
||||
# GNU General Public License for more details. #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Find bacteria ID based on genus/species
|
||||
#'
|
||||
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
|
||||
#' @param x character vector or a dataframe with one or two columns
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% filter pull
|
||||
#' @return Character (vector).
|
||||
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||
#' @examples
|
||||
#' # These examples all return "STAAUR", the ID of S. aureus:
|
||||
#' guess_bactid("stau")
|
||||
#' guess_bactid("STAU")
|
||||
#' guess_bactid("staaur")
|
||||
#' guess_bactid("S. aureus")
|
||||
#' guess_bactid("S aureus")
|
||||
#' guess_bactid("Staphylococcus aureus")
|
||||
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||
#'
|
||||
#' \dontrun{
|
||||
#' df$bactid <- guess_bactid(df$microorganism_name)
|
||||
#'
|
||||
#' # the select function of tidyverse is also supported:
|
||||
#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
|
||||
#'
|
||||
#' # and can even contain 2 columns, which is convenient for genus/species combinations:
|
||||
#' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
|
||||
#' # same result:
|
||||
#' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid())
|
||||
#' }
|
||||
guess_bactid <- function(x) {
|
||||
|
||||
if (NCOL(x) == 2) {
|
||||
# support tidyverse selection like: df %>% select(colA, colB)
|
||||
# paste these columns together
|
||||
x_vector <- vector("character", NROW(x))
|
||||
for (i in 1:NROW(x)) {
|
||||
x_vector[i] <- paste(pull(x[i,], 1), pull(x[i,], 2), sep = " ")
|
||||
}
|
||||
x <- x_vector
|
||||
} else {
|
||||
if (NCOL(x) > 2) {
|
||||
stop('`x` can be 2 columns at most', call. = FALSE)
|
||||
}
|
||||
# support tidyverse selection like: df %>% select(colA)
|
||||
if (!is.vector(x)) {
|
||||
x <- pull(x, 1)
|
||||
}
|
||||
}
|
||||
|
||||
# remove dots and other non-text in case of "E. coli" except spaces
|
||||
x <- gsub("[^a-zA-Z ]+", "", x)
|
||||
# but spaces before and after should be omitted
|
||||
x <- trimws(x, which = "both")
|
||||
x.bak <- x
|
||||
# replace space by regex sign
|
||||
x <- gsub(" ", ".*", x, fixed = TRUE)
|
||||
# add start and stop
|
||||
x_species <- paste(x, 'species')
|
||||
x <- paste0('^', x, '$')
|
||||
|
||||
for (i in 1:length(x)) {
|
||||
if (tolower(x[i]) == '^e.*coli$') {
|
||||
# avoid detection of Entamoeba coli in case of E. coli
|
||||
x[i] <- 'Escherichia coli'
|
||||
}
|
||||
if (tolower(x[i]) == '^h.*influenzae$') {
|
||||
# avoid detection of Haematobacter influenzae in case of H. influenzae
|
||||
x[i] <- 'Haemophilus influenzae'
|
||||
}
|
||||
if (tolower(x[i]) == '^st.*au$'
|
||||
| tolower(x[i]) == '^stau$'
|
||||
| tolower(x[i]) == '^staaur$') {
|
||||
# avoid detection of Staphylococcus auricularis in case of S. aureus
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (tolower(x[i]) == '^p.*aer$') {
|
||||
# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
|
||||
x[i] <- 'Pseudomonas aeruginosa'
|
||||
}
|
||||
|
||||
# translate known trivial names to genus+species
|
||||
if (!is.na(x.bak[i])) {
|
||||
if (toupper(x.bak[i]) == 'MRSA'
|
||||
| toupper(x.bak[i]) == 'VISA'
|
||||
| toupper(x.bak[i]) == 'VRSA') {
|
||||
x[i] <- 'Staphylococcus aureus'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'MRSE') {
|
||||
x[i] <- 'Staphylococcus epidermidis'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'VRE') {
|
||||
x[i] <- 'Enterococcus'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'MRPA') {
|
||||
# multi resistant P. aeruginosa
|
||||
x[i] <- 'Pseudomonas aeruginosa'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'PISP'
|
||||
| toupper(x.bak[i]) == 'PRSP') {
|
||||
# peni resistant S. pneumoniae
|
||||
x[i] <- 'Streptococcus pneumoniae'
|
||||
}
|
||||
if (toupper(x.bak[i]) == 'VISP'
|
||||
| toupper(x.bak[i]) == 'VRSP') {
|
||||
# vanco resistant S. pneumoniae
|
||||
x[i] <- 'Streptococcus pneumoniae'
|
||||
}
|
||||
}
|
||||
|
||||
# let's try the ID's first
|
||||
found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
|
||||
|
||||
if (nrow(found) == 0) {
|
||||
# now try exact match
|
||||
found <- AMR::microorganisms %>% filter(fullname == x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try exact match of only genus, with 'species' attached
|
||||
# (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
|
||||
found <- AMR::microorganisms %>% filter(fullname == x_species[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match of only genus, with 'species' attached
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# search for GLIMS code
|
||||
if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
|
||||
found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
|
||||
}
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try splitting of characters and then find ID
|
||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
|
||||
x_split <- x
|
||||
x_length <- nchar(x.bak[i])
|
||||
x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
|
||||
'.* ',
|
||||
x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
|
||||
}
|
||||
if (nrow(found) == 0) {
|
||||
# try any match with text before and after original search string
|
||||
# so "negative rods" will be "GNR"
|
||||
if (x.bak[i] %like% "^Gram") {
|
||||
x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
|
||||
# remove leading and trailing spaces again
|
||||
x.bak[i] <- trimws(x.bak[i], which = "both")
|
||||
}
|
||||
if (!is.na(x.bak[i])) {
|
||||
found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
|
||||
}
|
||||
}
|
||||
|
||||
if (nrow(found) != 0) {
|
||||
x[i] <- as.character(found[1, 'bactid'])
|
||||
} else {
|
||||
x[i] <- ""
|
||||
}
|
||||
}
|
||||
x
|
||||
}
|
26
R/print.R
26
R/print.R
@ -192,7 +192,7 @@ prettyprint_df <- function(x,
|
||||
if (n + 1 < nrow(x)) {
|
||||
# remove in between part, 1 extra for ~~~~ between first and last part
|
||||
rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
|
||||
x <- as.data.frame(x.bak[rows_list,])
|
||||
x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE)
|
||||
colnames(x) <- colnames(x.bak)
|
||||
rownames(x) <- rownames(x.bak)[rows_list]
|
||||
# set inbetweener between parts
|
||||
@ -204,12 +204,12 @@ prettyprint_df <- function(x,
|
||||
# class will be marked up per column
|
||||
if (NROW(x.bak) > 0) {
|
||||
rownames.x <- rownames(x)
|
||||
x <- x %>%
|
||||
filter(
|
||||
suppressWarnings(
|
||||
row_number() == 1)
|
||||
) %>%
|
||||
rbind(x, stringsAsFactors = FALSE)
|
||||
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
|
||||
suppressWarnings(
|
||||
x <- x %>%
|
||||
filter(row_number() == 1) %>%
|
||||
rbind(x, stringsAsFactors = FALSE)
|
||||
)
|
||||
rownames(x) <- c('*', rownames.x)
|
||||
}
|
||||
|
||||
@ -252,12 +252,12 @@ prettyprint_df <- function(x,
|
||||
}
|
||||
|
||||
# markup cols
|
||||
|
||||
for (i in 1:ncol(x)) {
|
||||
if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
|
||||
class(x[, i]) <- class(x.bak[, i])
|
||||
}
|
||||
try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
|
||||
|
||||
# replace NAs
|
||||
if (nchar(na) < 2) {
|
||||
# make as long as the text "NA"
|
||||
@ -297,16 +297,16 @@ prettyprint_df <- function(x,
|
||||
colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
|
||||
}
|
||||
|
||||
# strip columns that do not fit (3 chars as margin)
|
||||
# strip columns that do not fit (width + 2 extra chars as margin)
|
||||
width_console <- options()$width
|
||||
width_until_col <- x %>%
|
||||
select(1:i) %>%
|
||||
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
|
||||
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
|
||||
nchar() %>%
|
||||
max()
|
||||
width_until_col_before <- x %>%
|
||||
select(1:(max(i, 2) - 1)) %>%
|
||||
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
|
||||
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
|
||||
nchar() %>%
|
||||
max()
|
||||
extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
|
||||
@ -340,9 +340,9 @@ prettyprint_df <- function(x,
|
||||
if (ncol(x) < ncol(x.bak)) {
|
||||
x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
|
||||
if (ncol(x.notshown) == 1) {
|
||||
cat('...and 1 more column: ')
|
||||
cat('... and 1 more column: ')
|
||||
} else {
|
||||
cat('...and', ncol(x.notshown), 'more columns: ')
|
||||
cat('... and', ncol(x.notshown), 'more columns: ')
|
||||
}
|
||||
cat(x.notshown %>%
|
||||
colnames() %>%
|
||||
|
Binary file not shown.
@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,
|
||||
|
||||
\item{col_patient_id}{column name of the unique IDs of the patients}
|
||||
|
||||
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)}
|
||||
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
|
||||
|
||||
\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/atc.R
|
||||
% Please edit documentation in R/guess_bactid.R
|
||||
\name{guess_bactid}
|
||||
\alias{guess_bactid}
|
||||
\title{Find bacteria ID based on genus/species}
|
||||
@ -7,13 +7,13 @@
|
||||
guess_bactid(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{character vector to determine \code{bactid}}
|
||||
\item{x}{character vector or a dataframe with one or two columns}
|
||||
}
|
||||
\value{
|
||||
Character (vector).
|
||||
}
|
||||
\description{
|
||||
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
|
||||
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
|
||||
}
|
||||
\examples{
|
||||
# These examples all return "STAAUR", the ID of S. aureus:
|
||||
@ -25,6 +25,18 @@ guess_bactid("S aureus")
|
||||
guess_bactid("Staphylococcus aureus")
|
||||
guess_bactid("MRSA") # Methicillin-resistant S. aureus
|
||||
guess_bactid("VISA") # Vancomycin Intermediate S. aureus
|
||||
|
||||
\dontrun{
|
||||
df$bactid <- guess_bactid(df$microorganism_name)
|
||||
|
||||
# the select function of tidyverse is also supported:
|
||||
df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid()
|
||||
|
||||
# and can even contain 2 columns, which is convenient for genus/species combinations:
|
||||
df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid()
|
||||
# same result:
|
||||
df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid())
|
||||
}
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||
|
@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl",
|
||||
\arguments{
|
||||
\item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.}
|
||||
|
||||
\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
|
||||
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
|
||||
|
||||
\item{info}{print warnings}
|
||||
\item{info}{print progress}
|
||||
|
||||
\item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
context("atc.R")
|
||||
|
||||
|
||||
test_that("atc_property works", {
|
||||
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
|
||||
expect_equivalent(atc_property("J01CA04", "DDD"), 1)
|
||||
@ -15,21 +14,6 @@ test_that("abname works", {
|
||||
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
|
||||
})
|
||||
|
||||
test_that("guess_bactid works", {
|
||||
expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF"))
|
||||
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
||||
expect_equal(guess_bactid("Negative rods"), "GNR")
|
||||
expect_equal(guess_bactid(c("stau",
|
||||
"STAU",
|
||||
"staaur",
|
||||
"S. aureus",
|
||||
"S aureus",
|
||||
"Staphylococcus aureus",
|
||||
"MRSA",
|
||||
"VISA")),
|
||||
rep("STAAUR", 8))
|
||||
})
|
||||
|
||||
test_that("guess_atc works", {
|
||||
expect_equal(guess_atc(c("J01FA01",
|
||||
"Erythromycin",
|
||||
|
40
tests/testthat/test-guess_bactid.R
Normal file
40
tests/testthat/test-guess_bactid.R
Normal file
@ -0,0 +1,40 @@
|
||||
context("guess_bactid.R")
|
||||
|
||||
test_that("guess_bactid works", {
|
||||
expect_identical(
|
||||
guess_bactid(c("E. coli", "H. influenzae")),
|
||||
c("ESCCOL", "HAEINF"))
|
||||
|
||||
expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
|
||||
|
||||
expect_equal(guess_bactid("Negative rods"), "GNR")
|
||||
|
||||
expect_identical(
|
||||
guess_bactid(c("stau",
|
||||
"STAU",
|
||||
"staaur",
|
||||
"S. aureus",
|
||||
"S aureus",
|
||||
"Staphylococcus aureus",
|
||||
"MRSA",
|
||||
"VISA")),
|
||||
rep("STAAUR", 8))
|
||||
|
||||
# select with one column
|
||||
expect_identical(
|
||||
septic_patients[1:10,] %>%
|
||||
left_join_microorganisms() %>%
|
||||
select(genus) %>%
|
||||
guess_bactid(),
|
||||
c("STC", "STC", "NEI", "STA", "STA",
|
||||
"NEI", "ENT", "ENT", "ESC", "KLE"))
|
||||
|
||||
# select with two columns
|
||||
expect_identical(
|
||||
septic_patients[1:10,] %>%
|
||||
pull(bactid),
|
||||
septic_patients[1:10,] %>%
|
||||
left_join_microorganisms() %>%
|
||||
select(genus, species) %>%
|
||||
guess_bactid())
|
||||
})
|
Loading…
Reference in New Issue
Block a user