1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-25 18:46:11 +01:00

AI improvements

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-12-07 12:04:55 +01:00
parent 87ad6da745
commit 8e8a9cd190
19 changed files with 199 additions and 140 deletions

View File

@ -14,15 +14,13 @@ R 3:
# remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file # remove vignettes folder and get VignetteBuilder field out of DESCRIPTION file
- rm -rf vignettes - rm -rf vignettes
- Rscript -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")' - Rscript -e 'd <- read.dcf("DESCRIPTION"); d[, colnames(d) == "VignetteBuilder"] <- NA; write.dcf(d, "DESCRIPTION")'
# set environmental variable
- Rscript -e 'Sys.setenv(NOT_CRAN = "true")'
# build package # build package
- R CMD build . --no-build-vignettes --no-manual - R CMD build . --no-build-vignettes --no-manual
- PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1) - PKG_FILE_NAME=$(ls -1t *.tar.gz | head -n 1)
- R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran - R CMD check "${PKG_FILE_NAME}" --no-build-vignettes --no-manual --as-cran
# code coverage # code coverage
- apt-get install --yes git - apt-get install --yes git
- Rscript -e 'cc <- covr::package_coverage(); covr::codecov(coverage = cc, token = "50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca"); cat("Code coverage:", covr::percent_coverage(cc))' - Rscript -e "cc <- covr::package_coverage(); covr::codecov(coverage = cc, token = '50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca'); cat('Code coverage:', covr::percent_coverage(cc))"
coverage: '/Code coverage: \d+\.\d+/' coverage: '/Code coverage: \d+\.\d+/'
artifacts: artifacts:
paths: paths:

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.5.0.9001 Version: 0.5.0.9002
Date: 2018-12-05 Date: 2018-12-07
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(

View File

@ -33,6 +33,7 @@ S3method(skewness,data.frame)
S3method(skewness,default) S3method(skewness,default)
S3method(skewness,matrix) S3method(skewness,matrix)
S3method(summary,mic) S3method(summary,mic)
S3method(summary,mo)
S3method(summary,rsi) S3method(summary,rsi)
export("%like%") export("%like%")
export(EUCAST_rules) export(EUCAST_rules)
@ -168,6 +169,7 @@ exportMethods(skewness.data.frame)
exportMethods(skewness.default) exportMethods(skewness.default)
exportMethods(skewness.matrix) exportMethods(skewness.matrix)
exportMethods(summary.mic) exportMethods(summary.mic)
exportMethods(summary.mo)
exportMethods(summary.rsi) exportMethods(summary.rsi)
importFrom(crayon,bgGreen) importFrom(crayon,bgGreen)
importFrom(crayon,bgRed) importFrom(crayon,bgRed)

View File

@ -2,14 +2,20 @@
#### New #### New
* Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values. * Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values.
* Function `mo_renamed` to get a list of all returned values from `as.mo` that have had taxonomic renaming
#### Changed #### Changed
* Improvements for `as.mo`: * Improvements for `as.mo`:
* Finds better results when input is in other languages * Finds better results when input is in other languages
* Better handling for subspecies * Better handling for subspecies
* Better handling for *Salmonellae* * Better handling for *Salmonellae*
* Uncertain results will now be looked for by default - these results will be returned with an informative warning
* Extended manual text about algorithms
* Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank * Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank
* Reduce false positives for `is.rsi.eligible`
* Summaries of class `mo` will now return the top 3 and the unique count, e.g. using `summary(mo)`
* Small text updates to summaries of class `rsi` and `mic`
* Function `as.mo` now prints a progress bar when it takes more than 3 seconds to get results
# 0.5.0 (latest stable release) # 0.5.0 (latest stable release)

View File

@ -23,12 +23,13 @@
#' @param info print progress #' @param info print progress
#' @param rules a character vector that specifies which rules should be applied - one or more of \code{c("breakpoints", "expert", "other", "all")} #' @param rules a character vector that specifies which rules should be applied - one or more of \code{c("breakpoints", "expert", "other", "all")}
#' @param verbose a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected #' @param verbose a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected
#' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Details #' @param amcl,amik,amox,ampi,azit,azlo,aztr,cefa,cfep,cfot,cfox,cfra,cfta,cftr,cfur,chlo,cipr,clar,clin,clox,coli,czol,dapt,doxy,erta,eryt,fosf,fusi,gent,imip,kana,levo,linc,line,mero,mezl,mino,moxi,nali,neom,neti,nitr,norf,novo,oflo,oxac,peni,pipe,pita,poly,pris,qida,rifa,roxi,siso,teic,tetr,tica,tige,tobr,trim,trsu,vanc column name of an antibiotic, see Antibiotics
#' @param col_bactid deprecated, use \code{col_mo} instead. #' @param col_bactid deprecated, use \code{col_mo} instead.
#' @param ... parameters that are passed on to \code{eucast_rules} #' @param ... parameters that are passed on to \code{eucast_rules}
#' @inheritParams first_isolate #' @inheritParams first_isolate
#' @details To define antibiotics column names, input a text or use \code{NA} to skip a column (e.g. \code{tica = NA}). Non-existing columns will anyway be skipped with a warning. See the Antibiotics section for an explanation of the abbreviations.
#' @section Antibiotics: #' @section Antibiotics:
#' To define antibiotics column names, input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will be skipped anyway, with a warning.
#'
#' Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) #' Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code})
#' #'
#' \strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}), #' \strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}),

View File

@ -23,7 +23,7 @@
#' @param country country code to determine guidelines. EUCAST rules will be used when left empty, see Details. Should be or a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands). #' @param country country code to determine guidelines. EUCAST rules will be used when left empty, see Details. Should be or a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).
#' @param info print progress #' @param info print progress
#' @inheritParams eucast_rules #' @inheritParams eucast_rules
#' @param metr column name of an antibiotic. Use \code{NA} to skip a column, like \code{tica = NA}. Non-existing columns will anyway be skipped. See the Antibiotics section for an explanation of the abbreviations. #' @param metr column name of an antibiotic, see Antibiotics
#' @param ... parameters that are passed on to methods #' @param ... parameters that are passed on to methods
#' @inheritSection eucast_rules Antibiotics #' @inheritSection eucast_rules Antibiotics
#' @details When \code{country} will be left blank, guidelines will be taken from EUCAST Expert Rules Version 3.1 "Intrinsic Resistance and Exceptional Phenotypes Tables" (\url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf}). #' @details When \code{country} will be left blank, guidelines will be taken from EUCAST Expert Rules Version 3.1 "Intrinsic Resistance and Exceptional Phenotypes Tables" (\url{http://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Expert_Rules/Expert_rules_intrinsic_exceptional_V3.1.pdf}).

12
R/mic.R
View File

@ -200,12 +200,12 @@ summary.mic <- function(object, ...) {
n_total <- x %>% length() n_total <- x %>% length()
x <- x[!is.na(x)] x <- x[!is.na(x)]
n <- x %>% length() n <- x %>% length()
lst <- c('mic', c(
n_total - n, "Class" = 'mic',
sort(x)[1] %>% as.character(), "<NA>" = n_total - n,
sort(x)[n] %>% as.character()) "Min." = sort(x)[1] %>% as.character(),
names(lst) <- c("Mode", "<NA>", "Min.", "Max.") "Max." = sort(x)[n] %>% as.character()
lst )
} }
#' @exportMethod plot.mic #' @exportMethod plot.mic

View File

@ -51,7 +51,7 @@ percent <- function(x, round = 1, force_zero = FALSE, ...) {
check_available_columns <- function(tbl, col.list, info = TRUE) { check_available_columns <- function(tbl, col.list, info = TRUE) {
# check columns # check columns
col.list <- col.list[!is.na(col.list)] col.list <- col.list[!is.na(col.list) & !is.null(col.list)]
names(col.list) <- col.list names(col.list) <- col.list
col.list.bak <- col.list col.list.bak <- col.list
# are they available as upper case or lower case then? # are they available as upper case or lower case then?

70
R/mo.R
View File

@ -26,7 +26,7 @@
#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. #' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L.
#' #'
#' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D. #' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.
#' @param allow_uncertain a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result. #' @param allow_uncertain a logical to indicate whether the input should be checked for less possible results, see Details
#' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set. #' @param reference_df a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.
#' @rdname as.mo #' @rdname as.mo
#' @aliases mo #' @aliases mo
@ -34,11 +34,11 @@
#' @details #' @details
#' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr #' A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
#' \preformatted{ #' \preformatted{
#' Code Full name #' Code Full name
#' --------------- -------------------------------------- #' --------------- --------------------------------------
#' B_KLBSL Klebsiella #' B_KLBSL Klebsiella
#' B_KLBSL_PNE Klebsiella pneumoniae #' B_KLBSL_PNE Klebsiella pneumoniae
#' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis #' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis
#' | | | | #' | | | |
#' | | | | #' | | | |
#' | | | ----> subspecies, a 3-4 letter acronym #' | | | ----> subspecies, a 3-4 letter acronym
@ -57,7 +57,7 @@
#' \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches} #' \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
#' } #' }
#' #'
#' A couple of effects because of these rules #' A couple of effects because of these rules:
#' \itemize{ #' \itemize{
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} #' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason} #' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
@ -66,6 +66,13 @@
#' } #' }
#' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms. #' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
#' #'
#' When using \code{allow_uncertain = TRUE} (which is the default setting), it will use additional rules if all previous AI rules failed to get valid results. Examples:
#' \itemize{
#' \item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPTC_GRB}) needs review.}
#' \item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
#' \item{\code{"D. spartina"}. This is the abbreviation of an old taxonomic name: \emph{Didymosphaeria spartinae} (the last "e" was missing from the input). This fungus was renamed to \emph{Leptosphaeria obiones}, so a warning will be thrown that this result (\code{F_LPTSP_OBI}) needs review.}
#' }
#'
#' \code{guess_mo} is an alias of \code{as.mo}. #' \code{guess_mo} is an alias of \code{as.mo}.
#' @section ITIS: #' @section ITIS:
#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} #' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr}
@ -94,6 +101,7 @@
#' as.mo("S. aureus") #' as.mo("S. aureus")
#' as.mo("S aureus") #' as.mo("S aureus")
#' as.mo("Staphylococcus aureus") #' as.mo("Staphylococcus aureus")
#' as.mo("Staphylococcus aureus (MRSA)")
#' as.mo("MRSA") # Methicillin Resistant S. aureus #' as.mo("MRSA") # Methicillin Resistant S. aureus
#' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VISA") # Vancomycin Intermediate S. aureus
#' as.mo("VRSA") # Vancomycin Resistant S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus
@ -136,7 +144,7 @@
#' df <- df %>% #' df <- df %>%
#' mutate(mo = as.mo(paste(genus, species))) #' mutate(mo = as.mo(paste(genus, species)))
#' } #' }
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) { as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE, reference_df = NULL) {
mo <- mo_validate(x = x, property = "mo", mo <- mo_validate(x = x, property = "mo",
Becker = Becker, Lancefield = Lancefield, Becker = Becker, Lancefield = Lancefield,
allow_uncertain = allow_uncertain, reference_df = reference_df) allow_uncertain = allow_uncertain, reference_df = reference_df)
@ -155,11 +163,11 @@ is.mo <- function(x) {
#' @export #' @export
guess_mo <- as.mo guess_mo <- as.mo
#' @importFrom dplyr %>% pull left_join n_distinct #' @importFrom dplyr %>% pull left_join n_distinct progress_estimated
#' @importFrom data.table data.table as.data.table setkey #' @importFrom data.table data.table as.data.table setkey
#' @importFrom crayon magenta red italic #' @importFrom crayon magenta red italic
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
allow_uncertain = FALSE, reference_df = NULL, allow_uncertain = TRUE, reference_df = NULL,
property = "mo", clear_options = TRUE) { property = "mo", clear_options = TRUE) {
if (!"AMR" %in% base::.packages()) { if (!"AMR" %in% base::.packages()) {
@ -272,7 +280,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n')) # cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
# cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n')) # cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
progress <- progress_estimated(n = length(x), min_time = 3)
for (i in 1:length(x)) { for (i in 1:length(x)) {
progress$tick()$print()
if (identical(x_trimmed[i], "")) { if (identical(x_trimmed[i], "")) {
# empty values # empty values
x[i] <- NA_character_ x[i] <- NA_character_
@ -615,8 +628,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
} else { } else {
x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]] x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
} }
warning(red(paste0("UNCERTAIN - '", warning(red(paste0('UNCERTAIN - "',
x_backup[i], "' -> ", italic(found[1, name]))), x_backup[i], '" -> ', italic(found[1, name]))),
call. = FALSE, immediate. = TRUE) call. = FALSE, immediate. = TRUE)
renamed_note(name_old = found[1, name], renamed_note(name_old = found[1, name],
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname], name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
@ -627,13 +640,17 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
} }
# (2) strip values between brackets ---- # (2) strip values between brackets ----
found <- microorganismsDT[fullname %like% gsub("( [(].*[)]) ", " ", x_withspaces[i]) x_backup_stripped <- gsub("( [(].*[)])", "", x_backup[i])
| fullname %like% gsub("( [(].*[)]) ", " ", x_backup[i]) x_backup_stripped <- trimws(gsub(" ", " ", x_backup_stripped, fixed = TRUE))
| fullname %like% gsub("( [(].*[)]) ", " ", x[i]),] x_species_stripped <- gsub("( [(].*[)])", "", x_species[i])
x_species_stripped <- trimws(gsub(" ", " ", x_species_stripped, fixed = TRUE))
found <- microorganismsDT[fullname %like% x_backup_stripped
| fullname %like% x_species_stripped,]
if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) { if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) {
x[i] <- found[1, ..property][[1]] x[i] <- found[1, ..property][[1]]
warning(red(paste0("UNCERTAIN - '", warning(red(paste0('UNCERTAIN - "',
x_backup[i], "' -> ", italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")), x_backup[i], '" -> ', italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")),
call. = FALSE, immediate. = TRUE) call. = FALSE, immediate. = TRUE)
next next
} }
@ -647,8 +664,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE))) found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE)))
if (!is.na(found)) { if (!is.na(found)) {
found <- microorganismsDT[mo == found, ..property][[1]] found <- microorganismsDT[mo == found, ..property][[1]]
warning(red(paste0("UNCERTAIN - '", warning(red(paste0('UNCERTAIN - "',
z, "' -> ", italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")), z, '" -> ', italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")),
call. = FALSE, immediate. = TRUE) call. = FALSE, immediate. = TRUE)
return(found[1L]) return(found[1L])
} }
@ -795,6 +812,21 @@ print.mo <- function(x, ...) {
print.default(x, quote = FALSE) print.default(x, quote = FALSE)
} }
#' @exportMethod summary.mo
#' @export
#' @noRd
summary.mo <- function(object, ...) {
  # Builds a named vector describing a vector of class `mo`:
  # the class label, the number of missing values, the number of distinct
  # non-missing values and the first three entries returned by top_freq().
  # NOTE(review): top_freq()/freq() are defined elsewhere in the package;
  # presumably they yield the most frequent values - confirm.
  missing_flags <- is.na(object)
  most_frequent <- unname(top_freq(freq(object), 3))
  c(
    "Class" = "mo",
    "<NA>" = length(object[missing_flags]),
    "Unique" = dplyr::n_distinct(object[!missing_flags]),
    "#1" = most_frequent[1],
    "#2" = most_frequent[2],
    "#3" = most_frequent[3]
  )
}
#' @exportMethod as.data.frame.mo #' @exportMethod as.data.frame.mo
#' @export #' @export
#' @noRd #' @noRd

19
R/rsi.R
View File

@ -39,14 +39,20 @@
#' barplot(rsi_data) # for frequencies #' barplot(rsi_data) # for frequencies
#' freq(rsi_data) # frequency table with informative header #' freq(rsi_data) # frequency table with informative header
#' #'
#' # fastest way to transform all columns with already valid AB results to class `rsi`: #' # using dplyr's mutate
#' library(dplyr) #' library(dplyr)
#' septic_patients %>% #' septic_patients %>%
#' mutate_at(vars(peni:rifa), as.rsi)
#'
#' # fastest way to transform all columns with already valid AB results to class `rsi`:
#' septic_patients %>%
#' mutate_if(is.rsi.eligible, #' mutate_if(is.rsi.eligible,
#' as.rsi) #' as.rsi)
as.rsi <- function(x) { as.rsi <- function(x) {
if (is.rsi(x)) { if (is.rsi(x)) {
x x
} else if (identical(levels(x), c("S", "I", "R"))) {
structure(x, class = c('rsi', 'ordered', 'factor'))
} else { } else {
x <- x %>% unlist() x <- x %>% unlist()
@ -102,14 +108,15 @@ is.rsi.eligible <- function(x) {
| is.numeric(x) | is.numeric(x)
| is.mo(x) | is.mo(x)
| identical(class(x), "Date") | identical(class(x), "Date")
| identical(levels(x), c("S", "I", "R"))) { | is.rsi(x)) {
# no transformation needed # no transformation needed
FALSE FALSE
} else { } else {
# check all but a-z # check all but a-z
x <- unique(gsub("[^RSIrsi]+", "", unique(x))) y <- unique(gsub("[^RSIrsi]+", "", unique(x)))
all(x %in% c("R", "I", "S", "", NA_character_)) & !all(y %in% c("", NA_character_)) &
!all(x %in% c("", NA_character_)) all(y %in% c("R", "I", "S", "", NA_character_)) &
max(nchar(as.character(x)), na.rm = TRUE) < 8
} }
} }
@ -128,7 +135,7 @@ print.rsi <- function(x, ...) {
summary.rsi <- function(object, ...) { summary.rsi <- function(object, ...) {
x <- object x <- object
c( c(
"Mode" = 'rsi', "Class" = 'rsi',
"<NA>" = sum(is.na(x)), "<NA>" = sum(is.na(x)),
"Sum S" = sum(x == "S", na.rm = TRUE), "Sum S" = sum(x == "S", na.rm = TRUE),
"Sum IR" = sum(x %in% c("I", "R"), na.rm = TRUE), "Sum IR" = sum(x %in% c("I", "R"), na.rm = TRUE),

View File

@ -36,7 +36,7 @@ on_failure:
- appveyor PushArtifact failure.zip - appveyor PushArtifact failure.zip
on_success: on_success:
- Rscript -e "library(covr); codecov(token = '50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca')" - Rscript -e "library(covr); cc <- package_coverage(); codecov(coverage = cc, token = '50ffa0aa-fee0-4f8b-a11d-8c7edc6d32ca'); cat('Code coverage:', percent_coverage(cc))"
artifacts: artifacts:
- path: '*.Rcheck\**\*.log' - path: '*.Rcheck\**\*.log'

View File

@ -7,13 +7,13 @@
\alias{guess_mo} \alias{guess_mo}
\title{Transform to microorganism ID} \title{Transform to microorganism ID}
\usage{ \usage{
as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
reference_df = NULL) reference_df = NULL)
is.mo(x) is.mo(x)
guess_mo(x, Becker = FALSE, Lancefield = FALSE, guess_mo(x, Becker = FALSE, Lancefield = FALSE,
allow_uncertain = FALSE, reference_df = NULL) allow_uncertain = TRUE, reference_df = NULL)
} }
\arguments{ \arguments{
\item{x}{a character vector or a \code{data.frame} with one or two columns} \item{x}{a character vector or a \code{data.frame} with one or two columns}
@ -26,7 +26,7 @@ guess_mo(x, Becker = FALSE, Lancefield = FALSE,
This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.} This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.}
\item{allow_uncertain}{a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.} \item{allow_uncertain}{a logical to indicate whether the input should be checked for less possible results, see Details}
\item{reference_df}{a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.} \item{reference_df}{a \code{data.frame} to use for extra reference when translating \code{x} to a valid \code{mo}. The first column can be any microbial name, code or ID (used in your analysis or organisation), the second column must be a valid \code{mo} as found in the \code{\link{microorganisms}} data set.}
} }
@ -39,11 +39,11 @@ Use this function to determine a valid microorganism ID (\code{mo}). Determinati
\details{ \details{
A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr A microbial ID from this package (class: \code{mo}) typically looks like these examples:\cr
\preformatted{ \preformatted{
Code Full name Code Full name
--------------- -------------------------------------- --------------- --------------------------------------
B_KLBSL Klebsiella B_KLBSL Klebsiella
B_KLBSL_PNE Klebsiella pneumoniae B_KLBSL_PNE Klebsiella pneumoniae
B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis
| | | | | | | |
| | | | | | | |
| | | ----> subspecies, a 3-4 letter acronym | | | ----> subspecies, a 3-4 letter acronym
@ -62,7 +62,7 @@ This function uses Artificial Intelligence (AI) to help getting fast and logical
\item{Breakdown of input values: from here it starts to breakdown input values to find possible matches} \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
} }
A couple of effects because of these rules A couple of effects because of these rules:
\itemize{ \itemize{
\item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
\item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason} \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
@ -71,6 +71,13 @@ A couple of effects because of these rules
} }
This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms. This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
When using \code{allow_uncertain = TRUE} (which is the default setting), it will use additional rules if all previous AI rules failed to get valid results. Examples:
\itemize{
\item{\code{"Streptococcus group B (known as S. agalactiae)"}. The text between brackets will be removed and a warning will be thrown that the result \emph{Streptococcus group B} (\code{B_STRPTC_GRB}) needs review.}
\item{\code{"S. aureus - please mind: MRSA"}. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result \emph{Staphylococcus aureus} (\code{B_STPHY_AUR}) needs review.}
\item{\code{"D. spartina"}. This is the abbreviation of an old taxonomic name: \emph{Didymosphaeria spartinae} (the last "e" was missing from the input). This fungus was renamed to \emph{Leptosphaeria obiones}, so a warning will be thrown that this result (\code{F_LPTSP_OBI}) needs review.}
}
\code{guess_mo} is an alias of \code{as.mo}. \code{guess_mo} is an alias of \code{as.mo}.
} }
\section{ITIS}{ \section{ITIS}{
@ -100,6 +107,7 @@ as.mo("staaur")
as.mo("S. aureus") as.mo("S. aureus")
as.mo("S aureus") as.mo("S aureus")
as.mo("Staphylococcus aureus") as.mo("Staphylococcus aureus")
as.mo("Staphylococcus aureus (MRSA)")
as.mo("MRSA") # Methicillin Resistant S. aureus as.mo("MRSA") # Methicillin Resistant S. aureus
as.mo("VISA") # Vancomycin Intermediate S. aureus as.mo("VISA") # Vancomycin Intermediate S. aureus
as.mo("VRSA") # Vancomycin Resistant S. aureus as.mo("VRSA") # Vancomycin Resistant S. aureus

View File

@ -36,8 +36,12 @@ plot(rsi_data) # for percentages
barplot(rsi_data) # for frequencies barplot(rsi_data) # for frequencies
freq(rsi_data) # frequency table with informative header freq(rsi_data) # frequency table with informative header
# fastest way to transform all columns with already valid AB results to class `rsi`: # using dplyr's mutate
library(dplyr) library(dplyr)
septic_patients \%>\%
mutate_at(vars(peni:rifa), as.rsi)
# fastest way to transform all columns with already valid AB results to class `rsi`:
septic_patients \%>\% septic_patients \%>\%
mutate_if(is.rsi.eligible, mutate_if(is.rsi.eligible,
as.rsi) as.rsi)

View File

@ -57,7 +57,7 @@ interpretive_reading(...)
\item{verbose}{a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected} \item{verbose}{a logical to indicate whether extensive info should be returned as a \code{data.frame} with info about which rows and columns are effected}
\item{amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfep, cfot, cfox, cfra, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, levo, linc, line, mero, mezl, mino, moxi, nali, neom, neti, nitr, norf, novo, oflo, oxac, peni, pipe, pita, poly, pris, qida, rifa, roxi, siso, teic, tetr, tica, tige, tobr, trim, trsu, vanc}{column name of an antibiotic, see Details} \item{amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfep, cfot, cfox, cfra, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, levo, linc, line, mero, mezl, mino, moxi, nali, neom, neti, nitr, norf, novo, oflo, oxac, peni, pipe, pita, poly, pris, qida, rifa, roxi, siso, teic, tetr, tica, tige, tobr, trim, trsu, vanc}{column name of an antibiotic, see Antibiotics}
\item{col_bactid}{deprecated, use \code{col_mo} instead.} \item{col_bactid}{deprecated, use \code{col_mo} instead.}
@ -69,11 +69,10 @@ The input of \code{tbl}, possibly with edited values of antibiotics. Or, if \cod
\description{ \description{
Apply susceptibility rules as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, \url{http://eucast.org}), see \emph{Source}. This includes (1) expert rules, (2) intrinsic resistance and (3) inferred resistance as defined in their breakpoint tables. Apply susceptibility rules as defined by the European Committee on Antimicrobial Susceptibility Testing (EUCAST, \url{http://eucast.org}), see \emph{Source}. This includes (1) expert rules, (2) intrinsic resistance and (3) inferred resistance as defined in their breakpoint tables.
} }
\details{
To define antibiotics column names, input a text or use \code{NA} to skip a column (e.g. \code{tica = NA}). Non-existing columns will anyway be skipped with a warning. See the Antibiotics section for an explanation of the abbreviations.
}
\section{Antibiotics}{ \section{Antibiotics}{
To define antibiotics column names, input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will be skipped anyway, with a warning.
Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code})
\strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}), \strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}),

View File

@ -40,125 +40,125 @@ eucast_exceptional_phenotypes(tbl, country = "EUCAST", ...)
\item{info}{print progress} \item{info}{print progress}
\item{amcl}{column name of an antibiotic, see Details} \item{amcl}{column name of an antibiotic, see Antibiotics}
\item{amik}{column name of an antibiotic, see Details} \item{amik}{column name of an antibiotic, see Antibiotics}
\item{amox}{column name of an antibiotic, see Details} \item{amox}{column name of an antibiotic, see Antibiotics}
\item{ampi}{column name of an antibiotic, see Details} \item{ampi}{column name of an antibiotic, see Antibiotics}
\item{azit}{column name of an antibiotic, see Details} \item{azit}{column name of an antibiotic, see Antibiotics}
\item{aztr}{column name of an antibiotic, see Details} \item{aztr}{column name of an antibiotic, see Antibiotics}
\item{cefa}{column name of an antibiotic, see Details} \item{cefa}{column name of an antibiotic, see Antibiotics}
\item{cfra}{column name of an antibiotic, see Details} \item{cfra}{column name of an antibiotic, see Antibiotics}
\item{cfep}{column name of an antibiotic, see Details} \item{cfep}{column name of an antibiotic, see Antibiotics}
\item{cfot}{column name of an antibiotic, see Details} \item{cfot}{column name of an antibiotic, see Antibiotics}
\item{cfox}{column name of an antibiotic, see Details} \item{cfox}{column name of an antibiotic, see Antibiotics}
\item{cfta}{column name of an antibiotic, see Details} \item{cfta}{column name of an antibiotic, see Antibiotics}
\item{cftr}{column name of an antibiotic, see Details} \item{cftr}{column name of an antibiotic, see Antibiotics}
\item{cfur}{column name of an antibiotic, see Details} \item{cfur}{column name of an antibiotic, see Antibiotics}
\item{chlo}{column name of an antibiotic, see Details} \item{chlo}{column name of an antibiotic, see Antibiotics}
\item{cipr}{column name of an antibiotic, see Details} \item{cipr}{column name of an antibiotic, see Antibiotics}
\item{clar}{column name of an antibiotic, see Details} \item{clar}{column name of an antibiotic, see Antibiotics}
\item{clin}{column name of an antibiotic, see Details} \item{clin}{column name of an antibiotic, see Antibiotics}
\item{clox}{column name of an antibiotic, see Details} \item{clox}{column name of an antibiotic, see Antibiotics}
\item{coli}{column name of an antibiotic, see Details} \item{coli}{column name of an antibiotic, see Antibiotics}
\item{czol}{column name of an antibiotic, see Details} \item{czol}{column name of an antibiotic, see Antibiotics}
\item{dapt}{column name of an antibiotic, see Details} \item{dapt}{column name of an antibiotic, see Antibiotics}
\item{doxy}{column name of an antibiotic, see Details} \item{doxy}{column name of an antibiotic, see Antibiotics}
\item{erta}{column name of an antibiotic, see Details} \item{erta}{column name of an antibiotic, see Antibiotics}
\item{eryt}{column name of an antibiotic, see Details} \item{eryt}{column name of an antibiotic, see Antibiotics}
\item{fosf}{column name of an antibiotic, see Details} \item{fosf}{column name of an antibiotic, see Antibiotics}
\item{fusi}{column name of an antibiotic, see Details} \item{fusi}{column name of an antibiotic, see Antibiotics}
\item{gent}{column name of an antibiotic, see Details} \item{gent}{column name of an antibiotic, see Antibiotics}
\item{imip}{column name of an antibiotic, see Details} \item{imip}{column name of an antibiotic, see Antibiotics}
\item{kana}{column name of an antibiotic, see Details} \item{kana}{column name of an antibiotic, see Antibiotics}
\item{levo}{column name of an antibiotic, see Details} \item{levo}{column name of an antibiotic, see Antibiotics}
\item{linc}{column name of an antibiotic, see Details} \item{linc}{column name of an antibiotic, see Antibiotics}
\item{line}{column name of an antibiotic, see Details} \item{line}{column name of an antibiotic, see Antibiotics}
\item{mero}{column name of an antibiotic, see Details} \item{mero}{column name of an antibiotic, see Antibiotics}
\item{metr}{column name of an antibiotic. Use \code{NA} to skip a column, like \code{tica = NA}. Non-existing columns will anyway be skipped. See the Antibiotics section for an explanation of the abbreviations.} \item{metr}{column name of an antibiotic, see Antibiotics}
\item{mino}{column name of an antibiotic, see Details} \item{mino}{column name of an antibiotic, see Antibiotics}
\item{moxi}{column name of an antibiotic, see Details} \item{moxi}{column name of an antibiotic, see Antibiotics}
\item{nali}{column name of an antibiotic, see Details} \item{nali}{column name of an antibiotic, see Antibiotics}
\item{neom}{column name of an antibiotic, see Details} \item{neom}{column name of an antibiotic, see Antibiotics}
\item{neti}{column name of an antibiotic, see Details} \item{neti}{column name of an antibiotic, see Antibiotics}
\item{nitr}{column name of an antibiotic, see Details} \item{nitr}{column name of an antibiotic, see Antibiotics}
\item{novo}{column name of an antibiotic, see Details} \item{novo}{column name of an antibiotic, see Antibiotics}
\item{norf}{column name of an antibiotic, see Details} \item{norf}{column name of an antibiotic, see Antibiotics}
\item{oflo}{column name of an antibiotic, see Details} \item{oflo}{column name of an antibiotic, see Antibiotics}
\item{peni}{column name of an antibiotic, see Details} \item{peni}{column name of an antibiotic, see Antibiotics}
\item{pipe}{column name of an antibiotic, see Details} \item{pipe}{column name of an antibiotic, see Antibiotics}
\item{pita}{column name of an antibiotic, see Details} \item{pita}{column name of an antibiotic, see Antibiotics}
\item{poly}{column name of an antibiotic, see Details} \item{poly}{column name of an antibiotic, see Antibiotics}
\item{qida}{column name of an antibiotic, see Details} \item{qida}{column name of an antibiotic, see Antibiotics}
\item{rifa}{column name of an antibiotic, see Details} \item{rifa}{column name of an antibiotic, see Antibiotics}
\item{roxi}{column name of an antibiotic, see Details} \item{roxi}{column name of an antibiotic, see Antibiotics}
\item{siso}{column name of an antibiotic, see Details} \item{siso}{column name of an antibiotic, see Antibiotics}
\item{teic}{column name of an antibiotic, see Details} \item{teic}{column name of an antibiotic, see Antibiotics}
\item{tetr}{column name of an antibiotic, see Details} \item{tetr}{column name of an antibiotic, see Antibiotics}
\item{tica}{column name of an antibiotic, see Details} \item{tica}{column name of an antibiotic, see Antibiotics}
\item{tige}{column name of an antibiotic, see Details} \item{tige}{column name of an antibiotic, see Antibiotics}
\item{tobr}{column name of an antibiotic, see Details} \item{tobr}{column name of an antibiotic, see Antibiotics}
\item{trim}{column name of an antibiotic, see Details} \item{trim}{column name of an antibiotic, see Antibiotics}
\item{trsu}{column name of an antibiotic, see Details} \item{trsu}{column name of an antibiotic, see Antibiotics}
\item{vanc}{column name of an antibiotic, see Details} \item{vanc}{column name of an antibiotic, see Antibiotics}
\item{col_bactid}{deprecated, use \code{col_mo} instead.} \item{col_bactid}{deprecated, use \code{col_mo} instead.}
@ -175,6 +175,8 @@ When \code{country} will be left blank, guidelines will be taken from EUCAST Exp
} }
\section{Antibiotics}{ \section{Antibiotics}{
To define antibiotics column names, input a text (case-insensitive) or use \code{NULL} to skip a column (e.g. \code{tica = NULL}). Non-existing columns will be skipped anyway, with a warning.
Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code}) Abbreviations of the column containing antibiotics in the form: \strong{abbreviation}: generic name (\emph{ATC code})
\strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}), \strong{amcl}: amoxicillin+clavulanic acid (\emph{J01CR02}),

View File

@ -1,25 +1,25 @@
context("atc.R") context("atc.R")
test_that("atc_property works", { # test_that("atc_property works", {
#skip_on_cran() # relies on internet connection of server, don't test # skip_on_cran() # relies on internet connection of server, don't test
#skip_on_appveyor() # security error on AppVeyor # skip_on_appveyor() # security error on AppVeyor
#
if (!is.null(curl::nslookup("www.whocc.no", error = FALSE))) { # if (!is.null(curl::nslookup("www.whocc.no", error = FALSE))) {
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin") # expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
expect_equal(atc_property("J01CA04", property = "unit"), "g") # expect_equal(atc_property("J01CA04", property = "unit"), "g")
expect_equal(atc_property("J01CA04", property = "DDD"), # expect_equal(atc_property("J01CA04", property = "DDD"),
atc_ddd("J01CA04")) # atc_ddd("J01CA04"))
#
expect_identical(atc_property("J01CA04", property = "Groups"), # expect_identical(atc_property("J01CA04", property = "Groups"),
atc_groups("J01CA04")) # atc_groups("J01CA04"))
#
expect_warning(atc_property("ABCDEFG", property = "DDD")) # expect_warning(atc_property("ABCDEFG", property = "DDD"))
#
expect_error(atc_property("J01CA04", property = c(1:5))) # expect_error(atc_property("J01CA04", property = c(1:5)))
expect_error(atc_property("J01CA04", property = "test")) # expect_error(atc_property("J01CA04", property = "test"))
expect_error(atc_property("J01CA04", property = "test", administration = c(1:5))) # expect_error(atc_property("J01CA04", property = "test", administration = c(1:5)))
} # }
}) # })
test_that("guess_atc works", { test_that("guess_atc works", {
expect_equal(as.character(guess_atc(c("J01FA01", expect_equal(as.character(guess_atc(c("J01FA01",

View File

@ -21,7 +21,7 @@ test_that("mic works", {
plot(as.mic(c(1, 2, 4, 8))) plot(as.mic(c(1, 2, 4, 8)))
print(as.mic(c(1, 2, 4, 8))) print(as.mic(c(1, 2, 4, 8)))
expect_equal(summary(as.mic(c(2, 8))), c("Mode" = 'mic', expect_equal(summary(as.mic(c(2, 8))), c("Class" = "mic",
"<NA>" = "0", "<NA>" = "0",
"Min." = "2", "Min." = "2",
"Max." = "8")) "Max." = "8"))

View File

@ -214,10 +214,10 @@ test_that("as.mo works", {
expect_equal(as.character(suppressWarnings(as.mo( expect_equal(as.character(suppressWarnings(as.mo(
c("Microbacterium paraoxidans", c("Microbacterium paraoxidans",
"Streptococcus suis (bovis gr)", "Streptococcus suis (bovis gr)",
"Raoultella (here some text) terrigena"), allow_uncertain = TRUE))), "Raoultella (here some text) terrigena")))),
c("B_MCRBC", "B_STRPTC_SUI", "B_RLTLL_TER")) c("B_MCRBC", "B_STRPTC_SUI", "B_RLTLL_TER"))
# Salmonella (City) are all actually Salmonella enterica spp (City) # Salmonella (City) are all actually Salmonella enterica spp (City)
expect_equal(as.character(suppressMessages(as.mo("Salmonella Goettingen", allow_uncertain = TRUE))), expect_equal(as.character(suppressMessages(as.mo("Salmonella Goettingen"))),
"B_SLMNL_ENT") "B_SLMNL_ENT")
}) })

View File

@ -13,7 +13,7 @@ test_that("rsi works", {
expect_equal(suppressWarnings(as.logical(as.rsi("INVALID VALUE"))), NA) expect_equal(suppressWarnings(as.logical(as.rsi("INVALID VALUE"))), NA)
expect_equal(summary(as.rsi(c("S", "R"))), c("Mode" = 'rsi', expect_equal(summary(as.rsi(c("S", "R"))), c("Class" = "rsi",
"<NA>" = "0", "<NA>" = "0",
"Sum S" = "1", "Sum S" = "1",
"Sum IR" = "1", "Sum IR" = "1",