speed improvement eucast_rules(), support more old MO codes

This commit is contained in:
dr. M.S. (Matthijs) Berends 2019-05-20 12:00:18 +02:00
parent b0033dae1b
commit 4ca00e1868
35 changed files with 590 additions and 683 deletions

View File

@ -19,6 +19,9 @@
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
# ==================================================================== #
# to check with R-Hub:
# rhub::check_for_cran(devtools::build(args = c('--no-build-vignettes')))
stages:
- build
- test
@ -78,10 +81,6 @@ R-devel:
image: rocker/r-devel
allow_failure: true
script:
# set language
- echo 'LANGUAGE="en_US.utf8"' > .Renviron
- echo 'LANG="en_US.utf8"' >> .Renviron
- echo 'LANGUAGE="en_US.utf8"' > ~/.Renviron
- Rscriptdevel -e 'sessionInfo()'
# install missing and outdated packages
- Rscriptdevel -e 'source(".gitlab-ci.R"); gl_update_pkg_all(repos = "https://cran.rstudio.com", quiet = TRUE, install_pkgdown = TRUE)'

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.6.1.9003
Date: 2019-05-17
Date: 2019-05-20
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

View File

@ -226,7 +226,7 @@ eucast_rules <- function(x,
}
}
cols_ab <- get_column_abx(tbl = x,
cols_ab <- get_column_abx(x = x,
soft_dependencies = c("AMC",
"AMK",
"AMX",
@ -291,8 +291,7 @@ eucast_rules <- function(x,
"SXT",
"VAN"),
hard_dependencies = NULL,
verbose = verbose,
...)
verbose = verbose)
AMC <- cols_ab['AMC']
AMK <- cols_ab['AMK']
@ -674,8 +673,11 @@ eucast_rules <- function(x,
'out of', formatnr(nrow(tbl_original)),
'rows, making a total of', formatnr(nrow(verbose_info)), 'edits\n')))
n_added <- verbose_info %>% filter(is.na(old)) %>% nrow()
n_changed <- verbose_info %>% filter(!is.na(old)) %>% nrow()
# print added values ----
if (verbose_info %>% filter(is.na(old)) %>% nrow() == 0) {
if (n_added == 0) {
colour <- cat # is function
} else {
colour <- blue # is function
@ -685,7 +687,7 @@ eucast_rules <- function(x,
filter(is.na(old)) %>%
nrow()), "test results"),
"\n")))
if (verbose_info %>% filter(is.na(old)) %>% nrow() > 0) {
if (n_added > 0) {
verbose_info %>%
filter(is.na(old)) %>%
# sort it well: S < I < R
@ -700,17 +702,20 @@ eucast_rules <- function(x,
}
# print changed values ----
if (verbose_info %>% filter(!is.na(old)) %>% nrow() == 0) {
if (n_changed == 0) {
colour <- cat # is function
} else {
colour <- blue # is function
}
cat(colour(paste0("\n=> ", wouldve, "changed ",
if (n_added + n_changed > 0) {
cat("\n")
}
cat(colour(paste0("=> ", wouldve, "changed ",
bold(formatnr(verbose_info %>%
filter(!is.na(old)) %>%
nrow()), "test results"),
"\n")))
if (verbose_info %>% filter(!is.na(old)) %>% nrow() > 0) {
if (n_changed > 0) {
verbose_info %>%
filter(!is.na(old)) %>%
# sort it well: S < I < R

View File

@ -56,15 +56,15 @@ guess_ab_col <- function(x = NULL, search_string = NULL, verbose = FALSE) {
if (is.null(x) & is.null(search_string)) {
return(as.name("guess_ab_col"))
}
if (!is.data.frame(x)) {
stop("`x` must be a data.frame")
}
if (length(search_string) > 1) {
warning("argument 'search_string' has length > 1 and only the first element will be used")
search_string <- search_string[1]
}
search_string <- as.character(search_string)
if (!is.data.frame(x)) {
stop("`x` must be a data.frame")
}
if (search_string %in% colnames(x)) {
ab_result <- search_string

View File

@ -110,8 +110,7 @@ mdro <- function(x,
}
cols_ab <- get_column_abx(tbl = x,
...)
cols_ab <- get_column_abx(x = x, verbose = verbose)
AMC <- cols_ab['AMC']
AMK <- cols_ab['AMK']

176
R/misc.R
View File

@ -158,177 +158,45 @@ get_ab_col <- function(columns, ab) {
columns[names(columns) == ab]
}
get_column_abx <- function(tbl,
get_column_abx <- function(x,
soft_dependencies = NULL,
hard_dependencies = NULL,
verbose = FALSE,
AMC = guess_ab_col(),
AMK = guess_ab_col(),
AMX = guess_ab_col(),
AMP = guess_ab_col(),
AZM = guess_ab_col(),
AZL = guess_ab_col(),
ATM = guess_ab_col(),
RID = guess_ab_col(),
FEP = guess_ab_col(),
CTX = guess_ab_col(),
FOX = guess_ab_col(),
CED = guess_ab_col(),
CAZ = guess_ab_col(),
CRO = guess_ab_col(),
CXM = guess_ab_col(),
CHL = guess_ab_col(),
CIP = guess_ab_col(),
CLR = guess_ab_col(),
CLI = guess_ab_col(),
FLC = guess_ab_col(),
COL = guess_ab_col(),
CZO = guess_ab_col(),
DAP = guess_ab_col(),
DOX = guess_ab_col(),
ETP = guess_ab_col(),
ERY = guess_ab_col(),
FOS = guess_ab_col(),
FUS = guess_ab_col(),
GEN = guess_ab_col(),
IPM = guess_ab_col(),
KAN = guess_ab_col(),
LVX = guess_ab_col(),
LIN = guess_ab_col(),
LNZ = guess_ab_col(),
MEM = guess_ab_col(),
MTR = guess_ab_col(),
MEZ = guess_ab_col(),
MNO = guess_ab_col(),
MFX = guess_ab_col(),
NAL = guess_ab_col(),
NEO = guess_ab_col(),
NET = guess_ab_col(),
NIT = guess_ab_col(),
NOR = guess_ab_col(),
NOV = guess_ab_col(),
OFX = guess_ab_col(),
OXA = guess_ab_col(),
PEN = guess_ab_col(),
PIP = guess_ab_col(),
TZP = guess_ab_col(),
PLB = guess_ab_col(),
PRI = guess_ab_col(),
QDA = guess_ab_col(),
RIF = guess_ab_col(),
RXT = guess_ab_col(),
SIS = guess_ab_col(),
TEC = guess_ab_col(),
TCY = guess_ab_col(),
TIC = guess_ab_col(),
TGC = guess_ab_col(),
TOB = guess_ab_col(),
TMP = guess_ab_col(),
SXT = guess_ab_col(),
VAN = guess_ab_col()) {
# check columns
if (identical(AMC, as.name("guess_ab_col"))) AMC <- guess_ab_col(tbl, "AMC", verbose = verbose)
if (identical(AMK, as.name("guess_ab_col"))) AMK <- guess_ab_col(tbl, "AMK", verbose = verbose)
if (identical(AMX, as.name("guess_ab_col"))) AMX <- guess_ab_col(tbl, "AMX", verbose = verbose)
if (identical(AMP, as.name("guess_ab_col"))) AMP <- guess_ab_col(tbl, "AMP", verbose = verbose)
if (identical(AZM, as.name("guess_ab_col"))) AZM <- guess_ab_col(tbl, "AZM", verbose = verbose)
if (identical(AZL, as.name("guess_ab_col"))) AZL <- guess_ab_col(tbl, "AZL", verbose = verbose)
if (identical(ATM, as.name("guess_ab_col"))) ATM <- guess_ab_col(tbl, "ATM", verbose = verbose)
if (identical(RID, as.name("guess_ab_col"))) RID <- guess_ab_col(tbl, "RID", verbose = verbose)
if (identical(FEP, as.name("guess_ab_col"))) FEP <- guess_ab_col(tbl, "FEP", verbose = verbose)
if (identical(CTX, as.name("guess_ab_col"))) CTX <- guess_ab_col(tbl, "CTX", verbose = verbose)
if (identical(FOX, as.name("guess_ab_col"))) FOX <- guess_ab_col(tbl, "FOX", verbose = verbose)
if (identical(CED, as.name("guess_ab_col"))) CED <- guess_ab_col(tbl, "CED", verbose = verbose)
if (identical(CAZ, as.name("guess_ab_col"))) CAZ <- guess_ab_col(tbl, "CAZ", verbose = verbose)
if (identical(CRO, as.name("guess_ab_col"))) CRO <- guess_ab_col(tbl, "CRO", verbose = verbose)
if (identical(CXM, as.name("guess_ab_col"))) CXM <- guess_ab_col(tbl, "CXM", verbose = verbose)
if (identical(CHL, as.name("guess_ab_col"))) CHL <- guess_ab_col(tbl, "CHL", verbose = verbose)
if (identical(CIP, as.name("guess_ab_col"))) CIP <- guess_ab_col(tbl, "CIP", verbose = verbose)
if (identical(CLR, as.name("guess_ab_col"))) CLR <- guess_ab_col(tbl, "CLR", verbose = verbose)
if (identical(CLI, as.name("guess_ab_col"))) CLI <- guess_ab_col(tbl, "CLI", verbose = verbose)
if (identical(FLC, as.name("guess_ab_col"))) FLC <- guess_ab_col(tbl, "FLC", verbose = verbose)
if (identical(COL, as.name("guess_ab_col"))) COL <- guess_ab_col(tbl, "COL", verbose = verbose)
if (identical(CZO, as.name("guess_ab_col"))) CZO <- guess_ab_col(tbl, "CZO", verbose = verbose)
if (identical(DAP, as.name("guess_ab_col"))) DAP <- guess_ab_col(tbl, "DAP", verbose = verbose)
if (identical(DOX, as.name("guess_ab_col"))) DOX <- guess_ab_col(tbl, "DOX", verbose = verbose)
if (identical(ETP, as.name("guess_ab_col"))) ETP <- guess_ab_col(tbl, "ETP", verbose = verbose)
if (identical(ERY, as.name("guess_ab_col"))) ERY <- guess_ab_col(tbl, "ERY", verbose = verbose)
if (identical(FOS, as.name("guess_ab_col"))) FOS <- guess_ab_col(tbl, "FOS", verbose = verbose)
if (identical(FUS, as.name("guess_ab_col"))) FUS <- guess_ab_col(tbl, "FUS", verbose = verbose)
if (identical(GEN, as.name("guess_ab_col"))) GEN <- guess_ab_col(tbl, "GEN", verbose = verbose)
if (identical(IPM, as.name("guess_ab_col"))) IPM <- guess_ab_col(tbl, "IPM", verbose = verbose)
if (identical(KAN, as.name("guess_ab_col"))) KAN <- guess_ab_col(tbl, "KAN", verbose = verbose)
if (identical(LVX, as.name("guess_ab_col"))) LVX <- guess_ab_col(tbl, "LVX", verbose = verbose)
if (identical(LIN, as.name("guess_ab_col"))) LIN <- guess_ab_col(tbl, "LIN", verbose = verbose)
if (identical(LNZ, as.name("guess_ab_col"))) LNZ <- guess_ab_col(tbl, "LNZ", verbose = verbose)
if (identical(MEM, as.name("guess_ab_col"))) MEM <- guess_ab_col(tbl, "MEM", verbose = verbose)
if (identical(MTR, as.name("guess_ab_col"))) MTR <- guess_ab_col(tbl, "MTR", verbose = verbose)
if (identical(MEZ, as.name("guess_ab_col"))) MEZ <- guess_ab_col(tbl, "MEZ", verbose = verbose)
if (identical(MNO, as.name("guess_ab_col"))) MNO <- guess_ab_col(tbl, "MNO", verbose = verbose)
if (identical(MFX, as.name("guess_ab_col"))) MFX <- guess_ab_col(tbl, "MFX", verbose = verbose)
if (identical(NAL, as.name("guess_ab_col"))) NAL <- guess_ab_col(tbl, "NAL", verbose = verbose)
if (identical(NEO, as.name("guess_ab_col"))) NEO <- guess_ab_col(tbl, "NEO", verbose = verbose)
if (identical(NET, as.name("guess_ab_col"))) NET <- guess_ab_col(tbl, "NET", verbose = verbose)
if (identical(NIT, as.name("guess_ab_col"))) NIT <- guess_ab_col(tbl, "NIT", verbose = verbose)
if (identical(NOR, as.name("guess_ab_col"))) NOR <- guess_ab_col(tbl, "NOR", verbose = verbose)
if (identical(NOV, as.name("guess_ab_col"))) NOV <- guess_ab_col(tbl, "NOV", verbose = verbose)
if (identical(OFX, as.name("guess_ab_col"))) OFX <- guess_ab_col(tbl, "OFX", verbose = verbose)
if (identical(OXA, as.name("guess_ab_col"))) OXA <- guess_ab_col(tbl, "OXA", verbose = verbose)
if (identical(PEN, as.name("guess_ab_col"))) PEN <- guess_ab_col(tbl, "PEN", verbose = verbose)
if (identical(PIP, as.name("guess_ab_col"))) PIP <- guess_ab_col(tbl, "PIP", verbose = verbose)
if (identical(TZP, as.name("guess_ab_col"))) TZP <- guess_ab_col(tbl, "TZP", verbose = verbose)
if (identical(PLB, as.name("guess_ab_col"))) PLB <- guess_ab_col(tbl, "PLB", verbose = verbose)
if (identical(PRI, as.name("guess_ab_col"))) PRI <- guess_ab_col(tbl, "PRI", verbose = verbose)
if (identical(QDA, as.name("guess_ab_col"))) QDA <- guess_ab_col(tbl, "QDA", verbose = verbose)
if (identical(RIF, as.name("guess_ab_col"))) RIF <- guess_ab_col(tbl, "RIF", verbose = verbose)
if (identical(RXT, as.name("guess_ab_col"))) RXT <- guess_ab_col(tbl, "RXT", verbose = verbose)
if (identical(SIS, as.name("guess_ab_col"))) SIS <- guess_ab_col(tbl, "SIS", verbose = verbose)
if (identical(TEC, as.name("guess_ab_col"))) TEC <- guess_ab_col(tbl, "TEC", verbose = verbose)
if (identical(TCY, as.name("guess_ab_col"))) TCY <- guess_ab_col(tbl, "TCY", verbose = verbose)
if (identical(TIC, as.name("guess_ab_col"))) TIC <- guess_ab_col(tbl, "TIC", verbose = verbose)
if (identical(TGC, as.name("guess_ab_col"))) TGC <- guess_ab_col(tbl, "TGC", verbose = verbose)
if (identical(TOB, as.name("guess_ab_col"))) TOB <- guess_ab_col(tbl, "TOB", verbose = verbose)
if (identical(TMP, as.name("guess_ab_col"))) TMP <- guess_ab_col(tbl, "TMP", verbose = verbose)
if (identical(SXT, as.name("guess_ab_col"))) SXT <- guess_ab_col(tbl, "SXT", verbose = verbose)
if (identical(VAN, as.name("guess_ab_col"))) VAN <- guess_ab_col(tbl, "VAN", verbose = verbose)
columns_available <- c(AMC = AMC, AMK = AMK, AMX = AMX, AMP = AMP, AZM = AZM,
AZL = AZL, ATM = ATM, RID = RID, FEP = FEP, CTX = CTX,
FOX = FOX, CED = CED, CAZ = CAZ, CRO = CRO, CXM = CXM,
CHL = CHL, CIP = CIP, CLR = CLR, CLI = CLI, FLC = FLC,
COL = COL, CZO = CZO, DAP = DAP, DOX = DOX, ETP = ETP,
ERY = ERY, FOS = FOS, FUS = FUS, GEN = GEN, IPM = IPM,
KAN = KAN, LVX = LVX, LIN = LIN, LNZ = LNZ, MEM = MEM,
MTR = MTR, MEZ = MEZ, MNO = MNO, MFX = MFX, NAL = NAL,
NEO = NEO, NET = NET, NIT = NIT, NOR = NOR, NOV = NOV,
OFX = OFX, OXA = OXA, PEN = PEN, PIP = PIP, TZP = TZP,
PLB = PLB, PRI = PRI, QDA = QDA, RIF = RIF, RXT = RXT,
SIS = SIS, TEC = TEC, TCY = TCY, TIC = TIC, TGC = TGC,
TOB = TOB, TMP = TMP, SXT = SXT, VAN = VAN)
verbose = FALSE) {
df_trans <- data.frame(colnames = colnames(x),
abcode = suppressWarnings(as.ab(colnames(x))))
df_trans <- df_trans[!is.na(df_trans$abcode),]
x <- as.character(df_trans$colnames)
names(x) <- df_trans$abcode
# sort on name
x <- x[sort(names(x))]
if (verbose == TRUE) {
for (i in 1:length(x)) {
message(blue(paste0("NOTE: Using column `", bold(x[i]), "` as input for ", names(x)[i],
" (", ab_name(names(x)[i], language = "en", tolower = TRUE), ").")))
}
}
if (!is.null(hard_dependencies)) {
if (!all(hard_dependencies %in% names(columns_available[!is.na(columns_available)]))) {
if (!all(hard_dependencies %in% names(x))) {
# missing a hard dependency will return NA and consequently the data will not be analysed
missing <- hard_dependencies[!hard_dependencies %in% names(columns_available[!is.na(columns_available)])]
missing <- hard_dependencies[!hard_dependencies %in% names(x)]
generate_warning_abs_missing(missing, any = FALSE)
return(NA)
}
}
if (!is.null(soft_dependencies)) {
if (!all(soft_dependencies %in% names(columns_available[!is.na(columns_available)]))) {
if (!all(soft_dependencies %in% names(x))) {
# missing a soft dependency may lower the reliability
missing <- soft_dependencies[!soft_dependencies %in% names(columns_available[!is.na(columns_available)])]
missing <- soft_dependencies[!soft_dependencies %in% names(x)]
missing <- paste0("`", missing, "` (", ab_name(missing, tolower = TRUE), ")")
warning('Reliability might be improved if these antimicrobial results would be available too: ', paste(missing, collapse = ", "),
immediate. = TRUE,
call. = FALSE)
}
}
#deps <- c(soft_dependencies, hard_dependencies)
#if (length(deps) > 0) {
# columns_available[names(columns_available) %in% deps]
#} else {
columns_available
#}
x
}
generate_warning_abs_missing <- function(missing, any = FALSE) {

21
R/mo.R
View File

@ -111,18 +111,18 @@
#' Use \code{mo_renamed()} to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.
#'
#' \strong{Microbial prevalence of pathogens in humans} \cr
#' The intelligent rules takes into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
#' The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
#' \itemize{
#' \item{1 (most prevalent): class is Gammaproteobacteria \strong{or} genus is one of: \emph{Enterococcus}, \emph{Staphylococcus}, \emph{Streptococcus}.}
#' \item{2: phylum is one of: Proteobacteria, Firmicutes, Actinobacteria, Sarcomastigophora \strong{or} genus is one of: \emph{Aspergillus}, \emph{Bacteroides}, \emph{Candida}, \emph{Capnocytophaga}, \emph{Chryseobacterium}, \emph{Cryptococcus}, \emph{Elisabethkingia}, \emph{Flavobacterium}, \emph{Fusobacterium}, \emph{Giardia}, \emph{Leptotrichia}, \emph{Mycoplasma}, \emph{Prevotella}, \emph{Rhodotorula}, \emph{Treponema}, \emph{Trichophyton}, \emph{Ureaplasma}.}
#' \item{3 (least prevalent): all others.}
#' }
#'
#' Group 1 contains all common Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
#' Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
#'
#' Group 2 probably contains all other microbial pathogens ever found in humans.
#' Group 2 probably contains less microbial pathogens; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.
#' @inheritSection catalogue_of_life Catalogue of Life
# (source as a section, so it can be inherited by other man pages)
# (source as a section here, so it can be inherited by other man pages:)
#' @section Source:
#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870926. \url{https://dx.doi.org/10.1128/CMR.00109-13}
#'
@ -349,13 +349,24 @@ exec_as.mo <- function(x,
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
if (any(x %like% "^[BFP]_[A-Z]{3,7}") & !all(x %in% microorganisms$mo)) {
x <- gsub("^F_CANDD_GLB$", "F_CANDD_GLA", x) # specific old code for C. glabrata
leftpart <- gsub("^([BFP]_[A-Z]{3,7}).*", "\\1", x)
if (any(leftpart %in% names(mo_codes_v0.5.0))) {
rightpart <- gsub("^[BFP]_[A-Z]{3,7}(.*)", "\\1", x)
leftpart <- mo_codes_v0.5.0[leftpart]
x[!is.na(leftpart)] <- paste0(leftpart[!is.na(leftpart)], rightpart[!is.na(leftpart)])
}
# now check if some are still old
still_old <- x[x %in% names(mo_codes_v0.5.0)]
if (length(still_old) > 0) {
x[x %in% names(mo_codes_v0.5.0)] <- data.frame(old = still_old, stringsAsFactors = FALSE) %>%
left_join(data.frame(old = names(mo_codes_v0.5.0),
new = mo_codes_v0.5.0,
stringsAsFactors = FALSE), by = "old") %>%
# if they couldn't be found, replace them with the old ones again,
# so they will throw a warning in the end
mutate(new = ifelse(is.na(new), old, new)) %>%
pull(new)
}
}
# defined df to check for

View File

@ -23,13 +23,13 @@
#'
#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}.
#' @param x any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}}
#' @param property one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}
#' @param property one of the column names of the \code{\link{microorganisms}} data set or \code{"shortname"}
#' @param language language of the returned text, defaults to system language (see \code{\link{get_locale}}) and can also be set with \code{\link{getOption}("AMR_locale")}. Use \code{language = NULL} or \code{language = ""} to prevent translation.
#' @param ... other parameters passed on to \code{\link{as.mo}}
#' @param open browse the URL using \code{\link[utils]{browseURL}()}
#' @details All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for \code{mo_ref}, \code{mo_authors} and \code{mo_year}. This leads to the following results:
#' \itemize{
#' \item{\code{mo_fullname("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)}
#' \item{\code{mo_name("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)}
#' \item{\code{mo_ref("Chlamydia psittaci")} will return \code{"Page, 1968"} (with a warning about the renaming)}
#' \item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)}
#' }
@ -91,9 +91,10 @@
#'
#'
#' # Known subspecies
#' mo_name("doylei") # "Campylobacter jejuni doylei"
#' mo_genus("doylei") # "Campylobacter"
#' mo_species("doylei") # "jejuni"
#' mo_fullname("doylei") # "Campylobacter jejuni doylei"
#' mo_subspecies("doylei") # "doylei"
#'
#' mo_fullname("K. pneu rh") # "Klebsiella pneumoniae rhinoscleromatis"
#' mo_shortname("K. pneu rh") # "K. pneumoniae"
@ -139,8 +140,7 @@ mo_name <- function(x, language = get_locale(), ...) {
#' @rdname mo_property
#' @export
mo_fullname <- function(x, language = get_locale(), ...) {
x <- mo_validate(x = x, property = "fullname", ...)
t(x, language = language)
t(mo_validate(x = x, property = "fullname", ...), language = language)
}
#' @rdname mo_property

11
R/zzz.R
View File

@ -104,7 +104,7 @@ make_DT <- function() {
make_trans_tbl <- function() {
# conversion of old MO codes from v0.5.0 (ITIS) to later versions (Catalogue of Life)
c(B_ACHRMB = "B_ACHRM", B_ANNMA = "B_ACTNS", B_ACLLS = "B_ALCYC",
B_AHNGM = "B_ARCHN", B_ARMTM = "B_ARMTMN", B_ARTHRS = "B_ARTHR",
B_AHNGM = "B_ARCHN", B_ARMTM = "B_ARMTMN", B_ARTHR = "B_ARTHRB", B_ARTHRS = "B_ARTHR",
B_APHLS = "B_AZRHZP", B_BRCHA = "B_BRCHY", B_BCTRM = "B_BRVBCT",
B_CLRBCT = "B_CLRBC", B_CTRDM = "B_CLSTR", B_CPRMM = "B_CYLND",
B_DLCLN = "B_DPLCL", B_DMCLM = "B_DSLFT", B_DSLFVB = "B_DSLFV",
@ -288,5 +288,12 @@ make_trans_tbl <- function() {
F_PRCHN = "P_PRCHN", F_PRMBD = "P_PRMBD", F_PRTPH = "P_PRTPH",
F_PSRNA = "P_PSRNA", F_PYSRM = "P_PYSRM", F_RTCLR = "P_RTCLR",
F_STMNT = "P_STMNT", F_SYMPH = "P_SYMPH", F_TRBRK = "P_TRBRK",
F_TRICH = "P_TRICH", F_TUBFR = "P_TUBFR")
F_TRICH = "P_TRICH", F_TUBFR = "P_TUBFR",
B_GRDNR = "B_GRLLA", B_SGMNS = "B_SNGMNS", B_TCLLS = "B_THBCL",
F_CCCCS = "F_CRYPT",
# renamings of old genus + species
F_CANDD_GLB = "F_CANDD_GLA", F_CANDD_KRU = "F_ISSTC_ORI",
F_CANDD_LUS = "F_CLVSP_LUS", B_STRPT_TUS = "B_STRPT",
B_PRVTL_OLA = "B_PRVTL_OULO", B_FSBCT_RUM = "B_FSBCT",
B_CRYNB_EYI = "B_CRYNB_FRE", B_OLGLL_LIS = "B_OLGLL_URE")
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 67 KiB

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 50 KiB

View File

@ -192,7 +192,7 @@
<h1>How to apply EUCAST rules</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>EUCAST.Rmd</code></div>

View File

@ -192,7 +192,7 @@
<h1>How to import data from SPSS / SAS / Stata</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>SPSS.Rmd</code></div>
@ -244,38 +244,38 @@
<p>To demonstrate the first point:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb1-1" title="1"><span class="co"># not all values are valid MIC values:</span></a>
<a class="sourceLine" id="cb1-2" title="2"><span class="kw"><a href="../reference/as.mic.html">as.mic</a></span>(<span class="fl">0.125</span>)</a>
<a class="sourceLine" id="cb1-3" title="3"><span class="co">#&gt; Class 'mic'</span></a>
<a class="sourceLine" id="cb1-4" title="4"><span class="co">#&gt; [1] 0.125</span></a>
<a class="sourceLine" id="cb1-3" title="3"><span class="co"># Class 'mic'</span></a>
<a class="sourceLine" id="cb1-4" title="4"><span class="co"># [1] 0.125</span></a>
<a class="sourceLine" id="cb1-5" title="5"><span class="kw"><a href="../reference/as.mic.html">as.mic</a></span>(<span class="st">"testvalue"</span>)</a>
<a class="sourceLine" id="cb1-6" title="6"><span class="co">#&gt; Class 'mic'</span></a>
<a class="sourceLine" id="cb1-7" title="7"><span class="co">#&gt; [1] &lt;NA&gt;</span></a>
<a class="sourceLine" id="cb1-6" title="6"><span class="co"># Class 'mic'</span></a>
<a class="sourceLine" id="cb1-7" title="7"><span class="co"># [1] &lt;NA&gt;</span></a>
<a class="sourceLine" id="cb1-8" title="8"></a>
<a class="sourceLine" id="cb1-9" title="9"><span class="co"># the Gram stain is avaiable for all bacteria:</span></a>
<a class="sourceLine" id="cb1-10" title="10"><span class="kw"><a href="../reference/mo_property.html">mo_gramstain</a></span>(<span class="st">"E. coli"</span>)</a>
<a class="sourceLine" id="cb1-11" title="11"><span class="co">#&gt; [1] "Gram negative"</span></a>
<a class="sourceLine" id="cb1-11" title="11"><span class="co"># [1] "Gram negative"</span></a>
<a class="sourceLine" id="cb1-12" title="12"></a>
<a class="sourceLine" id="cb1-13" title="13"><span class="co"># Klebsiella is intrinsic resistant to amoxicllin, according to EUCAST:</span></a>
<a class="sourceLine" id="cb1-14" title="14">klebsiella_test &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/data.frame">data.frame</a></span>(<span class="dt">mo =</span> <span class="st">"klebsiella"</span>, </a>
<a class="sourceLine" id="cb1-15" title="15"> <span class="dt">amox =</span> <span class="st">"S"</span>,</a>
<a class="sourceLine" id="cb1-16" title="16"> <span class="dt">stringsAsFactors =</span> <span class="ot">FALSE</span>)</a>
<a class="sourceLine" id="cb1-17" title="17">klebsiella_test</a>
<a class="sourceLine" id="cb1-18" title="18"><span class="co">#&gt; mo amox</span></a>
<a class="sourceLine" id="cb1-19" title="19"><span class="co">#&gt; 1 klebsiella S</span></a>
<a class="sourceLine" id="cb1-18" title="18"><span class="co"># mo amox</span></a>
<a class="sourceLine" id="cb1-19" title="19"><span class="co"># 1 klebsiella S</span></a>
<a class="sourceLine" id="cb1-20" title="20"><span class="kw"><a href="../reference/eucast_rules.html">eucast_rules</a></span>(klebsiella_test, <span class="dt">info =</span> <span class="ot">FALSE</span>)</a>
<a class="sourceLine" id="cb1-21" title="21"><span class="co">#&gt; mo amox</span></a>
<a class="sourceLine" id="cb1-22" title="22"><span class="co">#&gt; 1 klebsiella R</span></a>
<a class="sourceLine" id="cb1-21" title="21"><span class="co"># mo amox</span></a>
<a class="sourceLine" id="cb1-22" title="22"><span class="co"># 1 klebsiella R</span></a>
<a class="sourceLine" id="cb1-23" title="23"></a>
<a class="sourceLine" id="cb1-24" title="24"><span class="co"># hundreds of trade names can be translated to a name, trade name or an ATC code:</span></a>
<a class="sourceLine" id="cb1-25" title="25"><span class="kw"><a href="../reference/ab_property.html">ab_name</a></span>(<span class="st">"floxapen"</span>)</a>
<a class="sourceLine" id="cb1-26" title="26"><span class="co">#&gt; [1] "Flucloxacillin"</span></a>
<a class="sourceLine" id="cb1-26" title="26"><span class="co"># [1] "Flucloxacillin"</span></a>
<a class="sourceLine" id="cb1-27" title="27"><span class="kw"><a href="../reference/ab_property.html">ab_tradenames</a></span>(<span class="st">"floxapen"</span>)</a>
<a class="sourceLine" id="cb1-28" title="28"><span class="co">#&gt; [1] "Floxacillin" "FLOXACILLIN" "Floxapen" </span></a>
<a class="sourceLine" id="cb1-29" title="29"><span class="co">#&gt; [4] "Floxapen sodium salt" "Fluclox" "Flucloxacilina" </span></a>
<a class="sourceLine" id="cb1-30" title="30"><span class="co">#&gt; [7] "Flucloxacillin" "Flucloxacilline" "Flucloxacillinum" </span></a>
<a class="sourceLine" id="cb1-31" title="31"><span class="co">#&gt; [10] "Fluorochloroxacillin"</span></a>
<a class="sourceLine" id="cb1-28" title="28"><span class="co"># [1] "Floxacillin" "FLOXACILLIN" "Floxapen" </span></a>
<a class="sourceLine" id="cb1-29" title="29"><span class="co"># [4] "Floxapen sodium salt" "Fluclox" "Flucloxacilina" </span></a>
<a class="sourceLine" id="cb1-30" title="30"><span class="co"># [7] "Flucloxacillin" "Flucloxacilline" "Flucloxacillinum" </span></a>
<a class="sourceLine" id="cb1-31" title="31"><span class="co"># [10] "Fluorochloroxacillin"</span></a>
<a class="sourceLine" id="cb1-32" title="32"><span class="kw"><a href="../reference/ab_property.html">ab_atc</a></span>(<span class="st">"floxapen"</span>)</a>
<a class="sourceLine" id="cb1-33" title="33"><span class="co">#&gt; Class 'atc'</span></a>
<a class="sourceLine" id="cb1-34" title="34"><span class="co">#&gt; [1] J01CF05</span></a></code></pre></div>
<a class="sourceLine" id="cb1-33" title="33"><span class="co"># Class 'atc'</span></a>
<a class="sourceLine" id="cb1-34" title="34"><span class="co"># [1] J01CF05</span></a></code></pre></div>
</div>
<div id="import-data-from-spsssasstata" class="section level2">
<h2 class="hasAnchor">

View File

@ -192,7 +192,7 @@
<h1>How to get properties of an antibiotic</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>ab_property.Rmd</code></div>

View File

@ -192,7 +192,7 @@
<h1>Benchmarks</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>benchmarks.Rmd</code></div>
@ -216,15 +216,15 @@
<a class="sourceLine" id="cb2-7" title="7"> <span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a>
<a class="sourceLine" id="cb2-8" title="8"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb2-9" title="9"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(S.aureus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a>
<a class="sourceLine" id="cb2-10" title="10"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb2-11" title="11"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb2-12" title="12"><span class="co">#&gt; as.mo("sau") 17.0 18 27 18 18 65.0 10</span></a>
<a class="sourceLine" id="cb2-13" title="13"><span class="co">#&gt; as.mo("stau") 47.0 48 57 48 48 92.0 10</span></a>
<a class="sourceLine" id="cb2-14" title="14"><span class="co">#&gt; as.mo("staaur") 17.0 18 24 18 18 81.0 10</span></a>
<a class="sourceLine" id="cb2-15" title="15"><span class="co">#&gt; as.mo("STAAUR") 18.0 18 31 18 62 62.0 10</span></a>
<a class="sourceLine" id="cb2-16" title="16"><span class="co">#&gt; as.mo("S. aureus") 28.0 28 28 28 29 29.0 10</span></a>
<a class="sourceLine" id="cb2-17" title="17"><span class="co">#&gt; as.mo("S. aureus") 28.0 28 42 28 31 110.0 10</span></a>
<a class="sourceLine" id="cb2-18" title="18"><span class="co">#&gt; as.mo("Staphylococcus aureus") 7.9 8 8 8 8 8.6 10</span></a></code></pre></div>
<a class="sourceLine" id="cb2-10" title="10"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb2-11" title="11"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb2-12" title="12"><span class="co"># as.mo("sau") 17 17 22.0 18 18.0 66 10</span></a>
<a class="sourceLine" id="cb2-13" title="13"><span class="co"># as.mo("stau") 47 48 52.0 48 48.0 92 10</span></a>
<a class="sourceLine" id="cb2-14" title="14"><span class="co"># as.mo("staaur") 18 18 35.0 18 62.0 66 10</span></a>
<a class="sourceLine" id="cb2-15" title="15"><span class="co"># as.mo("STAAUR") 17 18 25.0 18 18.0 54 10</span></a>
<a class="sourceLine" id="cb2-16" title="16"><span class="co"># as.mo("S. aureus") 28 28 41.0 28 72.0 73 10</span></a>
<a class="sourceLine" id="cb2-17" title="17"><span class="co"># as.mo("S. aureus") 28 28 41.0 28 28.0 120 10</span></a>
<a class="sourceLine" id="cb2-18" title="18"><span class="co"># as.mo("Staphylococcus aureus") 8 8 9.3 8 8.1 20 10</span></a></code></pre></div>
<p>In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.</p>
<p>To achieve this speed, the <code>as.mo</code> function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of <em>Thermus islandicus</em> (<code>B_THERMS_ISL</code>), a bug probably never found before in humans:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb3-1" title="1">T.islandicus &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(<span class="st">"theisl"</span>),</a>
@ -234,14 +234,14 @@
<a class="sourceLine" id="cb3-5" title="5"> <span class="kw"><a href="../reference/as.mo.html">as.mo</a></span>(<span class="st">"Thermus islandicus"</span>),</a>
<a class="sourceLine" id="cb3-6" title="6"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb3-7" title="7"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(T.islandicus, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">2</span>)</a>
<a class="sourceLine" id="cb3-8" title="8"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb3-9" title="9"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb3-10" title="10"><span class="co">#&gt; as.mo("theisl") 470 470 500 500 520 520 10</span></a>
<a class="sourceLine" id="cb3-11" title="11"><span class="co">#&gt; as.mo("THEISL") 470 470 500 470 520 580 10</span></a>
<a class="sourceLine" id="cb3-12" title="12"><span class="co">#&gt; as.mo("T. islandicus") 76 76 86 76 77 130 10</span></a>
<a class="sourceLine" id="cb3-13" title="13"><span class="co">#&gt; as.mo("T. islandicus") 75 76 82 76 78 120 10</span></a>
<a class="sourceLine" id="cb3-14" title="14"><span class="co">#&gt; as.mo("Thermus islandicus") 74 74 110 120 120 150 10</span></a></code></pre></div>
<p>That takes 8.2 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Thermus islandicus</em>) are almost fast - these are the most probable input from most data sets.</p>
<a class="sourceLine" id="cb3-8" title="8"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb3-9" title="9"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb3-10" title="10"><span class="co"># as.mo("theisl") 470 470 500 510 520 530 10</span></a>
<a class="sourceLine" id="cb3-11" title="11"><span class="co"># as.mo("THEISL") 470 470 490 470 520 520 10</span></a>
<a class="sourceLine" id="cb3-12" title="12"><span class="co"># as.mo("T. islandicus") 74 74 87 74 120 120 10</span></a>
<a class="sourceLine" id="cb3-13" title="13"><span class="co"># as.mo("T. islandicus") 74 74 89 74 120 140 10</span></a>
<a class="sourceLine" id="cb3-14" title="14"><span class="co"># as.mo("Thermus islandicus") 73 73 93 74 120 120 10</span></a></code></pre></div>
<p>That takes 7.8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like <em>Thermus islandicus</em>) are almost fast - these are the most probable input from most data sets.</p>
<p>In the figure below, we compare <em>Escherichia coli</em> (which is very common) with <em>Prevotella brevis</em> (which is moderately common) and with <em>Thermus islandicus</em> (which is very uncommon):</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/graphics/topics/par">par</a></span>(<span class="dt">mar =</span> <span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/c">c</a></span>(<span class="dv">5</span>, <span class="dv">16</span>, <span class="dv">4</span>, <span class="dv">2</span>)) <span class="co"># set more space for left margin text (16)</span></a>
<a class="sourceLine" id="cb4-2" title="2"></a>
@ -275,20 +275,20 @@
<a class="sourceLine" id="cb5-12" title="12"> </a>
<a class="sourceLine" id="cb5-13" title="13"><span class="co"># got indeed 50 times 10,000 = half a million?</span></a>
<a class="sourceLine" id="cb5-14" title="14"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/length">length</a></span>(x)</a>
<a class="sourceLine" id="cb5-15" title="15"><span class="co">#&gt; [1] 500000</span></a>
<a class="sourceLine" id="cb5-15" title="15"><span class="co"># [1] 500000</span></a>
<a class="sourceLine" id="cb5-16" title="16"></a>
<a class="sourceLine" id="cb5-17" title="17"><span class="co"># and how many unique values do we have?</span></a>
<a class="sourceLine" id="cb5-18" title="18"><span class="kw"><a href="https://dplyr.tidyverse.org/reference/n_distinct.html">n_distinct</a></span>(x)</a>
<a class="sourceLine" id="cb5-19" title="19"><span class="co">#&gt; [1] 50</span></a>
<a class="sourceLine" id="cb5-19" title="19"><span class="co"># [1] 50</span></a>
<a class="sourceLine" id="cb5-20" title="20"></a>
<a class="sourceLine" id="cb5-21" title="21"><span class="co"># now let's see:</span></a>
<a class="sourceLine" id="cb5-22" title="22">run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(x),</a>
<a class="sourceLine" id="cb5-23" title="23"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb5-24" title="24"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb5-25" title="25"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb5-26" title="26"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb5-27" title="27"><span class="co">#&gt; mo_fullname(x) 823 826 864 833 869 1080 10</span></a></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.83 seconds (832 ms). You only lose time on your unique input values.</p>
<a class="sourceLine" id="cb5-25" title="25"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb5-26" title="26"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb5-27" title="27"><span class="co"># mo_fullname(x) 639 681 745 720 770 910 10</span></a></code></pre></div>
<p>So transforming 500,000 values (!!) of 50 unique values only takes 0.72 seconds (719 ms). You only lose time on your unique input values.</p>
</div>
<div id="precalculated-results" class="section level3">
<h3 class="hasAnchor">
@ -299,12 +299,12 @@
<a class="sourceLine" id="cb6-3" title="3"> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a>
<a class="sourceLine" id="cb6-4" title="4"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb6-5" title="5"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb6-6" title="6"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb6-7" title="7"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb6-8" title="8"><span class="co">#&gt; A 12.90 13.30 13.7 13.70 14.10 14.5 10</span></a>
<a class="sourceLine" id="cb6-9" title="9"><span class="co">#&gt; B 25.10 25.70 26.2 25.90 26.60 27.8 10</span></a>
<a class="sourceLine" id="cb6-10" title="10"><span class="co">#&gt; C 1.46 1.68 6.2 1.84 1.89 46.1 10</span></a></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_fullname("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0018 seconds - it doesnt even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<a class="sourceLine" id="cb6-6" title="6"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb6-7" title="7"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb6-8" title="8"><span class="co"># A 12.90 13.2 13.40 13.40 13.80 14.00 10</span></a>
<a class="sourceLine" id="cb6-9" title="9"><span class="co"># B 25.30 25.7 31.00 27.10 27.50 70.10 10</span></a>
<a class="sourceLine" id="cb6-10" title="10"><span class="co"># C 1.38 1.6 1.69 1.69 1.72 1.97 10</span></a></code></pre></div>
<p>So going from <code><a href="../reference/mo_property.html">mo_fullname("Staphylococcus aureus")</a></code> to <code>"Staphylococcus aureus"</code> takes 0.0017 seconds - it doesnt even start calculating <em>if the result would be the same as the expected resulting value</em>. That goes for all helper functions:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb7-1" title="1">run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="dt">A =</span> <span class="kw"><a href="../reference/mo_property.html">mo_species</a></span>(<span class="st">"aureus"</span>),</a>
<a class="sourceLine" id="cb7-2" title="2"> <span class="dt">B =</span> <span class="kw"><a href="../reference/mo_property.html">mo_genus</a></span>(<span class="st">"Staphylococcus"</span>),</a>
<a class="sourceLine" id="cb7-3" title="3"> <span class="dt">C =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"Staphylococcus aureus"</span>),</a>
@ -315,16 +315,16 @@
<a class="sourceLine" id="cb7-8" title="8"> <span class="dt">H =</span> <span class="kw"><a href="../reference/mo_property.html">mo_kingdom</a></span>(<span class="st">"Bacteria"</span>),</a>
<a class="sourceLine" id="cb7-9" title="9"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb7-10" title="10"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">3</span>)</a>
<a class="sourceLine" id="cb7-11" title="11"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb7-12" title="12"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb7-13" title="13"><span class="co">#&gt; A 0.354 0.404 0.470 0.486 0.527 0.534 10</span></a>
<a class="sourceLine" id="cb7-14" title="14"><span class="co">#&gt; B 0.413 0.529 0.581 0.538 0.642 0.917 10</span></a>
<a class="sourceLine" id="cb7-15" title="15"><span class="co">#&gt; C 1.510 1.720 1.780 1.810 1.870 1.900 10</span></a>
<a class="sourceLine" id="cb7-16" title="16"><span class="co">#&gt; D 0.443 0.522 0.547 0.538 0.595 0.664 10</span></a>
<a class="sourceLine" id="cb7-17" title="17"><span class="co">#&gt; E 0.371 0.431 0.474 0.467 0.515 0.588 10</span></a>
<a class="sourceLine" id="cb7-18" title="18"><span class="co">#&gt; F 0.372 0.427 0.453 0.453 0.494 0.519 10</span></a>
<a class="sourceLine" id="cb7-19" title="19"><span class="co">#&gt; G 0.400 0.430 0.456 0.446 0.488 0.522 10</span></a>
<a class="sourceLine" id="cb7-20" title="20"><span class="co">#&gt; H 0.194 0.276 0.301 0.301 0.332 0.395 10</span></a></code></pre></div>
<a class="sourceLine" id="cb7-11" title="11"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb7-12" title="12"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb7-13" title="13"><span class="co"># A 0.392 0.485 0.577 0.580 0.628 0.808 10</span></a>
<a class="sourceLine" id="cb7-14" title="14"><span class="co"># B 0.444 0.521 0.566 0.567 0.609 0.710 10</span></a>
<a class="sourceLine" id="cb7-15" title="15"><span class="co"># C 1.380 1.680 1.750 1.760 1.820 2.160 10</span></a>
<a class="sourceLine" id="cb7-16" title="16"><span class="co"># D 0.422 0.522 0.564 0.553 0.640 0.644 10</span></a>
<a class="sourceLine" id="cb7-17" title="17"><span class="co"># E 0.362 0.443 0.520 0.543 0.595 0.684 10</span></a>
<a class="sourceLine" id="cb7-18" title="18"><span class="co"># F 0.347 0.432 0.515 0.495 0.585 0.728 10</span></a>
<a class="sourceLine" id="cb7-19" title="19"><span class="co"># G 0.377 0.417 0.479 0.463 0.495 0.681 10</span></a>
<a class="sourceLine" id="cb7-20" title="20"><span class="co"># H 0.263 0.264 0.310 0.282 0.350 0.436 10</span></a></code></pre></div>
<p>Of course, when running <code><a href="../reference/mo_property.html">mo_phylum("Firmicutes")</a></code> the function has zero knowledge about the actual microorganism, namely <em>S. aureus</em>. But since the result would be <code>"Firmicutes"</code> too, there is no point in calculating the result. And because this package knows all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.</p>
</div>
<div id="results-in-other-languages" class="section level3">
@ -332,13 +332,13 @@
<a href="#results-in-other-languages" class="anchor"></a>Results in other languages</h3>
<p>When the system language is non-English and supported by this <code>AMR</code> package, some functions will have a translated result. This almost doest take extra time:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb8-1" title="1"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"en"</span>) <span class="co"># or just mo_fullname("CoNS") on an English system</span></a>
<a class="sourceLine" id="cb8-2" title="2"><span class="co">#&gt; [1] "Coagulase-negative Staphylococcus (CoNS)"</span></a>
<a class="sourceLine" id="cb8-2" title="2"><span class="co"># [1] "Coagulase-negative Staphylococcus (CoNS)"</span></a>
<a class="sourceLine" id="cb8-3" title="3"></a>
<a class="sourceLine" id="cb8-4" title="4"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"es"</span>) <span class="co"># or just mo_fullname("CoNS") on a Spanish system</span></a>
<a class="sourceLine" id="cb8-5" title="5"><span class="co">#&gt; [1] "Staphylococcus coagulasa negativo (SCN)"</span></a>
<a class="sourceLine" id="cb8-5" title="5"><span class="co"># [1] "Staphylococcus coagulasa negativo (SCN)"</span></a>
<a class="sourceLine" id="cb8-6" title="6"></a>
<a class="sourceLine" id="cb8-7" title="7"><span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"nl"</span>) <span class="co"># or just mo_fullname("CoNS") on a Dutch system</span></a>
<a class="sourceLine" id="cb8-8" title="8"><span class="co">#&gt; [1] "Coagulase-negatieve Staphylococcus (CNS)"</span></a>
<a class="sourceLine" id="cb8-8" title="8"><span class="co"># [1] "Coagulase-negatieve Staphylococcus (CNS)"</span></a>
<a class="sourceLine" id="cb8-9" title="9"></a>
<a class="sourceLine" id="cb8-10" title="10">run_it &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/microbenchmark/topics/microbenchmark">microbenchmark</a></span>(<span class="dt">en =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"en"</span>),</a>
<a class="sourceLine" id="cb8-11" title="11"> <span class="dt">de =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"de"</span>),</a>
@ -349,15 +349,15 @@
<a class="sourceLine" id="cb8-16" title="16"> <span class="dt">pt =</span> <span class="kw"><a href="../reference/mo_property.html">mo_fullname</a></span>(<span class="st">"CoNS"</span>, <span class="dt">language =</span> <span class="st">"pt"</span>),</a>
<a class="sourceLine" id="cb8-17" title="17"> <span class="dt">times =</span> <span class="dv">10</span>)</a>
<a class="sourceLine" id="cb8-18" title="18"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/print">print</a></span>(run_it, <span class="dt">unit =</span> <span class="st">"ms"</span>, <span class="dt">signif =</span> <span class="dv">4</span>)</a>
<a class="sourceLine" id="cb8-19" title="19"><span class="co">#&gt; Unit: milliseconds</span></a>
<a class="sourceLine" id="cb8-20" title="20"><span class="co">#&gt; expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb8-21" title="21"><span class="co">#&gt; en 18.18 18.30 18.50 18.46 18.63 18.88 10</span></a>
<a class="sourceLine" id="cb8-22" title="22"><span class="co">#&gt; de 23.02 23.10 23.38 23.27 23.57 24.23 10</span></a>
<a class="sourceLine" id="cb8-23" title="23"><span class="co">#&gt; nl 36.73 36.79 37.05 36.95 37.26 37.74 10</span></a>
<a class="sourceLine" id="cb8-24" title="24"><span class="co">#&gt; es 22.97 23.14 23.35 23.22 23.68 23.79 10</span></a>
<a class="sourceLine" id="cb8-25" title="25"><span class="co">#&gt; it 22.99 23.05 23.17 23.13 23.22 23.55 10</span></a>
<a class="sourceLine" id="cb8-26" title="26"><span class="co">#&gt; fr 23.12 23.14 32.29 23.25 23.90 68.31 10</span></a>
<a class="sourceLine" id="cb8-27" title="27"><span class="co">#&gt; pt 23.10 23.12 32.39 23.56 23.79 68.65 10</span></a></code></pre></div>
<a class="sourceLine" id="cb8-19" title="19"><span class="co"># Unit: milliseconds</span></a>
<a class="sourceLine" id="cb8-20" title="20"><span class="co"># expr min lq mean median uq max neval</span></a>
<a class="sourceLine" id="cb8-21" title="21"><span class="co"># en 18.15 18.27 22.87 18.31 18.68 63.44 10</span></a>
<a class="sourceLine" id="cb8-22" title="22"><span class="co"># de 23.03 23.14 27.80 23.20 23.53 68.60 10</span></a>
<a class="sourceLine" id="cb8-23" title="23"><span class="co"># nl 36.67 36.71 41.51 37.01 37.41 81.57 10</span></a>
<a class="sourceLine" id="cb8-24" title="24"><span class="co"># es 23.07 23.11 23.25 23.21 23.26 23.63 10</span></a>
<a class="sourceLine" id="cb8-25" title="25"><span class="co"># it 22.93 23.04 23.17 23.08 23.19 23.62 10</span></a>
<a class="sourceLine" id="cb8-26" title="26"><span class="co"># fr 23.13 23.21 27.71 23.30 23.31 67.70 10</span></a>
<a class="sourceLine" id="cb8-27" title="27"><span class="co"># pt 23.08 23.18 27.85 23.23 23.98 67.96 10</span></a></code></pre></div>
<p>Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.</p>
</div>
</div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 27 KiB

View File

@ -192,7 +192,7 @@
<h1>How to get properties of a microorganism</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>mo_property.Rmd</code></div>

View File

@ -192,7 +192,7 @@
<h1>How to predict antimicrobial resistance</h1>
<h4 class="author">Matthijs S. Berends</h4>
<h4 class="date">17 May 2019</h4>
<h4 class="date">20 May 2019</h4>
<div class="hidden name"><code>resistance_predict.Rmd</code></div>
@ -230,62 +230,62 @@
<a class="sourceLine" id="cb2-10" title="10"><span class="st"> </span><span class="kw"><a href="../reference/resistance_predict.html">resistance_predict</a></span>(<span class="dt">col_ab =</span> <span class="st">"TZP"</span>)</a></code></pre></div>
<p>The function will look for a date column itself if <code>col_date</code> is not set.</p>
<p>When running any of these commands, a summary of the regression model will be printed unless using <code><a href="../reference/resistance_predict.html">resistance_predict(..., info = FALSE)</a></code>.</p>
<pre><code>#&gt; NOTE: Using column `date` as input for `col_date`.
#&gt;
#&gt; Logistic regression model (logit) with binomial distribution
#&gt; ------------------------------------------------------------
#&gt;
#&gt; Call:
#&gt; glm(formula = df_matrix ~ year, family = binomial)
#&gt;
#&gt; Deviance Residuals:
#&gt; Min 1Q Median 3Q Max
#&gt; -2.6817 -1.4087 -0.5657 0.9672 3.5728
#&gt;
#&gt; Coefficients:
#&gt; Estimate Std. Error z value Pr(&gt;|z|)
#&gt; (Intercept) -224.39872 48.03354 -4.672 2.99e-06 ***
#&gt; year 0.11061 0.02388 4.633 3.61e-06 ***
#&gt; ---
#&gt; Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#&gt;
#&gt; (Dispersion parameter for binomial family taken to be 1)
#&gt;
#&gt; Null deviance: 61.512 on 14 degrees of freedom
#&gt; Residual deviance: 38.692 on 13 degrees of freedom
#&gt; AIC: 95.212
#&gt;
#&gt; Number of Fisher Scoring iterations: 4</code></pre>
<pre><code># NOTE: Using column `date` as input for `col_date`.
#
# Logistic regression model (logit) with binomial distribution
# ------------------------------------------------------------
#
# Call:
# glm(formula = df_matrix ~ year, family = binomial)
#
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -2.6817 -1.4087 -0.5657 0.9672 3.5728
#
# Coefficients:
# Estimate Std. Error z value Pr(&gt;|z|)
# (Intercept) -224.39872 48.03354 -4.672 2.99e-06 ***
# year 0.11061 0.02388 4.633 3.61e-06 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# (Dispersion parameter for binomial family taken to be 1)
#
# Null deviance: 61.512 on 14 degrees of freedom
# Residual deviance: 38.692 on 13 degrees of freedom
# AIC: 95.212
#
# Number of Fisher Scoring iterations: 4</code></pre>
<p>This text is only a printed summary - the actual result (output) of the function is a <code>data.frame</code> containing for each year: the number of observations, the actual observed resistance, the estimated resistance and the standard error below and above the estimation:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb4-1" title="1">predict_TZP</a>
<a class="sourceLine" id="cb4-2" title="2"><span class="co">#&gt; year value se_min se_max observations observed estimated</span></a>
<a class="sourceLine" id="cb4-3" title="3"><span class="co">#&gt; 1 2003 0.06250000 NA NA 32 0.06250000 0.05486389</span></a>
<a class="sourceLine" id="cb4-4" title="4"><span class="co">#&gt; 2 2004 0.08536585 NA NA 82 0.08536585 0.06089002</span></a>
<a class="sourceLine" id="cb4-5" title="5"><span class="co">#&gt; 3 2005 0.05000000 NA NA 60 0.05000000 0.06753075</span></a>
<a class="sourceLine" id="cb4-6" title="6"><span class="co">#&gt; 4 2006 0.05084746 NA NA 59 0.05084746 0.07483801</span></a>
<a class="sourceLine" id="cb4-7" title="7"><span class="co">#&gt; 5 2007 0.12121212 NA NA 66 0.12121212 0.08286570</span></a>
<a class="sourceLine" id="cb4-8" title="8"><span class="co">#&gt; 6 2008 0.04166667 NA NA 72 0.04166667 0.09166918</span></a>
<a class="sourceLine" id="cb4-9" title="9"><span class="co">#&gt; 7 2009 0.01639344 NA NA 61 0.01639344 0.10130461</span></a>
<a class="sourceLine" id="cb4-10" title="10"><span class="co">#&gt; 8 2010 0.05660377 NA NA 53 0.05660377 0.11182814</span></a>
<a class="sourceLine" id="cb4-11" title="11"><span class="co">#&gt; 9 2011 0.18279570 NA NA 93 0.18279570 0.12329488</span></a>
<a class="sourceLine" id="cb4-12" title="12"><span class="co">#&gt; 10 2012 0.30769231 NA NA 65 0.30769231 0.13575768</span></a>
<a class="sourceLine" id="cb4-13" title="13"><span class="co">#&gt; 11 2013 0.06896552 NA NA 58 0.06896552 0.14926576</span></a>
<a class="sourceLine" id="cb4-14" title="14"><span class="co">#&gt; 12 2014 0.10000000 NA NA 60 0.10000000 0.16386307</span></a>
<a class="sourceLine" id="cb4-15" title="15"><span class="co">#&gt; 13 2015 0.23636364 NA NA 55 0.23636364 0.17958657</span></a>
<a class="sourceLine" id="cb4-16" title="16"><span class="co">#&gt; 14 2016 0.22619048 NA NA 84 0.22619048 0.19646431</span></a>
<a class="sourceLine" id="cb4-17" title="17"><span class="co">#&gt; 15 2017 0.16279070 NA NA 86 0.16279070 0.21451350</span></a>
<a class="sourceLine" id="cb4-18" title="18"><span class="co">#&gt; 16 2018 0.23373852 0.2021578 0.2653193 NA NA 0.23373852</span></a>
<a class="sourceLine" id="cb4-19" title="19"><span class="co">#&gt; 17 2019 0.25412909 0.2168525 0.2914057 NA NA 0.25412909</span></a>
<a class="sourceLine" id="cb4-20" title="20"><span class="co">#&gt; 18 2020 0.27565854 0.2321869 0.3191302 NA NA 0.27565854</span></a>
<a class="sourceLine" id="cb4-21" title="21"><span class="co">#&gt; 19 2021 0.29828252 0.2481942 0.3483709 NA NA 0.29828252</span></a>
<a class="sourceLine" id="cb4-22" title="22"><span class="co">#&gt; 20 2022 0.32193804 0.2649008 0.3789753 NA NA 0.32193804</span></a>
<a class="sourceLine" id="cb4-23" title="23"><span class="co">#&gt; 21 2023 0.34654311 0.2823269 0.4107593 NA NA 0.34654311</span></a>
<a class="sourceLine" id="cb4-24" title="24"><span class="co">#&gt; 22 2024 0.37199700 0.3004860 0.4435080 NA NA 0.37199700</span></a>
<a class="sourceLine" id="cb4-25" title="25"><span class="co">#&gt; 23 2025 0.39818127 0.3193839 0.4769787 NA NA 0.39818127</span></a>
<a class="sourceLine" id="cb4-26" title="26"><span class="co">#&gt; 24 2026 0.42496142 0.3390173 0.5109056 NA NA 0.42496142</span></a>
<a class="sourceLine" id="cb4-27" title="27"><span class="co">#&gt; 25 2027 0.45218939 0.3593720 0.5450068 NA NA 0.45218939</span></a>
<a class="sourceLine" id="cb4-28" title="28"><span class="co">#&gt; 26 2028 0.47970658 0.3804212 0.5789920 NA NA 0.47970658</span></a>
<a class="sourceLine" id="cb4-29" title="29"><span class="co">#&gt; 27 2029 0.50734745 0.4021241 0.6125708 NA NA 0.50734745</span></a></code></pre></div>
<a class="sourceLine" id="cb4-2" title="2"><span class="co"># year value se_min se_max observations observed estimated</span></a>
<a class="sourceLine" id="cb4-3" title="3"><span class="co"># 1 2003 0.06250000 NA NA 32 0.06250000 0.05486389</span></a>
<a class="sourceLine" id="cb4-4" title="4"><span class="co"># 2 2004 0.08536585 NA NA 82 0.08536585 0.06089002</span></a>
<a class="sourceLine" id="cb4-5" title="5"><span class="co"># 3 2005 0.05000000 NA NA 60 0.05000000 0.06753075</span></a>
<a class="sourceLine" id="cb4-6" title="6"><span class="co"># 4 2006 0.05084746 NA NA 59 0.05084746 0.07483801</span></a>
<a class="sourceLine" id="cb4-7" title="7"><span class="co"># 5 2007 0.12121212 NA NA 66 0.12121212 0.08286570</span></a>
<a class="sourceLine" id="cb4-8" title="8"><span class="co"># 6 2008 0.04166667 NA NA 72 0.04166667 0.09166918</span></a>
<a class="sourceLine" id="cb4-9" title="9"><span class="co"># 7 2009 0.01639344 NA NA 61 0.01639344 0.10130461</span></a>
<a class="sourceLine" id="cb4-10" title="10"><span class="co"># 8 2010 0.05660377 NA NA 53 0.05660377 0.11182814</span></a>
<a class="sourceLine" id="cb4-11" title="11"><span class="co"># 9 2011 0.18279570 NA NA 93 0.18279570 0.12329488</span></a>
<a class="sourceLine" id="cb4-12" title="12"><span class="co"># 10 2012 0.30769231 NA NA 65 0.30769231 0.13575768</span></a>
<a class="sourceLine" id="cb4-13" title="13"><span class="co"># 11 2013 0.06896552 NA NA 58 0.06896552 0.14926576</span></a>
<a class="sourceLine" id="cb4-14" title="14"><span class="co"># 12 2014 0.10000000 NA NA 60 0.10000000 0.16386307</span></a>
<a class="sourceLine" id="cb4-15" title="15"><span class="co"># 13 2015 0.23636364 NA NA 55 0.23636364 0.17958657</span></a>
<a class="sourceLine" id="cb4-16" title="16"><span class="co"># 14 2016 0.22619048 NA NA 84 0.22619048 0.19646431</span></a>
<a class="sourceLine" id="cb4-17" title="17"><span class="co"># 15 2017 0.16279070 NA NA 86 0.16279070 0.21451350</span></a>
<a class="sourceLine" id="cb4-18" title="18"><span class="co"># 16 2018 0.23373852 0.2021578 0.2653193 NA NA 0.23373852</span></a>
<a class="sourceLine" id="cb4-19" title="19"><span class="co"># 17 2019 0.25412909 0.2168525 0.2914057 NA NA 0.25412909</span></a>
<a class="sourceLine" id="cb4-20" title="20"><span class="co"># 18 2020 0.27565854 0.2321869 0.3191302 NA NA 0.27565854</span></a>
<a class="sourceLine" id="cb4-21" title="21"><span class="co"># 19 2021 0.29828252 0.2481942 0.3483709 NA NA 0.29828252</span></a>
<a class="sourceLine" id="cb4-22" title="22"><span class="co"># 20 2022 0.32193804 0.2649008 0.3789753 NA NA 0.32193804</span></a>
<a class="sourceLine" id="cb4-23" title="23"><span class="co"># 21 2023 0.34654311 0.2823269 0.4107593 NA NA 0.34654311</span></a>
<a class="sourceLine" id="cb4-24" title="24"><span class="co"># 22 2024 0.37199700 0.3004860 0.4435080 NA NA 0.37199700</span></a>
<a class="sourceLine" id="cb4-25" title="25"><span class="co"># 23 2025 0.39818127 0.3193839 0.4769787 NA NA 0.39818127</span></a>
<a class="sourceLine" id="cb4-26" title="26"><span class="co"># 24 2026 0.42496142 0.3390173 0.5109056 NA NA 0.42496142</span></a>
<a class="sourceLine" id="cb4-27" title="27"><span class="co"># 25 2027 0.45218939 0.3593720 0.5450068 NA NA 0.45218939</span></a>
<a class="sourceLine" id="cb4-28" title="28"><span class="co"># 26 2028 0.47970658 0.3804212 0.5789920 NA NA 0.47970658</span></a>
<a class="sourceLine" id="cb4-29" title="29"><span class="co"># 27 2029 0.50734745 0.4021241 0.6125708 NA NA 0.50734745</span></a></code></pre></div>
<p>The function <code>plot</code> is available in base R, and can be extended by other packages to depend the output based on the type of input. We extended its function to cope with resistance predictions:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb5-1" title="1"><span class="kw"><a href="https://www.rdocumentation.org/packages/graphics/topics/plot">plot</a></span>(predict_TZP)</a></code></pre></div>
<p><img src="resistance_predict_files/figure-html/unnamed-chunk-4-1.png" width="720"></p>
@ -305,7 +305,7 @@
<a class="sourceLine" id="cb8-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(<span class="kw"><a href="../reference/mo_property.html">mo_gramstain</a></span>(mo) <span class="op">==</span><span class="st"> "Gram positive"</span>) <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb8-3" title="3"><span class="st"> </span><span class="kw"><a href="../reference/resistance_predict.html">resistance_predict</a></span>(<span class="dt">col_ab =</span> <span class="st">"VAN"</span>, <span class="dt">year_min =</span> <span class="dv">2010</span>, <span class="dt">info =</span> <span class="ot">FALSE</span>) <span class="op">%&gt;%</span><span class="st"> </span></a>
<a class="sourceLine" id="cb8-4" title="4"><span class="st"> </span><span class="kw"><a href="../reference/resistance_predict.html">ggplot_rsi_predict</a></span>()</a>
<a class="sourceLine" id="cb8-5" title="5"><span class="co">#&gt; </span><span class="al">NOTE</span><span class="co">: Using column `date` as input for `col_date`.</span></a></code></pre></div>
<a class="sourceLine" id="cb8-5" title="5"><span class="co"># </span><span class="al">NOTE</span><span class="co">: Using column `date` as input for `col_date`.</span></a></code></pre></div>
<p><img src="resistance_predict_files/figure-html/unnamed-chunk-6-1.png" width="720"></p>
<p>Vancomycin resistance could be 100% in ten years, but might also stay around 0%.</p>
<p>You can define the model with the <code>model</code> parameter. The default model is a generalised linear regression model using a binomial distribution, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance.</p>
@ -350,21 +350,21 @@
<a class="sourceLine" id="cb9-2" title="2"><span class="st"> </span><span class="kw"><a href="https://dplyr.tidyverse.org/reference/filter.html">filter</a></span>(<span class="kw"><a href="../reference/mo_property.html">mo_gramstain</a></span>(mo) <span class="op">==</span><span class="st"> "Gram positive"</span>) <span class="op">%&gt;%</span></a>
<a class="sourceLine" id="cb9-3" title="3"><span class="st"> </span><span class="kw"><a href="../reference/resistance_predict.html">resistance_predict</a></span>(<span class="dt">col_ab =</span> <span class="st">"VAN"</span>, <span class="dt">year_min =</span> <span class="dv">2010</span>, <span class="dt">info =</span> <span class="ot">FALSE</span>, <span class="dt">model =</span> <span class="st">"linear"</span>) <span class="op">%&gt;%</span><span class="st"> </span></a>
<a class="sourceLine" id="cb9-4" title="4"><span class="st"> </span><span class="kw"><a href="../reference/resistance_predict.html">ggplot_rsi_predict</a></span>()</a>
<a class="sourceLine" id="cb9-5" title="5"><span class="co">#&gt; </span><span class="al">NOTE</span><span class="co">: Using column `date` as input for `col_date`.</span></a></code></pre></div>
<a class="sourceLine" id="cb9-5" title="5"><span class="co"># </span><span class="al">NOTE</span><span class="co">: Using column `date` as input for `col_date`.</span></a></code></pre></div>
<p><img src="resistance_predict_files/figure-html/unnamed-chunk-7-1.png" width="720"></p>
<p>This seems more likely, doesnt it?</p>
<p>The model itself is also available from the object, as an <code>attribute</code>:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb10-1" title="1">model &lt;-<span class="st"> </span><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/attributes">attributes</a></span>(predict_TZP)<span class="op">$</span>model</a>
<a class="sourceLine" id="cb10-2" title="2"></a>
<a class="sourceLine" id="cb10-3" title="3"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/summary">summary</a></span>(model)<span class="op">$</span>family</a>
<a class="sourceLine" id="cb10-4" title="4"><span class="co">#&gt; </span></a>
<a class="sourceLine" id="cb10-5" title="5"><span class="co">#&gt; Family: binomial </span></a>
<a class="sourceLine" id="cb10-6" title="6"><span class="co">#&gt; Link function: logit</span></a>
<a class="sourceLine" id="cb10-4" title="4"><span class="co"># </span></a>
<a class="sourceLine" id="cb10-5" title="5"><span class="co"># Family: binomial </span></a>
<a class="sourceLine" id="cb10-6" title="6"><span class="co"># Link function: logit</span></a>
<a class="sourceLine" id="cb10-7" title="7"></a>
<a class="sourceLine" id="cb10-8" title="8"><span class="kw"><a href="https://www.rdocumentation.org/packages/base/topics/summary">summary</a></span>(model)<span class="op">$</span>coefficients</a>
<a class="sourceLine" id="cb10-9" title="9"><span class="co">#&gt; Estimate Std. Error z value Pr(&gt;|z|)</span></a>
<a class="sourceLine" id="cb10-10" title="10"><span class="co">#&gt; (Intercept) -224.3987194 48.0335384 -4.671709 2.987038e-06</span></a>
<a class="sourceLine" id="cb10-11" title="11"><span class="co">#&gt; year 0.1106102 0.0238753 4.632831 3.606990e-06</span></a></code></pre></div>
<a class="sourceLine" id="cb10-9" title="9"><span class="co"># Estimate Std. Error z value Pr(&gt;|z|)</span></a>
<a class="sourceLine" id="cb10-10" title="10"><span class="co"># (Intercept) -224.3987194 48.0335384 -4.671709 2.987038e-06</span></a>
<a class="sourceLine" id="cb10-11" title="11"><span class="co"># year 0.1106102 0.0238753 4.632831 3.606990e-06</span></a></code></pre></div>
</div>
</div>
</div>

View File

@ -66,6 +66,7 @@ $( document ).ready(function() {
$(disqus).insertBefore('footer');
$('#disqus_thread footer').remove();
// Alter footer
$('footer').html(
'<div>' +
'<p>' + $('footer .copyright p').html().replace(
@ -73,6 +74,7 @@ $( document ).ready(function() {
'<code>AMR</code> (for R). Developed at the <a href="https://www.rug.nl">University of Groningen</a>.<br>Authors:') + '</p>' +
'<a href="https://www.rug.nl"><img src="https://gitlab.com/msberends/AMR/raw/master/docs/logo_rug.png" class="footer_logo"></a>' +
'</div>');
// all links should open in new tab/window
$('footer').html($('footer').html().replace(/href/g, 'target="_blank" href'));
// doctoral titles of authors
@ -83,8 +85,8 @@ $( document ).ready(function() {
x = x.replace("Bhanu", "Prof Dr Bhanu");
x = x.replace(/Author, thesis advisor/g, "Doctoral advisor");
x = x.replace(/Authors/g, "aut_plural");
x = x.replace(/Author, maintainer./g, "");
x = x.replace(/Author/g, "");
x = x.replace(/Author, maintainer[.]?/g, "");
x = x.replace(/Author[.]?/g, "");
x = x.replace(/aut_plural/g, "Authors");
}
return(x);

View File

@ -339,13 +339,13 @@ The algorithm can additionally use three different levels of uncertainty to gues
<p>Use <code>mo_uncertainties()</code> to get a data.frame with all values that were coerced to a valid value, but with uncertainty.</p>
<p>Use <code>mo_renamed()</code> to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.</p>
<p><strong>Microbial prevalence of pathogens in humans</strong> <br />
The intelligent rules takes into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:</p><ul>
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:</p><ul>
<li><p>1 (most prevalent): class is Gammaproteobacteria <strong>or</strong> genus is one of: <em>Enterococcus</em>, <em>Staphylococcus</em>, <em>Streptococcus</em>.</p></li>
<li><p>2: phylum is one of: Proteobacteria, Firmicutes, Actinobacteria, Sarcomastigophora <strong>or</strong> genus is one of: <em>Aspergillus</em>, <em>Bacteroides</em>, <em>Candida</em>, <em>Capnocytophaga</em>, <em>Chryseobacterium</em>, <em>Cryptococcus</em>, <em>Elisabethkingia</em>, <em>Flavobacterium</em>, <em>Fusobacterium</em>, <em>Giardia</em>, <em>Leptotrichia</em>, <em>Mycoplasma</em>, <em>Prevotella</em>, <em>Rhodotorula</em>, <em>Treponema</em>, <em>Trichophyton</em>, <em>Ureaplasma</em>.</p></li>
<li><p>3 (least prevalent): all others.</p></li>
</ul>
<p>Group 1 contains all common Gram negatives, like all Enterobacteriaceae and e.g. <em>Pseudomonas</em> and <em>Legionella</em>.</p>
<p>Group 2 probably contains all other microbial pathogens ever found in humans.</p>
<p>Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. <em>Pseudomonas</em> and <em>Legionella</em>.</p>
<p>Group 2 probably contains less microbial pathogens; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.</p>
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>

View File

@ -302,7 +302,7 @@
</tr>
<tr>
<th>property</th>
<td><p>one of the column names of one of the <code><a href='microorganisms.html'>microorganisms</a></code> data set or <code>"shortname"</code></p></td>
<td><p>one of the column names of the <code><a href='microorganisms.html'>microorganisms</a></code> data set or <code>"shortname"</code></p></td>
</tr>
</table>
@ -320,7 +320,7 @@
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for <code>mo_ref</code>, <code>mo_authors</code> and <code>mo_year</code>. This leads to the following results:</p><ul>
<li><p><code>mo_fullname("Chlamydia psittaci")</code> will return <code>"Chlamydophila psittaci"</code> (with a warning about the renaming)</p></li>
<li><p><code>mo_name("Chlamydia psittaci")</code> will return <code>"Chlamydophila psittaci"</code> (with a warning about the renaming)</p></li>
<li><p><code>mo_ref("Chlamydia psittaci")</code> will return <code>"Page, 1968"</code> (with a warning about the renaming)</p></li>
<li><p><code>mo_ref("Chlamydophila psittaci")</code> will return <code>"Everett et al., 1999"</code> (without a warning)</p></li>
</ul>
@ -393,9 +393,10 @@ This package contains the complete taxonomic tree of almost all microorganisms (
<span class='co'># Known subspecies</span>
<span class='fu'>mo_name</span>(<span class='st'>"doylei"</span>) <span class='co'># "Campylobacter jejuni doylei"</span>
<span class='fu'>mo_genus</span>(<span class='st'>"doylei"</span>) <span class='co'># "Campylobacter"</span>
<span class='fu'>mo_species</span>(<span class='st'>"doylei"</span>) <span class='co'># "jejuni"</span>
<span class='fu'>mo_fullname</span>(<span class='st'>"doylei"</span>) <span class='co'># "Campylobacter jejuni doylei"</span>
<span class='fu'>mo_subspecies</span>(<span class='st'>"doylei"</span>) <span class='co'># "doylei"</span>
<span class='fu'>mo_fullname</span>(<span class='st'>"K. pneu rh"</span>) <span class='co'># "Klebsiella pneumoniae rhinoscleromatis"</span>
<span class='fu'>mo_shortname</span>(<span class='st'>"K. pneu rh"</span>) <span class='co'># "K. pneumoniae"</span>

View File

@ -112,16 +112,16 @@ Use \code{mo_uncertainties()} to get a data.frame with all values that were coer
Use \code{mo_renamed()} to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.
\strong{Microbial prevalence of pathogens in humans} \cr
The intelligent rules takes into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
The intelligent rules take into account microbial prevalence of pathogens in humans. It uses three groups and all (sub)species are in only one group. These groups are:
\itemize{
\item{1 (most prevalent): class is Gammaproteobacteria \strong{or} genus is one of: \emph{Enterococcus}, \emph{Staphylococcus}, \emph{Streptococcus}.}
\item{2: phylum is one of: Proteobacteria, Firmicutes, Actinobacteria, Sarcomastigophora \strong{or} genus is one of: \emph{Aspergillus}, \emph{Bacteroides}, \emph{Candida}, \emph{Capnocytophaga}, \emph{Chryseobacterium}, \emph{Cryptococcus}, \emph{Elisabethkingia}, \emph{Flavobacterium}, \emph{Fusobacterium}, \emph{Giardia}, \emph{Leptotrichia}, \emph{Mycoplasma}, \emph{Prevotella}, \emph{Rhodotorula}, \emph{Treponema}, \emph{Trichophyton}, \emph{Ureaplasma}.}
\item{3 (least prevalent): all others.}
}
Group 1 contains all common Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
Group 1 contains all common Gram positives and Gram negatives, like all Enterobacteriaceae and e.g. \emph{Pseudomonas} and \emph{Legionella}.
Group 2 probably contains all other microbial pathogens ever found in humans.
Group 2 probably contains less microbial pathogens; all other members of phyla that were found in humans in the Northern Netherlands between 2001 and 2018.
}
\section{Source}{

View File

@ -72,7 +72,7 @@ mo_property(x, property = "fullname", language = get_locale(), ...)
\item{open}{browse the URL using \code{\link[utils]{browseURL}()}}
\item{property}{one of the column names of one of the \code{\link{microorganisms}} data set or \code{"shortname"}}
\item{property}{one of the column names of the \code{\link{microorganisms}} data set or \code{"shortname"}}
}
\value{
\itemize{
@ -88,7 +88,7 @@ Use these functions to return a specific property of a microorganism from the \c
\details{
All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for \code{mo_ref}, \code{mo_authors} and \code{mo_year}. This leads to the following results:
\itemize{
\item{\code{mo_fullname("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)}
\item{\code{mo_name("Chlamydia psittaci")} will return \code{"Chlamydophila psittaci"} (with a warning about the renaming)}
\item{\code{mo_ref("Chlamydia psittaci")} will return \code{"Page, 1968"} (with a warning about the renaming)}
\item{\code{mo_ref("Chlamydophila psittaci")} will return \code{"Everett et al., 1999"} (without a warning)}
}
@ -162,9 +162,10 @@ mo_species("VISA") # "aureus"
# Known subspecies
mo_name("doylei") # "Campylobacter jejuni doylei"
mo_genus("doylei") # "Campylobacter"
mo_species("doylei") # "jejuni"
mo_fullname("doylei") # "Campylobacter jejuni doylei"
mo_subspecies("doylei") # "doylei"
mo_fullname("K. pneu rh") # "Klebsiella pneumoniae rhinoscleromatis"
mo_shortname("K. pneu rh") # "K. pneumoniae"

View File

@ -66,6 +66,7 @@ $( document ).ready(function() {
$(disqus).insertBefore('footer');
$('#disqus_thread footer').remove();
// Alter footer
$('footer').html(
'<div>' +
'<p>' + $('footer .copyright p').html().replace(
@ -73,6 +74,7 @@ $( document ).ready(function() {
'<code>AMR</code> (for R). Developed at the <a href="https://www.rug.nl">University of Groningen</a>.<br>Authors:') + '</p>' +
'<a href="https://www.rug.nl"><img src="https://gitlab.com/msberends/AMR/raw/master/docs/logo_rug.png" class="footer_logo"></a>' +
'</div>');
// all links should open in new tab/window
$('footer').html($('footer').html().replace(/href/g, 'target="_blank" href'));
// doctoral titles of authors
@ -83,8 +85,8 @@ $( document ).ready(function() {
x = x.replace("Bhanu", "Prof Dr Bhanu");
x = x.replace(/Author, thesis advisor/g, "Doctoral advisor");
x = x.replace(/Authors/g, "aut_plural");
x = x.replace(/Author, maintainer./g, "");
x = x.replace(/Author/g, "");
x = x.replace(/Author, maintainer[.]?/g, "");
x = x.replace(/Author[.]?/g, "");
x = x.replace(/aut_plural/g, "Authors");
}
return(x);

View File

@ -263,4 +263,8 @@ test_that("as.mo works", {
expect_error(translate_allow_uncertain(5))
# very old MO codes (<= v0.5.0)
expect_equal(as.character(as.mo("F_CCCCS_NEO")), "F_CRYPT_NEO")
expect_equal(as.character(as.mo("F_CANDD_GLB")), "F_CANDD_GLA")
})

View File

@ -16,8 +16,9 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
warning = FALSE,
collapse = TRUE,
comment = "#>",
comment = "#",
fig.width = 7.5,
fig.height = 5
)

View File

@ -17,7 +17,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
comment = "#",
fig.width = 7.5,
fig.height = 4.5
)

View File

@ -16,7 +16,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
comment = "#"
)
# set to original language (English)
Sys.setlocale(locale = "C")

View File

@ -16,7 +16,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
comment = "#"
)
# set to original language (English)
Sys.setlocale(locale = "C")

View File

@ -17,7 +17,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
comment = "#",
fig.width = 7.5,
fig.height = 4.5
)

View File

@ -16,7 +16,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
comment = "#"
)
# set to original language (English)
Sys.setlocale(locale = "C")

View File

@ -16,7 +16,7 @@ editor_options:
```{r setup, include = FALSE, results = 'markup'}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
comment = "#",
fig.width = 7.5,
fig.height = 4.75
)