1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 21:22:01 +02:00

mo_source improvement

This commit is contained in:
2019-03-01 09:34:04 +01:00
parent 2565b60024
commit c5efb272fd
21 changed files with 324 additions and 294 deletions

21
R/mo.R
View File

@ -117,6 +117,7 @@
#' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine IDs. \cr
#' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code.
#' @inheritSection AMR Read more on our website!
#' @importFrom dplyr %>% pull left_join
#' @examples
#' # These examples all return "B_STPHY_AUR", the ID of S. aureus:
#' as.mo("stau")
@ -171,16 +172,28 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = TRUE,
# check onLoad() in R/zzz.R: data tables are created there.
}
if (all(x %in% AMR::microorganisms$mo)
if (deparse(substitute(reference_df)) == "get_mo_source()"
& isFALSE(Becker)
& isFALSE(Lancefield)
& is.null(reference_df)) {
& !is.null(reference_df)
& all(x %in% reference_df[,1])) {
# has valid own reference_df
# (data.table not faster here)
colnames(reference_df)[1] <- "x"
suppressWarnings(
y <- data.frame(x = x, stringsAsFactors = FALSE) %>%
left_join(reference_df, by = "x") %>%
pull("mo")
)
} else if (all(x %in% AMR::microorganisms$mo)
& isFALSE(Becker)
& isFALSE(Lancefield)) {
y <- x
} else if (all(tolower(x) %in% microorganismsDT$fullname_lower)
& isFALSE(Becker)
& isFALSE(Lancefield)
& is.null(reference_df)) {
& isFALSE(Lancefield)) {
# we need special treatment for very prevalent full names, they are likely! (case insensitive)
# e.g. as.mo("Staphylococcus aureus")
y <- microorganismsDT[prevalence == 1][data.table(fullname_lower = tolower(x)),

View File

@ -52,15 +52,19 @@
#' # Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'.
#' }
#'
#' It has now created a file "~/.mo_source.rds" with the contents of our Excel file. It is an R-specific format with great compression.
#' It has now created a file "~/.mo_source.rds" with the contents of our Excel file, but only the first column with foreign values and the 'mo' column will be kept.
#'
#' And now we can use it in our functions:
#' \preformatted{
#' as.mo("lab_mo_ecoli")
#' # B_ESCHR_COL
#' [1] B_ESCHR_COL
#'
#' mo_genus("lab_mo_kpneumoniae")
#' # "Klebsiella"
#' [1] "Klebsiella"
#'
#' # other input values still work too
#' as.mo(c("Escherichia coli", "E. coli", "lab_mo_ecoli"))
#' [1] B_ESCHR_COL B_ESCHR_COL B_ESCHR_COL
#' }
#'
#' If we edit the Excel file to, let's say, this:
@ -78,10 +82,10 @@
#' \preformatted{
#' as.mo("lab_mo_ecoli")
#' # Updated mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'.
#' # B_ESCHR_COL
#' [1] B_ESCHR_COL
#'
#' mo_genus("lab_Staph_aureus")
#' # "Staphylococcus"
#' [1] "Staphylococcus"
#' }
#'
#' To remove the reference completely, just use any of these:
@ -119,7 +123,9 @@ set_mo_source <- function(path) {
valid <- FALSE
} else if (!"mo" %in% colnames(df)) {
valid <- FALSE
} else if (!all(df$mo %in% AMR::microorganisms$mo)) {
} else if (all(as.data.frame(df)[, 1] == "")) {
valid <- FALSE
} else if (!all(df$mo %in% c("", AMR::microorganisms$mo))) {
valid <- FALSE
} else if (NCOL(df) < 2) {
valid <- FALSE
@ -163,9 +169,11 @@ set_mo_source <- function(path) {
stop("File must contain a column with self-defined values and a reference column `mo` with valid values from the `microorganisms` data set.")
}
# keep only first two columns, second must be mo
if (colnames(df)[1] == "mo") {
# put mo to the end
df <- df %>% select(-"mo", everything(), "mo")
df <- df[, c(2, 1)]
} else {
df <- df[, c(1, 2)]
}
df <- as.data.frame(df, stringAsFactors = FALSE)