mirror of
https://github.com/msberends/AMR.git
synced 2025-07-10 11:01:55 +02:00
support A. species for as.mo, cleanup
This commit is contained in:
27
R/freq.R
27
R/freq.R
@ -313,16 +313,22 @@ frequency_tbl <- function(x,
|
||||
}
|
||||
}
|
||||
|
||||
na_txt <- paste0(NAs %>% length() %>% format(), ' = ',
|
||||
(NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>%
|
||||
sub('NaN', '0', ., fixed = TRUE))
|
||||
if (!na_txt %like% "^0 =") {
|
||||
na_txt <- red(na_txt)
|
||||
if (NROW(x) > 0) {
|
||||
na_txt <- paste0(NAs %>% length() %>% format(), ' = ',
|
||||
(NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE, round = digits) %>%
|
||||
sub('NaN', '0', ., fixed = TRUE))
|
||||
if (!na_txt %like% "^0 =") {
|
||||
na_txt <- red(na_txt)
|
||||
} else {
|
||||
na_txt <- green(na_txt)
|
||||
}
|
||||
na_txt <- paste0('(of which NA: ', na_txt, ')')
|
||||
} else {
|
||||
na_txt <- green(na_txt)
|
||||
na_txt <- ""
|
||||
}
|
||||
|
||||
header_txt <- header_txt %>% paste0(markdown_line, '\nLength: ', (NAs %>% length() + x %>% length()) %>% format(),
|
||||
' (of which NA: ', na_txt, ')')
|
||||
' ', na_txt)
|
||||
header_txt <- header_txt %>% paste0(markdown_line, '\nUnique: ', x %>% n_distinct() %>% format())
|
||||
|
||||
if (NROW(x) > 0 & any(class(x) == "character")) {
|
||||
@ -592,7 +598,12 @@ print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default =
|
||||
}
|
||||
title <- paste(title, group_var)
|
||||
}
|
||||
title <- paste("Frequency table of", trimws(title))
|
||||
title <- trimws(title)
|
||||
if (title == "") {
|
||||
title <- "Frequency table"
|
||||
} else {
|
||||
title <- paste("Frequency table of", trimws(title))
|
||||
}
|
||||
} else {
|
||||
title <- opt$title
|
||||
}
|
||||
|
@ -54,6 +54,8 @@ globalVariables(c(".",
|
||||
"microorganisms.prevDT",
|
||||
"microorganisms.unprevDT",
|
||||
"microorganisms.oldDT",
|
||||
"microorganisms.certe",
|
||||
"microorganisms.umcg",
|
||||
"mo",
|
||||
"mo.old",
|
||||
"n",
|
||||
|
20
R/misc.R
20
R/misc.R
@ -28,12 +28,24 @@ addin_insert_like <- function() {
|
||||
|
||||
# No export, no Rd
|
||||
percent <- function(x, round = 1, force_zero = FALSE, ...) {
|
||||
val <- base::round(x * 100, digits = round)
|
||||
if (force_zero == TRUE & any(val == as.integer(val) & !is.na(val))) {
|
||||
val[val == as.integer(val)] <- paste0(val[val == as.integer(val)], ".", strrep(0, round))
|
||||
|
||||
# https://stackoverflow.com/a/12688836/4575331
|
||||
round2 <- function(x, n) (trunc((abs(x) * 10 ^ n) + 0.5) / 10 ^ n) * sign(x)
|
||||
|
||||
val <- round2(x, round + 2) # round up 0.5
|
||||
val <- round(x = val * 100, digits = round) # remove floating point error
|
||||
|
||||
if (force_zero == TRUE) {
|
||||
if (any(val == as.integer(val) & !is.na(val))) {
|
||||
# add zeroes to all integers
|
||||
val[val == as.integer(as.character(val))] <- paste0(val[val == as.integer(val)], ".", strrep(0, round))
|
||||
}
|
||||
# add extra zeroes if needed
|
||||
val_decimals <- nchar(gsub(".*[.](.*)", "\\1", as.character(val)))
|
||||
val[val_decimals < round] <- paste0(val[val_decimals < round], strrep(0, max(0, round - val_decimals)))
|
||||
}
|
||||
pct <- base::paste0(val, "%")
|
||||
pct[pct == "NA%"] <- NA_character_
|
||||
pct[pct %in% c("NA%", "NaN%")] <- NA_character_
|
||||
pct
|
||||
}
|
||||
|
||||
|
100
R/mo.R
100
R/mo.R
@ -53,7 +53,7 @@
|
||||
#' \itemize{
|
||||
#' \item{Taxonomic kingdom: it first searches in bacteria, then fungi, then protozoa}
|
||||
#' \item{Human pathogenic prevalence: it first searches in more prevalent microorganisms, then less prevalent ones}
|
||||
#' \item{Valid MO codes and full names: it first searches in already valid MO code and genus/species combinations}
|
||||
#' \item{Valid MO codes and full names: it first searches in already valid MO code and known genus/species combinations}
|
||||
#' \item{Breakdown of input values: from here it starts to breakdown input values to find possible matches}
|
||||
#' }
|
||||
#'
|
||||
@ -126,16 +126,15 @@
|
||||
#' library(dplyr)
|
||||
#' df$mo <- df %>%
|
||||
#' select(microorganism_name) %>%
|
||||
#' guess_mo()
|
||||
#' as.mo()
|
||||
#'
|
||||
#' # and can even contain 2 columns, which is convenient for genus/species combinations:
|
||||
#' df$mo <- df %>%
|
||||
#' select(genus, species) %>%
|
||||
#' guess_mo()
|
||||
#'
|
||||
#' # same result:
|
||||
#' as.mo()
|
||||
#' # although this works easier and does the same:
|
||||
#' df <- df %>%
|
||||
#' mutate(mo = guess_mo(paste(genus, species)))
|
||||
#' mutate(mo = as.mo(paste(genus, species)))
|
||||
#' }
|
||||
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
|
||||
structure(mo_validate(x = x, property = "mo",
|
||||
@ -160,11 +159,14 @@ guess_mo <- as.mo
|
||||
#' @importFrom data.table data.table as.data.table setkey
|
||||
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
|
||||
|
||||
# These data.tables are available as data sets when the AMR package is loaded:
|
||||
# microorganismsDT # this one is sorted by kingdom (B<F<P), prevalence, TSN
|
||||
# microorganisms.prevDT # same as microorganismsDT, but with prevalence != 9999
|
||||
# microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
|
||||
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||
if (!"AMR" %in% base::.packages()) {
|
||||
library("AMR")
|
||||
# These data.tables are available as data sets when the AMR package is loaded:
|
||||
# microorganismsDT # this one is sorted by kingdom (B<F<P), prevalence, TSN
|
||||
# microorganisms.prevDT # same as microorganismsDT, but with prevalence != 9999
|
||||
# microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
|
||||
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||
}
|
||||
|
||||
if (NCOL(x) == 2) {
|
||||
# support tidyverse selection like: df %>% select(colA, colB)
|
||||
@ -216,31 +218,35 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
||||
suppressWarnings(
|
||||
x <- data.frame(x = x, stringsAsFactors = FALSE) %>%
|
||||
left_join(reference_df, by = "x") %>%
|
||||
left_join(AMR::microorganisms, by = "mo") %>%
|
||||
left_join(microorganisms, by = "mo") %>%
|
||||
pull(property)
|
||||
)
|
||||
} else if (all(toupper(x) %in% AMR::microorganisms.certe[, "certe"])) {
|
||||
} else if (all(toupper(x) %in% microorganisms.certe[, "certe"])) {
|
||||
# old Certe codes
|
||||
y <- as.data.table(AMR::microorganisms.certe)[data.table(certe = toupper(x)), on = "certe", ]
|
||||
y <- as.data.table(microorganisms.certe)[data.table(certe = toupper(x)), on = "certe", ]
|
||||
x <- microorganismsDT[data.table(mo = y[["mo"]]), on = "mo", ..property][[1]]
|
||||
|
||||
} else if (!all(x %in% microorganismsDT[[property]])) {
|
||||
|
||||
x_backup <- trimws(x, which = "both")
|
||||
x_species <- paste(x_backup, "species")
|
||||
|
||||
# remove spp and species
|
||||
x <- gsub(" +(spp.?|species)", "", x_backup)
|
||||
x_species <- paste(x, "species")
|
||||
# translate to English for supported languages of mo_property
|
||||
x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x)
|
||||
# remove 'empty' genus and species values
|
||||
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
||||
# remove dots and other non-text in case of "E. coli" except spaces
|
||||
x <- gsub("[^a-zA-Z0-9/ \\-]+", "", x)
|
||||
# remove non-text in case of "E. coli" except dots and spaces
|
||||
x <- gsub("[^.a-zA-Z0-9/ \\-]+", "", x)
|
||||
|
||||
# but spaces before and after should be omitted
|
||||
x <- trimws(x, which = "both")
|
||||
x_trimmed <- x
|
||||
x_trimmed_species <- paste(x_trimmed, "species")
|
||||
# replace space by regex sign
|
||||
x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
|
||||
x <- gsub(" ", ".*", x, fixed = TRUE)
|
||||
# replace space and dot by regex sign
|
||||
x_withspaces <- gsub("[ .]+", ".* ", x)
|
||||
x <- gsub("[ .]+", ".*", x)
|
||||
# add start en stop regex
|
||||
x <- paste0('^', x, '$')
|
||||
x_withspaces_start <- paste0('^', x_withspaces)
|
||||
@ -261,10 +267,28 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
||||
next
|
||||
}
|
||||
if (nchar(x_trimmed[i]) < 3) {
|
||||
# fewer than 3 chars, add as failure
|
||||
x[i] <- NA_character_
|
||||
failures <- c(failures, x_backup[i])
|
||||
next
|
||||
# check if search term was like "A. species", then return first genus found with ^A
|
||||
if (x_backup[i] %like% "species" | x_backup[i] %like% "spp[.]?") {
|
||||
# get mo code of first hit
|
||||
found <- microorganismsDT[fullname %like% x_withspaces_start[i], mo][[1]]
|
||||
mo_code <- found[1L] %>% strsplit("_") %>% unlist() %>% .[1:2] %>% paste(collapse = "_")
|
||||
found <- microorganismsDT[mo == mo_code, ..property][[1]]
|
||||
# return first genus that begins with x_trimmed, e.g. when "E. spp."
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
} else {
|
||||
# fewer than 3 chars, add as failure
|
||||
x[i] <- NA_character_
|
||||
failures <- c(failures, x_backup[i])
|
||||
next
|
||||
}
|
||||
} else {
|
||||
# fewer than 3 chars, add as failure
|
||||
x[i] <- NA_character_
|
||||
failures <- c(failures, x_backup[i])
|
||||
next
|
||||
}
|
||||
}
|
||||
|
||||
# translate known trivial abbreviations to genus + species ----
|
||||
@ -353,15 +377,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
||||
}
|
||||
|
||||
# TRY OTHER SOURCES ----
|
||||
if (x_backup[i] %in% AMR::microorganisms.certe$certe) {
|
||||
x[i] <- microorganismsDT[mo == AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == x_backup[i], 2], ..property][[1]][1L]
|
||||
# x[i] <- exec_as.mo(x = AMR::microorganisms.certe[AMR::microorganisms.certe$certe == x_backup[i], "mo"],
|
||||
# property = property)
|
||||
# next
|
||||
}
|
||||
if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
|
||||
mo_umcg <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
|
||||
mo_found <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == mo_umcg, 2]
|
||||
if (x_backup[i] %in% microorganisms.umcg[, 1]) {
|
||||
mo_umcg <- microorganisms.umcg[microorganisms.umcg[, 1] == x_backup[i], 2]
|
||||
mo_found <- microorganisms.certe[microorganisms.certe[, 1] == mo_umcg, 2]
|
||||
if (length(mo_found) == 0) {
|
||||
# not found
|
||||
x[i] <- NA_character_
|
||||
@ -371,13 +389,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
||||
}
|
||||
next
|
||||
}
|
||||
if (x_backup[i] %in% reference_df[, 1]) {
|
||||
ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
|
||||
if (ref_mo %in% microorganismsDT[, mo]) {
|
||||
x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
|
||||
next
|
||||
} else {
|
||||
warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
|
||||
if (!is.null(reference_df)) {
|
||||
if (x_backup[i] %in% reference_df[, 1]) {
|
||||
ref_mo <- reference_df[reference_df[, 1] == x_backup[i], 2]
|
||||
if (ref_mo %in% microorganismsDT[, mo]) {
|
||||
x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
|
||||
next
|
||||
} else {
|
||||
warning("Value '", x_backup[i], "' was found in reference_df, but '", ref_mo, "' is not a valid MO code.", call. = FALSE)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
25
R/zzz.R
25
R/zzz.R
@ -47,28 +47,3 @@ NULL
|
||||
.onLoad <- function(libname, pkgname) {
|
||||
backports::import(pkgname)
|
||||
}
|
||||
|
||||
.onAttach <- function(libname, pkgname) {
|
||||
# save data.tables to improve speed of as.mo:
|
||||
|
||||
# microorganismsDT <- data.table::as.data.table(AMR::microorganisms)
|
||||
# microorganisms.oldDT <- data.table::as.data.table(AMR::microorganisms.old)
|
||||
#
|
||||
# data.table::setkey(microorganismsDT, prevalence, tsn)
|
||||
# data.table::setkey(microorganisms.oldDT, tsn, name)
|
||||
|
||||
base::assign(x = "microorganismsDT",
|
||||
value = microorganismsDT,
|
||||
envir = base::as.environment("package:AMR"))
|
||||
base::assign(x = "microorganisms.prevDT",
|
||||
value = microorganismsDT[prevalence != 9999,],
|
||||
envir = base::as.environment("package:AMR"))
|
||||
base::assign(x = "microorganisms.unprevDT",
|
||||
value = microorganismsDT[prevalence == 9999,],
|
||||
envir = base::as.environment("package:AMR"))
|
||||
|
||||
base::assign(x = "microorganisms.oldDT",
|
||||
value = microorganisms.oldDT,
|
||||
envir = base::as.environment("package:AMR"))
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user