mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 16:02:02 +02:00
added prevalence column and alterted as.mo algorith to use it, added ab_name as alias
This commit is contained in:
@ -21,6 +21,7 @@
|
||||
#' Use these functions to return a specific property of an antibiotic from the \code{\link{antibiotics}} data set, based on their ATC code. Get such a code with \code{\link{as.atc}}.
|
||||
#' @param x a (vector of a) valid \code{\link{atc}} code or any text that can be coerced to a valid atc with \code{\link{as.atc}}
|
||||
#' @param property one of the column names of one of the \code{\link{antibiotics}} data set, like \code{"atc"} and \code{"official"}
|
||||
#' @param language language of the returned text, defaults to the systems language. Either one of \code{"en"} (English) or \code{"nl"} (Dutch).
|
||||
#' @rdname ab_property
|
||||
#' @return A vector of values. In case of \code{ab_tradenames}, if \code{x} is of length one, a vector will be returned. Otherwise a \code{\link{list}}, with \code{x} as names.
|
||||
#' @export
|
||||
@ -28,8 +29,8 @@
|
||||
#' @seealso \code{\link{antibiotics}}
|
||||
#' @examples
|
||||
#' ab_atc("amcl") # J01CR02
|
||||
#' ab_official("amcl") # Amoxicillin and beta-lactamase inhibitor
|
||||
#' ab_official_nl("amcl") # Amoxicilline met enzymremmer
|
||||
#' ab_name("amcl") # Amoxicillin and beta-lactamase inhibitor
|
||||
#' ab_name("amcl", "nl") # Amoxicilline met enzymremmer
|
||||
#' ab_trivial_nl("amcl") # Amoxicilline/clavulaanzuur
|
||||
#' ab_certe("amcl") # amcl
|
||||
#' ab_umcg("amcl") # AMCL
|
||||
@ -56,15 +57,25 @@ ab_atc <- function(x) {
|
||||
|
||||
#' @rdname ab_property
|
||||
#' @export
|
||||
ab_official <- function(x) {
|
||||
ab_property(x, "official")
|
||||
ab_official <- function(x, language = NULL) {
|
||||
|
||||
if (is.null(language)) {
|
||||
language <- Sys.locale()
|
||||
} else {
|
||||
language <- tolower(language[1])
|
||||
}
|
||||
if (language %in% c("en", "")) {
|
||||
ab_property(x, "official")
|
||||
} else if (language == "nl") {
|
||||
ab_property(x, "official_nl")
|
||||
} else {
|
||||
stop("Unsupported language: '", language, "' - use one of: 'en', 'nl'", call. = FALSE)
|
||||
}
|
||||
}
|
||||
|
||||
#' @rdname ab_property
|
||||
#' @export
|
||||
ab_official_nl <- function(x) {
|
||||
ab_property(x, "official_nl")
|
||||
}
|
||||
ab_name <- ab_official
|
||||
|
||||
#' @rdname ab_property
|
||||
#' @export
|
||||
|
5
R/data.R
5
R/data.R
@ -122,8 +122,8 @@
|
||||
|
||||
#' Data set with human pathogenic microorganisms
|
||||
#'
|
||||
#' A data set containing 2,630 (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
||||
#' @format A \code{\link{tibble}} with 2,630 observations and 10 variables:
|
||||
#' A data set containing (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
||||
#' @format A \code{\link{tibble}} with 2,642 observations and 11 variables:
|
||||
#' \describe{
|
||||
#' \item{\code{mo}}{ID of microorganism}
|
||||
#' \item{\code{bactsys}}{Bactsyscode of microorganism}
|
||||
@ -135,6 +135,7 @@
|
||||
#' \item{\code{aerobic}}{Logical whether bacteria is aerobic}
|
||||
#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungus/yeast"}}
|
||||
#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Negative rods"}}
|
||||
#' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
|
||||
#' }
|
||||
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
|
||||
# new <- microorganisms %>% filter(genus == "Bacteroides") %>% .[1,]
|
||||
|
@ -47,6 +47,7 @@ globalVariables(c(".",
|
||||
"Pasted",
|
||||
"patient_id",
|
||||
"Percentage",
|
||||
"prevalence",
|
||||
"R",
|
||||
"real_first_isolate",
|
||||
"S",
|
||||
|
14
R/misc.R
14
R/misc.R
@ -155,6 +155,20 @@ tbl_parse_guess <- function(tbl,
|
||||
tbl
|
||||
}
|
||||
|
||||
#' @importFrom dplyr case_when
|
||||
Sys.locale <- function() {
|
||||
sys <- base::Sys.getlocale()
|
||||
case_when(
|
||||
sys %like% '(Deutsch|German|de_)' ~ "de",
|
||||
sys %like% '(Nederlands|Dutch|nl_)' ~ "nl",
|
||||
sys %like% '(Espa.ol|Spanish|es_)' ~ "es",
|
||||
sys %like% '(Fran.ais|French|fr_)' ~ "fr",
|
||||
sys %like% '(Portugu.s|Portuguese|pt_)' ~ "pt",
|
||||
sys %like% '(Italiano|Italian|it_)' ~ "it",
|
||||
TRUE ~ "en"
|
||||
)
|
||||
}
|
||||
|
||||
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
|
||||
date_generic <- function(format) {
|
||||
if (!grepl('%', format, fixed = TRUE)) {
|
||||
|
49
R/mo.R
49
R/mo.R
@ -33,12 +33,12 @@
|
||||
#'
|
||||
#' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples.
|
||||
#'
|
||||
#' Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. These are:
|
||||
#' Thus function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example:
|
||||
#' \itemize{
|
||||
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
|
||||
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae}}
|
||||
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
|
||||
#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
|
||||
#' \item{Something like \code{"stau"} or \code{"staaur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
|
||||
#' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
|
||||
#' }
|
||||
#' Moreover, this function also supports ID's based on only Gram stain, when the species is not known. \cr
|
||||
#' For example, \code{"Gram negative rods"} and \code{"GNR"} will both return the ID of a Gram negative rod: \code{GNR}.
|
||||
@ -47,7 +47,7 @@
|
||||
#'
|
||||
#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571}
|
||||
#' @export
|
||||
#' @importFrom dplyr %>% pull left_join
|
||||
#' @importFrom dplyr %>% pull left_join arrange
|
||||
#' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
|
||||
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
|
||||
#' @examples
|
||||
@ -118,7 +118,10 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
}
|
||||
}
|
||||
|
||||
MOs <- AMR::microorganisms %>% filter(!mo %like% '^_FAM') # dont search in those
|
||||
MOs <- AMR::microorganisms %>%
|
||||
arrange(prevalence) %>% # more expected result on multiple findings
|
||||
filter(!mo %like% '^_FAM', # don't search in those
|
||||
(nchar(mo) > 3 | mo %in% c("GNR", "GPR", "GNC", "GPC"))) # no genera
|
||||
failures <- character(0)
|
||||
x_input <- x
|
||||
|
||||
@ -144,11 +147,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
x_withspaces_start <- paste0('^', x_withspaces)
|
||||
x_withspaces <- paste0('^', x_withspaces, '$')
|
||||
|
||||
# print(x)
|
||||
# print(x_withspaces_all)
|
||||
# print(x_withspaces_start)
|
||||
# print(x_withspaces)
|
||||
# print(x_backup)
|
||||
# cat(paste0('x "', x, '"\n'))
|
||||
# cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n'))
|
||||
# cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n'))
|
||||
# cat(paste0('x_withspaces "', x_withspaces, '"\n'))
|
||||
# cat(paste0('x_backup "', x_backup, '"\n'))
|
||||
|
||||
for (i in 1:length(x)) {
|
||||
if (identical(x_trimmed[i], "")) {
|
||||
@ -201,7 +204,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) == 'VRE') {
|
||||
x[i] <- 'ENC'
|
||||
x[i] <- 'ENCSPP'
|
||||
next
|
||||
}
|
||||
if (toupper(x_trimmed[i]) == 'MRPA') {
|
||||
@ -234,6 +237,13 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
next
|
||||
}
|
||||
|
||||
# try any match with genus, keeping spaces, not ending with $ ----
|
||||
found <- MOs[which(MOs$genus %like% x_withspaces_start[i] & MOs$mo %like% 'SPP$'),]$mo
|
||||
if (length(found) > 0) {
|
||||
x[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
# try any match keeping spaces, not ending with $ ----
|
||||
found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
|
||||
if (length(found) > 0) {
|
||||
@ -297,19 +307,6 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
|
||||
}
|
||||
|
||||
# avoid detection of Staphylococcus auricularis in case of S. aureus ----
|
||||
x[x == "STAAUC" & toupper(x_backup) != "STAAUC" & !x_backup %like% 'auri'] <- "STAAUR"
|
||||
# avoid detection of Entamoeba coli in case of E. coli ----
|
||||
x[x == "ENMCOL" & toupper(x_backup) != "ENMCOL" & !x_backup %like% '^ent?'] <- "ESCCOL"
|
||||
# avoid detection of Haematobacter influenzae in case of H. influenzae ----
|
||||
x[x == "HABINF" & toupper(x_backup) != "HABINF" & !x_backup %like% '^haema'] <- "HAEINF"
|
||||
# avoid detection of Pasteurella aerogenes in case of P. aeruginosa ----
|
||||
x[x == "PASAER" & toupper(x_backup) != "PASAER" & !(x_backup %like% '^pas?' | x_backup %like% 'aero')] <- "PSEAER"
|
||||
# avoid detection of Legionella non pneumophila in case of Legionella pneumophila ----
|
||||
x[x == "LEGNON" & toupper(x_backup) != "LEGNON" & !x_backup %like% 'non'] <- "LEGPNE"
|
||||
# avoid detection of Streptobacillus in case of Streptococcus ----
|
||||
x[x == "STB" & toupper(x_backup) != "STB" & !x_backup %like% 'streptob'] <- "STC"
|
||||
|
||||
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
||||
if (length(failures) > 0) {
|
||||
warning("These ", length(failures) , " values could not be coerced to a valid mo: ",
|
||||
@ -376,7 +373,9 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
||||
|
||||
# for the returned genera without species, add species ----
|
||||
# like "ESC" -> "ESCSPP", but only where the input contained it
|
||||
indices <- unique(x_input) %like% "[A-Z]{3}SPP" & !x %like% "[A-Z]{3}SPP"
|
||||
indices <- nchar(unique(x)) == 3 & !x %like% "[A-Z]{3}SPP" & !x %in% c("GNR", "GPR", "GNC", "GPC",
|
||||
"GNS", "GPS", "GNK", "GPK")
|
||||
indices <- indices[!is.na(indices)]
|
||||
x[indices] <- paste0(x[indices], 'SPP')
|
||||
|
||||
# left join the found results to the original input values (x_input)
|
||||
|
@ -207,7 +207,7 @@ mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = F
|
||||
#' @importFrom dplyr %>% case_when
|
||||
mo_translate <- function(x, language) {
|
||||
if (is.null(language)) {
|
||||
language <- mo_getlangcode()
|
||||
language <- Sys.locale()
|
||||
} else {
|
||||
language <- tolower(language[1])
|
||||
}
|
||||
@ -350,17 +350,3 @@ mo_translate <- function(x, language) {
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
#' @importFrom dplyr case_when
|
||||
mo_getlangcode <- function() {
|
||||
sys <- base::Sys.getlocale()
|
||||
case_when(
|
||||
sys %like% '(Deutsch|German|de_)' ~ "de",
|
||||
sys %like% '(Nederlands|Dutch|nl_)' ~ "nl",
|
||||
sys %like% '(Espa.ol|Spanish|es_)' ~ "es",
|
||||
sys %like% '(Fran.ais|French|fr_)' ~ "fr",
|
||||
sys %like% '(Portugu.s|Portuguese|pt_)' ~ "pt",
|
||||
sys %like% '(Italiano|Italian|it_)' ~ "it",
|
||||
TRUE ~ "en"
|
||||
)
|
||||
}
|
||||
|
Reference in New Issue
Block a user