1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-25 18:46:11 +01:00

custom mo update

This commit is contained in:
dr. M.S. (Matthijs) Berends 2022-12-28 14:20:10 +01:00
parent fe09e9252b
commit ba3b237bdf
5 changed files with 66 additions and 52 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 1.8.2.9071 Version: 1.8.2.9072
Date: 2022-12-27 Date: 2022-12-28
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9071 # AMR 1.8.2.9072
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

View File

@ -30,7 +30,7 @@
#' Add Custom Microorganisms to This Package #' Add Custom Microorganisms to This Package
#' #'
#' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package. #' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package.
#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "mo", "genus" and "species" #' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "genus" and "species"
#' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*. #' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*.
#' #'
#' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited. #' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited.
@ -39,7 +39,7 @@
#' #'
#' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method: #' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method:
#' #'
#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "ab" and "name") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. #' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "genus" and "species") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location.
#' #'
#' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will be loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file: #' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will be loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file:
#' #'
@ -60,8 +60,7 @@
#' # Add custom microorganisms: #' # Add custom microorganisms:
#' library(AMR) #' library(AMR)
#' add_custom_microorganisms( #' add_custom_microorganisms(
#' data.frame(mo = "ENT_ASB_CLO", #' data.frame(genus = "Enterobacter",
#' genus = "Enterobacter",
#' species = "asburiae/cloacae") #' species = "asburiae/cloacae")
#' ) #' )
#' ``` #' ```
@ -80,36 +79,32 @@
#' # now add a custom entry - it will be considered by as.mo() and #' # now add a custom entry - it will be considered by as.mo() and
#' # all mo_*() functions #' # all mo_*() functions
#' add_custom_microorganisms( #' add_custom_microorganisms(
#' data.frame(mo = "ENT_ASB_CLO", #' data.frame(genus = "Enterobacter",
#' genus = "Enterobacter",
#' species = "asburiae/cloacae" #' species = "asburiae/cloacae"
#' ) #' )
#' ) #' )
#' #'
#' # "ENT_ASB_CLO" is now a new microorganism: #' # E. asburiae/cloacae is now a new microorganism:
#' mo_name("Enterobacter asburiae/cloacae") #' mo_name("Enterobacter asburiae/cloacae")
#' as.mo("ent_asb_clo") #' # its code:
#' mo_name("ent_asb_clo") #' as.mo("Enterobacter asburiae/cloacae")
#' # all internal algorithms will work as well: #' # all internal algorithms will work as well:
#' mo_name("Ent asburia cloacae") #' mo_name("Ent asburia cloacae")
#' #'
#' # and even the taxonomy was added based on the genus! #' # and even the taxonomy was added based on the genus!
#' mo_family("ent_asb_clo") #' mo_family("E. asburiae/cloacae")
#' mo_gramstain("Enterobacter asburiae/cloacae") #' mo_gramstain("Enterobacter asburiae/cloacae")
#' #'
#' mo_info("ent_asb_clo") #' mo_info("Enterobacter asburiae/cloacae")
#' } #' }
add_custom_microorganisms <- function(x) { add_custom_microorganisms <- function(x) {
meet_criteria(x, allow_class = "data.frame") meet_criteria(x, allow_class = "data.frame")
required_cols <- c("mo", "genus", "species")
stop_ifnot( stop_ifnot(
all(required_cols %in% colnames(x)), all(c("genus", "species") %in% colnames(x)),
paste0("`x` must contain columns ", vector_and(required_cols, sort = FALSE), ".") paste0("`x` must contain columns ", vector_and(c("genus", "species"), sort = FALSE), ".")
)
stop_if(
any(x$mo %in% AMR_env$MO_lookup$mo),
"Microorganism code(s) ", vector_and(x$mo[x$mo %in% AMR_env$MO_lookup$mo]), " already exist in the internal `microorganisms` data set."
) )
# remove any extra class/type, such as grouped tbl, or data.table: # remove any extra class/type, such as grouped tbl, or data.table:
x <- as.data.frame(x, stringsAsFactors = FALSE) x <- as.data.frame(x, stringsAsFactors = FALSE)
# rename 'name' to 'fullname' if it's in the data set # rename 'name' to 'fullname' if it's in the data set
@ -126,14 +121,14 @@ add_custom_microorganisms <- function(x) {
x$genus <- trimws2(x$genus) x$genus <- trimws2(x$genus)
x$species <- trimws2(x$species) x$species <- trimws2(x$species)
x$subspecies <- trimws2(x$subspecies) x$subspecies <- trimws2(x$subspecies)
x$genus[x$genus == ""] <- NA_character_ x$genus[is.na(x$genus)] <- ""
x$species[x$species == ""] <- NA_character_ x$species[is.na(x$species)] <- ""
x$subspecies[x$subspecies == ""] <- NA_character_ x$subspecies[is.na(x$subspecies)] <- ""
stop_if(any(x$genus[!is.na(x$genus)] %like% " "), stop_if(any(x$genus %like% " "),
"the 'genus' column must not contain spaces") "the 'genus' column must not contain spaces")
stop_if(any(x$species[!is.na(x$species)] %like% " "), stop_if(any(x$species %like% " "),
"the 'species' column must not contain spaces") "the 'species' column must not contain spaces")
stop_if(any(x$subspecies[!is.na(x$subspecies)] %like% " "), stop_if(any(x$subspecies %like% " "),
"the 'subspecies' column must not contain spaces") "the 'subspecies' column must not contain spaces")
if ("rank" %in% colnames(x)) { if ("rank" %in% colnames(x)) {
@ -147,14 +142,13 @@ add_custom_microorganisms <- function(x) {
call. = FALSE)))) call. = FALSE))))
} }
if (!"fullname" %in% colnames(x)) { if (!"fullname" %in% colnames(x)) {
x$fullname <- paste(x$genus, x$species, x$subspecies) x$fullname <- trimws2(paste(x$genus, x$species, x$subspecies))
x$fullname <- gsub(" NA", "", x$fullname)
} }
if (!"kingdom" %in% colnames(x)) x$kingdom <- NA_character_ if (!"kingdom" %in% colnames(x)) x$kingdom <- ""
if (!"phylum" %in% colnames(x)) x$phylum <- NA_character_ if (!"phylum" %in% colnames(x)) x$phylum <- ""
if (!"class" %in% colnames(x)) x$class <- NA_character_ if (!"class" %in% colnames(x)) x$class <- ""
if (!"order" %in% colnames(x)) x$order <- NA_character_ if (!"order" %in% colnames(x)) x$order <- ""
if (!"family" %in% colnames(x)) x$family <- NA_character_ if (!"family" %in% colnames(x)) x$family <- ""
for (col in colnames(x)) { for (col in colnames(x)) {
if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) { if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) {
@ -171,13 +165,32 @@ add_custom_microorganisms <- function(x) {
x$order <- AMR_env$MO_lookup$order[match(x$genus, AMR_env$MO_lookup$genus)] x$order <- AMR_env$MO_lookup$order[match(x$genus, AMR_env$MO_lookup$genus)]
x$family <- AMR_env$MO_lookup$family[match(x$genus, AMR_env$MO_lookup$genus)] x$family <- AMR_env$MO_lookup$family[match(x$genus, AMR_env$MO_lookup$genus)]
x$kingdom[is.na(x$kingdom)] <- ""
x$phylum[is.na(x$phylum)] <- ""
x$class[is.na(x$class)] <- ""
x$order[is.na(x$order)] <- ""
x$family[is.na(x$family)] <- ""
x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)] x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)]
x$fullname_lower <- tolower(x$fullname) x$fullname_lower <- tolower(x$fullname)
x$full_first <- substr(x$fullname_lower, 1, 1) x$full_first <- substr(x$fullname_lower, 1, 1)
x$species_first <- tolower(substr(x$species, 1, 1)) x$species_first <- tolower(substr(x$species, 1, 1))
x$subspecies_first <- tolower(substr(x$subspecies, 1, 1)) x$subspecies_first <- tolower(substr(x$subspecies, 1, 1))
# add to pacakge ---- if (!"mo" %in% colnames(x)) {
# create the mo code
x$mo <- NA_character_
}
x$mo <- trimws2(x$mo)
x$mo[x$mo == ""] <- NA_character_
x$mo[is.na(x$mo)] <- paste0("CUSTOM_",
toupper(abbreviate(gsub(" +", " _ ",
gsub("[^A-Za-z0-9-]", " ",
trimws2(paste(x$genus, x$species, x$subspecies)))),
minlength = 10,
named = FALSE)))
# add to package ----
AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo) AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
class(AMR_env$MO_lookup$mo) <- "character" class(AMR_env$MO_lookup$mo) <- "character"
@ -193,8 +206,11 @@ add_custom_microorganisms <- function(x) {
# assign new values # assign new values
new_df[, col] <- x[, col, drop = TRUE] new_df[, col] <- x[, col, drop = TRUE]
} }
# clear previous coercions
suppressMessages(mo_reset_session())
AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df)) AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df))
AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[which(!AMR_env$mo_previously_coerced$mo %in% new_df$mo), , drop = FALSE]
class(AMR_env$MO_lookup$mo) <- c("mo", "character") class(AMR_env$MO_lookup$mo) <- c("mo", "character")
message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.") message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.")
} }

8
R/mo.R
View File

@ -182,9 +182,9 @@ as.mo <- function(x,
x_lower <- tolower(x) x_lower <- tolower(x)
complexes <- x[trimws2(x_lower) %like_case% "complex$"] complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"]
if (length(complexes) > 0) { if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex())) {
warning_("in `as.mo()`: translation of complexes is not supported at the moment - the input text 'complex' was ignored in ", length(complexes), " cases.", call = FALSE) warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE)
} }
# WHONET: xxx = no growth # WHONET: xxx = no growth
@ -536,7 +536,7 @@ mo_cleaning_regex <- function() {
"|", "|",
"([({]|\\[).+([})]|\\])", "([({]|\\[).+([})]|\\])",
"|", "|",
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|serogr.?up|e?species|complex)[.]*( |$))" "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|serogr.?up|e?species|group|complex)[.]*( |$))"
) )
} }

View File

@ -10,7 +10,7 @@ add_custom_microorganisms(x)
clear_custom_microorganisms() clear_custom_microorganisms()
} }
\arguments{ \arguments{
\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "mo", "genus" and "species"} \item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "genus" and "species"}
} }
\description{ \description{
With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package. With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package.
@ -24,7 +24,7 @@ There are two ways to automate this process:
\strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method: \strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method:
\enumerate{ \enumerate{
\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "ab" and "name") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. \item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "genus" and "species") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location.
\item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will be loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file: \item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will be loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file:
\if{html}{\out{<div class="sourceCode r">}}\preformatted{# Add custom microorganism codes: \if{html}{\out{<div class="sourceCode r">}}\preformatted{# Add custom microorganism codes:
@ -42,8 +42,7 @@ Upon package load, this file will be loaded and run through the \code{\link[=add
\if{html}{\out{<div class="sourceCode r">}}\preformatted{ # Add custom microorganisms: \if{html}{\out{<div class="sourceCode r">}}\preformatted{ # Add custom microorganisms:
library(AMR) library(AMR)
add_custom_microorganisms( add_custom_microorganisms(
data.frame(mo = "ENT_ASB_CLO", data.frame(genus = "Enterobacter",
genus = "Enterobacter",
species = "asburiae/cloacae") species = "asburiae/cloacae")
) )
}\if{html}{\out{</div>}} }\if{html}{\out{</div>}}
@ -61,24 +60,23 @@ mo_name("Enterobacter asburiae/cloacae")
# now add a custom entry - it will be considered by as.mo() and # now add a custom entry - it will be considered by as.mo() and
# all mo_*() functions # all mo_*() functions
add_custom_microorganisms( add_custom_microorganisms(
data.frame(mo = "ENT_ASB_CLO", data.frame(genus = "Enterobacter",
genus = "Enterobacter",
species = "asburiae/cloacae" species = "asburiae/cloacae"
) )
) )
# "ENT_ASB_CLO" is now a new microorganism: # E. asburiae/cloacae is now a new microorganism:
mo_name("Enterobacter asburiae/cloacae") mo_name("Enterobacter asburiae/cloacae")
as.mo("ent_asb_clo") # its code:
mo_name("ent_asb_clo") as.mo("Enterobacter asburiae/cloacae")
# all internal algorithms will work as well: # all internal algorithms will work as well:
mo_name("Ent asburia cloacae") mo_name("Ent asburia cloacae")
# and even the taxonomy was added based on the genus! # and even the taxonomy was added based on the genus!
mo_family("ent_asb_clo") mo_family("E. asburiae/cloacae")
mo_gramstain("Enterobacter asburiae/cloacae") mo_gramstain("Enterobacter asburiae/cloacae")
mo_info("ent_asb_clo") mo_info("Enterobacter asburiae/cloacae")
} }
} }
\seealso{ \seealso{