From ba3b237bdf6d4c7c6d07804073d9d84f3355f9fc Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Wed, 28 Dec 2022 14:20:10 +0100 Subject: [PATCH] custom mo update --- DESCRIPTION | 4 +- NEWS.md | 2 +- R/add_custom_microorganisms.R | 82 +++++++++++++++++++------------- R/mo.R | 10 ++-- man/add_custom_microorganisms.Rd | 20 ++++---- 5 files changed, 66 insertions(+), 52 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab5fd5cb..391816c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9071 -Date: 2022-12-27 +Version: 1.8.2.9072 +Date: 2022-12-28 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index ac2b9210..fb432123 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9071 +# AMR 1.8.2.9072 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* diff --git a/R/add_custom_microorganisms.R b/R/add_custom_microorganisms.R index bf878676..ecf27866 100644 --- a/R/add_custom_microorganisms.R +++ b/R/add_custom_microorganisms.R @@ -30,7 +30,7 @@ #' Add Custom Microorganisms to This Package #' #' With [add_custom_microorganisms()] you can add your own custom antimicrobial drug codes to the `AMR` package. -#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "mo", "genus" and "species" +#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "genus" and "species" #' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*. #' #' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited. @@ -39,7 +39,7 @@ #' #' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method: #' -#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "ab" and "name") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. +#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "genus" and "species") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. #' #' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file: #' @@ -60,8 +60,7 @@ #' # Add custom antibiotic drug codes: #' library(AMR) #' add_custom_microorganisms( -#' data.frame(mo = "ENT_ASB_CLO", -#' genus = "Enterobacter", +#' data.frame(genus = "Enterobacter", #' species = "asburiae/cloacae") #' ) #' ``` @@ -80,36 +79,32 @@ #' # now add a custom entry - it will be considered by as.mo() and #' # all mo_*() functions #' add_custom_microorganisms( -#' data.frame(mo = "ENT_ASB_CLO", -#' genus = "Enterobacter", +#' data.frame(genus = "Enterobacter", #' species = "asburiae/cloacae" #' ) #' ) #' -#' # "ENT_ASB_CLO" is now a new microorganism: +#' # E. asburiae/cloacae is now a new microorganism: #' mo_name("Enterobacter asburiae/cloacae") -#' as.mo("ent_asb_clo") -#' mo_name("ent_asb_clo") +#' # its code: +#' as.mo("Enterobacter asburiae/cloacae") + #' # all internal algorithms will work as well: #' mo_name("Ent asburia cloacae") #' #' # and even the taxonomy was added based on the genus! -#' mo_family("ent_asb_clo") +#' mo_family("E. asburiae/cloacae") #' mo_gramstain("Enterobacter asburiae/cloacae") #' -#' mo_info("ent_asb_clo") +#' mo_info("Enterobacter asburiae/cloacae") #' } add_custom_microorganisms <- function(x) { meet_criteria(x, allow_class = "data.frame") - required_cols <- c("mo", "genus", "species") stop_ifnot( - all(required_cols %in% colnames(x)), - paste0("`x` must contain columns ", vector_and(required_cols, sort = FALSE), ".") - ) - stop_if( - any(x$mo %in% AMR_env$MO_lookup$mo), - "Microorganism code(s) ", vector_and(x$mo[x$mo %in% AMR_env$MO_lookup$mo]), " already exist in the internal `microorganisms` data set." + all(c("genus", "species") %in% colnames(x)), + paste0("`x` must contain columns ", vector_and(c("genus", "species"), sort = FALSE), ".") ) + # remove any extra class/type, such as grouped tbl, or data.table: x <- as.data.frame(x, stringsAsFactors = FALSE) # rename 'name' to 'fullname' if it's in the data set @@ -126,14 +121,14 @@ add_custom_microorganisms <- function(x) { x$genus <- trimws2(x$genus) x$species <- trimws2(x$species) x$subspecies <- trimws2(x$subspecies) - x$genus[x$genus == ""] <- NA_character_ - x$species[x$species == ""] <- NA_character_ - x$subspecies[x$subspecies == ""] <- NA_character_ - stop_if(any(x$genus[!is.na(x$genus)] %like% " "), + x$genus[is.na(x$genus)] <- "" + x$species[is.na(x$species)] <- "" + x$subspecies[is.na(x$subspecies)] <- "" + stop_if(any(x$genus %like% " "), "the 'genus' column must not contain spaces") - stop_if(any(x$species[!is.na(x$species)] %like% " "), + stop_if(any(x$species %like% " "), "the 'species' column must not contain spaces") - stop_if(any(x$subspecies[!is.na(x$subspecies)] %like% " "), + stop_if(any(x$subspecies %like% " "), "the 'subspecies' column must not contain spaces") if ("rank" %in% colnames(x)) { @@ -147,14 +142,13 @@ add_custom_microorganisms <- function(x) { call. = FALSE)))) } if (!"fullname" %in% colnames(x)) { - x$fullname <- paste(x$genus, x$species, x$subspecies) - x$fullname <- gsub(" NA", "", x$fullname) + x$fullname <- trimws2(paste(x$genus, x$species, x$subspecies)) } - if (!"kingdom" %in% colnames(x)) x$kingdom <- NA_character_ - if (!"phylum" %in% colnames(x)) x$phylum <- NA_character_ - if (!"class" %in% colnames(x)) x$class <- NA_character_ - if (!"order" %in% colnames(x)) x$order <- NA_character_ - if (!"family" %in% colnames(x)) x$family <- NA_character_ + if (!"kingdom" %in% colnames(x)) x$kingdom <- "" + if (!"phylum" %in% colnames(x)) x$phylum <- "" + if (!"class" %in% colnames(x)) x$class <- "" + if (!"order" %in% colnames(x)) x$order <- "" + if (!"family" %in% colnames(x)) x$family <- "" for (col in colnames(x)) { if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) { @@ -171,13 +165,32 @@ add_custom_microorganisms <- function(x) { x$order <- AMR_env$MO_lookup$order[match(x$genus, AMR_env$MO_lookup$genus)] x$family <- AMR_env$MO_lookup$family[match(x$genus, AMR_env$MO_lookup$genus)] + x$kingdom[is.na(x$kingdom)] <- "" + x$phylum[is.na(x$phylum)] <- "" + x$class[is.na(x$class)] <- "" + x$order[is.na(x$order)] <- "" + x$family[is.na(x$family)] <- "" + x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)] x$fullname_lower <- tolower(x$fullname) x$full_first <- substr(x$fullname_lower, 1, 1) x$species_first <- tolower(substr(x$species, 1, 1)) x$subspecies_first <- tolower(substr(x$subspecies, 1, 1)) - # add to pacakge ---- + if (!"mo" %in% colnames(x)) { + # create the mo code + x$mo <- NA_character_ + } + x$mo <- trimws2(x$mo) + x$mo[x$mo == ""] <- NA_character_ + x$mo[is.na(x$mo)] <- paste0("CUSTOM_", + toupper(abbreviate(gsub(" +", " _ ", + gsub("[^A-Za-z0-9-]", " ", + trimws2(paste(x$genus, x$species, x$subspecies)))), + minlength = 10, + named = FALSE))) + + # add to package ---- AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo) class(AMR_env$MO_lookup$mo) <- "character" @@ -193,8 +206,11 @@ add_custom_microorganisms <- function(x) { # assign new values new_df[, col] <- x[, col, drop = TRUE] } + + # clear previous coercions + suppressMessages(mo_reset_session()) + AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df)) - AMR_env$mo_previously_coerced <- AMR_env$mo_previously_coerced[which(!AMR_env$mo_previously_coerced$mo %in% new_df$mo), , drop = FALSE] class(AMR_env$MO_lookup$mo) <- c("mo", "character") message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.") } diff --git a/R/mo.R b/R/mo.R index 7456fda3..f1f52dca 100755 --- a/R/mo.R +++ b/R/mo.R @@ -182,11 +182,11 @@ as.mo <- function(x, x_lower <- tolower(x) - complexes <- x[trimws2(x_lower) %like_case% "complex$"] - if (length(complexes) > 0) { - warning_("in `as.mo()`: translation of complexes is not supported at the moment - the input text 'complex' was ignored in ", length(complexes), " cases.", call = FALSE) + complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"] + if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex())) { + warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE) } - + # WHONET: xxx = no growth x[x_lower %in% c("", "xxx", "na", "nan")] <- NA_character_ @@ -536,7 +536,7 @@ mo_cleaning_regex <- function() { "|", "([({]|\\[).+([})]|\\])", "|", - "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|serogr.?up|e?species|complex)[.]*( |$))" + "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|serogr.?up|e?species|group|complex)[.]*( |$))" ) } diff --git a/man/add_custom_microorganisms.Rd b/man/add_custom_microorganisms.Rd index 541ef360..7b764a76 100644 --- a/man/add_custom_microorganisms.Rd +++ b/man/add_custom_microorganisms.Rd @@ -10,7 +10,7 @@ add_custom_microorganisms(x) clear_custom_microorganisms() } \arguments{ -\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "mo", "genus" and "species"} +\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "genus" and "species"} } \description{ With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom antimicrobial drug codes to the \code{AMR} package. @@ -24,7 +24,7 @@ There are two ways to automate this process: \strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method: \enumerate{ -\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "ab" and "name") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. +\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "genus" and "species") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. \item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file: \if{html}{\out{
}}\preformatted{# Add custom microorganism codes: @@ -42,8 +42,7 @@ Upon package load, this file will be loaded and run through the \code{\link[=add \if{html}{\out{
}}\preformatted{ # Add custom antibiotic drug codes: library(AMR) add_custom_microorganisms( - data.frame(mo = "ENT_ASB_CLO", - genus = "Enterobacter", + data.frame(genus = "Enterobacter", species = "asburiae/cloacae") ) }\if{html}{\out{
}} @@ -61,24 +60,23 @@ mo_name("Enterobacter asburiae/cloacae") # now add a custom entry - it will be considered by as.mo() and # all mo_*() functions add_custom_microorganisms( - data.frame(mo = "ENT_ASB_CLO", - genus = "Enterobacter", + data.frame(genus = "Enterobacter", species = "asburiae/cloacae" ) ) -# "ENT_ASB_CLO" is now a new microorganism: +# E. asburiae/cloacae is now a new microorganism: mo_name("Enterobacter asburiae/cloacae") -as.mo("ent_asb_clo") -mo_name("ent_asb_clo") +# its code: +as.mo("Enterobacter asburiae/cloacae") # all internal algorithms will work as well: mo_name("Ent asburia cloacae") # and even the taxonomy was added based on the genus! -mo_family("ent_asb_clo") +mo_family("E. asburiae/cloacae") mo_gramstain("Enterobacter asburiae/cloacae") -mo_info("ent_asb_clo") +mo_info("Enterobacter asburiae/cloacae") } } \seealso{