From ca790686049ff35c4a4d2a1372b0d76aab19ae24 Mon Sep 17 00:00:00 2001 From: Matthijs Berends Date: Sat, 14 Jan 2023 17:10:10 +0100 Subject: [PATCH] Fix for adding MO's --- DESCRIPTION | 4 +- NEWS.md | 2 +- ...timicrobials.R => custom_antimicrobials.R} | 0 ...croorganisms.R => custom_microorganisms.R} | 100 ++++++++++++------ R/mo.R | 71 +++++++------ R/mo_matching_score.R | 6 +- R/mo_property.R | 1 + index.md | 6 +- man/add_custom_antimicrobials.Rd | 2 +- man/add_custom_microorganisms.Rd | 19 +++- man/as.mo.Rd | 6 +- man/mo_matching_score.Rd | 6 +- pkgdown/extra.js | 10 +- 13 files changed, 147 insertions(+), 86 deletions(-) rename R/{add_custom_antimicrobials.R => custom_antimicrobials.R} (100%) rename R/{add_custom_microorganisms.R => custom_microorganisms.R} (70%) diff --git a/DESCRIPTION b/DESCRIPTION index 088e68ee..af205b66 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.8.2.9091 -Date: 2023-01-12 +Version: 1.8.2.9092 +Date: 2023-01-14 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) data analysis and to work with microbial and antimicrobial properties by diff --git a/NEWS.md b/NEWS.md index 10cab4f9..33e8953b 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 1.8.2.9091 +# AMR 1.8.2.9092 *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* diff --git a/R/add_custom_antimicrobials.R b/R/custom_antimicrobials.R similarity index 100% rename from R/add_custom_antimicrobials.R rename to R/custom_antimicrobials.R diff --git a/R/add_custom_microorganisms.R b/R/custom_microorganisms.R similarity index 70% rename from R/add_custom_microorganisms.R rename to R/custom_microorganisms.R index 37bfed07..74bd2e15 100644 --- a/R/add_custom_microorganisms.R +++ b/R/custom_microorganisms.R @@ -30,7 +30,7 @@ #' Add Custom Microorganisms to This Package #' #' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package, such the non-taxonomic outcome of laboratory analysis. -#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "genus" and "species" +#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing column "genus" (case-insensitive) #' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*. #' #' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited. @@ -39,7 +39,7 @@ #' #' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method: #' -#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "genus" and "species") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. +#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least column "genus") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. #' #' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file: #' @@ -98,16 +98,27 @@ #' mo_gramstain("Enterobacter asburiae/cloacae") #' #' mo_info("Enterobacter asburiae/cloacae") +#' +#' +#' # the function tries to be forgiving: +#' add_custom_microorganisms( +#' data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA", +#' SPECIES = "SPECIES")) +#' mo_name("ESCHERICHIA / KLEBSIELLA") +#' mo_family("Escherichia/Klebsiella") +#' +#' add_custom_microorganisms( +#' data.frame(genus = "Citrobacter", species = "freundii complex")) +#' mo_name("C. freundii complex") +#' mo_gramstain("C. freundii complex") #' } add_custom_microorganisms <- function(x) { meet_criteria(x, allow_class = "data.frame") - stop_ifnot( - all(c("genus", "species") %in% colnames(x)), - paste0("`x` must contain columns ", vector_and(c("genus", "species"), sort = FALSE), ".") - ) + stop_ifnot("genus" %in% tolower(colnames(x)), paste0("`x` must contain column 'genus'.")) # remove any extra class/type, such as grouped tbl, or data.table: x <- as.data.frame(x, stringsAsFactors = FALSE) + colnames(x) <- tolower(colnames(x)) # rename 'name' to 'fullname' if it's in the data set if ("name" %in% colnames(x) && !"fullname" %in% colnames(x)) { colnames(x)[colnames(x) == "name"] <- "fullname" @@ -116,21 +127,30 @@ add_custom_microorganisms <- function(x) { x <- x[, colnames(AMR_env$MO_lookup)[colnames(AMR_env$MO_lookup) %in% colnames(x)], drop = FALSE] # clean the input ---- - if (!"subspecies" %in% colnames(x)) { - x$subspecies <- NA_character_ + for (col in c("genus", "species", "subspecies")) { + if (!col %in% colnames(x)) { + x[, col] <- "" + } + if (is.factor(x[, col, drop = TRUE])) { + x[, col] <- as.character(x[, col, drop = TRUE]) + } + col_ <- x[, col, drop = TRUE] + col_ <- tolower(trimws2(col_)) + col_[col_ %like% "(sub)?species"] <- "" + col_ <- gsub(" *([/-]) *", "\\1", col_, perl = TRUE) + # groups are in our taxonomic table with a capital G, and complexes might be added by the user + col_ <- gsub(" group( |$)", " Group\\1", col_, perl = TRUE) + col_ <- gsub(" complex( |$)", " Complex\\1", col_, perl = TRUE) + + col_[is.na(col_)] <- "" + if (col == "genus") { + substr(col_, 1, 1) <- toupper(substr(col_, 1, 1)) + col_ <- gsub("/([a-z])", "/\\U\\1", col_, perl = TRUE) + stop_if(any(col_ == ""), "the 'genus' column cannot be empty") + stop_if(any(col_ %like% " "), "the 'genus' column must not contain spaces") + } + x[, col] <- col_ } - x$genus <- trimws2(x$genus) - x$species <- trimws2(x$species) - x$subspecies <- trimws2(x$subspecies) - x$genus[is.na(x$genus)] <- "" - x$species[is.na(x$species)] <- "" - x$subspecies[is.na(x$subspecies)] <- "" - stop_if(any(x$genus %like% " "), - "the 'genus' column must not contain spaces") - stop_if(any(x$species %like% " "), - "the 'species' column must not contain spaces") - stop_if(any(x$subspecies %like% " "), - "the 'subspecies' column must not contain spaces") if ("rank" %in% colnames(x)) { stop_ifnot(all(x$rank %in% AMR_env$MO_lookup$rank), @@ -139,7 +159,7 @@ add_custom_microorganisms <- function(x) { x$rank <- ifelse(x$subspecies != "", "subspecies", ifelse(x$species != "", "species", ifelse(x$genus != "", "genus", - stop("in add_custom_microorganisms(): the 'genus' column cannot be empty", + stop("in add_custom_microorganisms(): only microorganisms up to the genus level can be added", call. = FALSE)))) } x$source <- "Added by user" @@ -158,22 +178,29 @@ add_custom_microorganisms <- function(x) { x$family[is.na(x$family)] <- "" for (col in colnames(x)) { + if (is.factor(x[, col, drop = TRUE])) { + x[, col] <- as.character(x[, col, drop = TRUE]) + } if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) { x[, col] <- as.list(x[, col, drop = TRUE]) } } - # fill in other columns + # fill in taxonomy based on genus + genus_to_check <- gsub("^(.*)[^a-zA-Z].*", "\\1", x$genus, perl = TRUE) + x$kingdom[which(x$kingdom == "" & genus_to_check != "")] <- AMR_env$MO_lookup$kingdom[match(genus_to_check[which(x$kingdom == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)] + x$phylum[which(x$phylum == "" & genus_to_check != "")] <- AMR_env$MO_lookup$phylum[match(genus_to_check[which(x$phylum == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)] + x$class[which(x$class == "" & genus_to_check != "")] <- AMR_env$MO_lookup$class[match(genus_to_check[which(x$class == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)] + x$order[which(x$order == "" & genus_to_check != "")] <- AMR_env$MO_lookup$order[match(genus_to_check[which(x$order == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)] + x$family[which(x$family == "" & genus_to_check != "")] <- AMR_env$MO_lookup$family[match(genus_to_check[which(x$family == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)] + + # fill in other columns that are used in internal algorithms x$status <- "accepted" x$prevalence <- 1 - - x$kingdom[which(x$kingdom == "" & x$genus != "")] <- AMR_env$MO_lookup$kingdom[match(x$genus[which(x$kingdom == "" & x$genus != "")], AMR_env$MO_lookup$genus)] - x$phylum[which(x$phylum == "" & x$genus != "")] <- AMR_env$MO_lookup$phylum[match(x$genus[which(x$phylum == "" & x$genus != "")], AMR_env$MO_lookup$genus)] - x$class[which(x$class == "" & x$genus != "")] <- AMR_env$MO_lookup$class[match(x$genus[which(x$class == "" & x$genus != "")], AMR_env$MO_lookup$genus)] - x$order[which(x$order == "" & x$genus != "")] <- AMR_env$MO_lookup$order[match(x$genus[which(x$order == "" & x$genus != "")], AMR_env$MO_lookup$genus)] - x$family[which(x$family == "" & x$genus != "")] <- AMR_env$MO_lookup$family[match(x$genus[which(x$family == "" & x$genus != "")], AMR_env$MO_lookup$genus)] - - x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)] + x$ref <- paste("Self-added,", format(Sys.Date(), "%Y")) + x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(genus_to_check, AMR_env$MO_lookup$genus)] + # complete missing kingdom index, so mo_matching_score() will not return NA + x$kingdom_index[is.na(x$kingdom_index)] <- 1 x$fullname_lower <- tolower(x$fullname) x$full_first <- substr(x$fullname_lower, 1, 1) x$species_first <- tolower(substr(x$species, 1, 1)) @@ -183,18 +210,19 @@ add_custom_microorganisms <- function(x) { # create the mo code x$mo <- NA_character_ } - x$mo <- trimws2(x$mo) + x$mo <- trimws2(as.character(x$mo)) x$mo[x$mo == ""] <- NA_character_ + current <- sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE) x$mo[is.na(x$mo)] <- paste0("CUSTOM", - seq(from = sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE) + 1, to = nrow(x), by = 1), + seq.int(from = current + 1, to = current + nrow(x), by = 1), "_", toupper(unname(abbreviate(gsub(" +", " _ ", gsub("[^A-Za-z0-9-]", " ", trimws2(paste(x$genus, x$species, x$subspecies)))), minlength = 10)))) + stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package") # add to package ---- - AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo) class(AMR_env$MO_lookup$mo) <- "character" @@ -215,7 +243,11 @@ add_custom_microorganisms <- function(x) { AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df)) class(AMR_env$MO_lookup$mo) <- c("mo", "character") - message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.") + if (nrow(x) <= 3) { + message_("Added ", vector_and(italicise(x$fullname), quotes = FALSE), " to the internal `microorganisms` data set.") + } else { + message_("Added ", nr2char(nrow(x)), " records to the internal `microorganisms` data set.") + } } #' @rdname add_custom_microorganisms diff --git a/R/mo.R b/R/mo.R index 27d6982c..93258829 100755 --- a/R/mo.R +++ b/R/mo.R @@ -183,7 +183,7 @@ as.mo <- function(x, x_lower <- tolower(x) complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"] - if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex())) { + if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex()) && !any(AMR_env$MO_lookup$fullname[which(AMR_env$MO_lookup$source == "Added by user")] %like% "(group|complex)", na.rm = TRUE)) { warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE) } @@ -222,7 +222,7 @@ as.mo <- function(x, if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { message_( "Returning previously coerced value", ifelse(sum(is.na(old) & !is.na(new)) > 1, "s", ""), - " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this." + " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this. This note will be shown once per session for this input." ) } @@ -236,8 +236,9 @@ as.mo <- function(x, x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") ")] <- NA_character_ - # groups are in our taxonomic table with a capital G - x <- gsub(" group ", " Group ", x, fixed = TRUE) + # groups are in our taxonomic table with a capital G, and complexes might be added by the user + x <- gsub(" group( |$)", " Group\\1", x, perl = TRUE) + x <- gsub(" complex( |$)", " Complex\\1", x, perl = TRUE) # run over all unique leftovers x_unique <- unique(x[is.na(out) & !is.na(x)]) @@ -281,11 +282,11 @@ as.mo <- function(x, first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]") filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars) } else if (nchar(x_out) == 4) { - # no space and 4 characters - probably a code such as STAU or ESCO! + # no space and 4 characters - probably a code such as STAU or ESCO msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE))) filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4))) } else if (nchar(x_out) <= 6) { - # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL! + # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3)) second_part <- substr(x_out, 4, nchar(x_out)) msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE))) @@ -992,6 +993,9 @@ italicise <- function(x) { gsub("Salmonella ", "", x[x %like_case% "Salmonella [A-Z]"])) out[x %like_case% "Streptococcus [A-Z]"] <- paste(font_italic("Streptococcus"), gsub("Streptococcus ", "", x[x %like_case% "Streptococcus [A-Z]"])) + if (has_colour()) { + out <- gsub("(Group|group|Complex|complex)(\033\\[23m)?", "\033[23m\\1", out, perl = TRUE) + } out } @@ -1009,33 +1013,36 @@ nr2char <- function(x) { parse_and_convert <- function(x) { if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) { - return(trimws2(x)) - } - tryCatch( - { - if (!is.null(dim(x))) { - if (NCOL(x) > 2) { - stop("a maximum of two columns is allowed", call. = FALSE) - } else if (NCOL(x) == 2) { - # support Tidyverse selection like: df %>% select(colA, colB) - # paste these columns together - x <- as.data.frame(x, stringsAsFactors = FALSE) - colnames(x) <- c("A", "B") - x <- paste(x$A, x$B) - } else { - # support Tidyverse selection like: df %>% select(colA) - x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]] + out <- x + } else { + out <- tryCatch( + { + if (!is.null(dim(x))) { + if (NCOL(x) > 2) { + stop("a maximum of two columns is allowed", call. = FALSE) + } else if (NCOL(x) == 2) { + # support Tidyverse selection like: df %>% select(colA, colB) + # paste these columns together + x <- as.data.frame(x, stringsAsFactors = FALSE) + colnames(x) <- c("A", "B") + x <- paste(x$A, x$B) + } else { + # support Tidyverse selection like: df %>% select(colA) + x <- as.data.frame(x, stringsAsFactors = FALSE)[[1]] + } } - } - parsed <- iconv(as.character(x), to = "UTF-8") - parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") - parsed <- gsub('"', "", parsed, fixed = TRUE) - parsed <- gsub(" +", " ", parsed, perl = TRUE) - parsed - }, - error = function(e) stop(e$message, call. = FALSE) - ) # this will also be thrown when running `as.mo(no_existing_object)` - trimws2(parsed) + parsed <- iconv(as.character(x), to = "UTF-8") + parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") + parsed <- gsub('"', "", parsed, fixed = TRUE) + parsed + }, + error = function(e) stop(e$message, call. = FALSE) + ) # this will also be thrown when running `as.mo(no_existing_object)` + } + out <- trimws2(out) + out <- gsub(" +", " ", out, perl = TRUE) + out <- gsub(" ?/ ? ", "/", out, perl = TRUE) + out } replace_old_mo_codes <- function(x, property) { diff --git a/R/mo_matching_score.R b/R/mo_matching_score.R index 50da212e..42e694d1 100755 --- a/R/mo_matching_score.R +++ b/R/mo_matching_score.R @@ -39,7 +39,9 @@ #' @section Matching Score for Microorganisms: #' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as: #' -#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} +#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{ +#' +#' \ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} #' #' where: #' @@ -47,7 +49,7 @@ #' * \eqn{n} is a taxonomic name (genus, species, and subspecies); #' * \eqn{l_n} is the length of \eqn{n}; #' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; -#' * \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; +#' * \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below; #' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. #' #' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups: diff --git a/R/mo_property.R b/R/mo_property.R index dca10b53..e79f413e 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -777,6 +777,7 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A info <- lapply(x, function(y) { c( + list(identifier = x), mo_taxonomy(y, language = language, keep_synonyms = keep_synonyms), list( status = mo_status(y, language = language, keep_synonyms = keep_synonyms), diff --git a/index.md b/index.md index f47d1225..3413852f 100644 --- a/index.md +++ b/index.md @@ -15,11 +15,11 @@ ### Introduction -The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. +The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](./authors.html) from around the globe are continually helping us to make this a successful and durable project! This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)). -After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl), and is being [actively and durably maintained](./news) by two public healthcare organisations in the Netherlands. +After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl). ##### Used in 175 countries, translated to 16 languages @@ -27,7 +27,7 @@ After installing this package, R knows [**~52,000 distinct microbial species**]( Since its first public release in early 2018, this R package has been used in almost all countries in the world. Click the map to enlarge and to see the country names. -The `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish, and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. +With the help of contributors from all corners of the world, the `AMR` package is available in English, Chinese, Danish, Dutch, French, German, Greek, Italian, Japanese, Polish, Portuguese, Russian, Spanish, Swedish, Turkish, and Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. ### Practical examples diff --git a/man/add_custom_antimicrobials.Rd b/man/add_custom_antimicrobials.Rd index 63b970e1..534aaf69 100644 --- a/man/add_custom_antimicrobials.Rd +++ b/man/add_custom_antimicrobials.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_custom_antimicrobials.R +% Please edit documentation in R/custom_antimicrobials.R \name{add_custom_antimicrobials} \alias{add_custom_antimicrobials} \alias{clear_custom_antimicrobials} diff --git a/man/add_custom_microorganisms.Rd b/man/add_custom_microorganisms.Rd index 891db595..caa82878 100644 --- a/man/add_custom_microorganisms.Rd +++ b/man/add_custom_microorganisms.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_custom_microorganisms.R +% Please edit documentation in R/custom_microorganisms.R \name{add_custom_microorganisms} \alias{add_custom_microorganisms} \alias{clear_custom_microorganisms} @@ -10,7 +10,7 @@ add_custom_microorganisms(x) clear_custom_microorganisms() } \arguments{ -\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "genus" and "species"} +\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing column "genus" (case-insensitive)} } \description{ With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package, such the non-taxonomic outcome of laboratory analysis. @@ -24,7 +24,7 @@ There are two ways to automate this process: \strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method: \enumerate{ -\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "genus" and "species") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. +\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least column "genus") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. \item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file: \if{html}{\out{
}}\preformatted{# Add custom microorganism codes: @@ -79,6 +79,19 @@ mo_family("E. asburiae/cloacae") mo_gramstain("Enterobacter asburiae/cloacae") mo_info("Enterobacter asburiae/cloacae") + + +# the function tries to be forgiving: +add_custom_microorganisms( + data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA", + SPECIES = "SPECIES")) +mo_name("ESCHERICHIA / KLEBSIELLA") +mo_family("Escherichia/Klebsiella") + +add_custom_microorganisms( + data.frame(genus = "Citrobacter", species = "freundii complex")) +mo_name("C. freundii complex") +mo_gramstain("C. freundii complex") } } \seealso{ diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 9b6ba030..cad7f4ab 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -131,7 +131,9 @@ The coercion rules consider the prevalence of microorganisms in humans, which is With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as: -\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} +\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{ + +\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} where: \itemize{ @@ -139,7 +141,7 @@ where: \item \eqn{n} is a taxonomic name (genus, species, and subspecies); \item \eqn{l_n} is the length of \eqn{n}; \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; -\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; +\item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below; \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. } diff --git a/man/mo_matching_score.Rd b/man/mo_matching_score.Rd index 5f8629a4..73fbcbd8 100644 --- a/man/mo_matching_score.Rd +++ b/man/mo_matching_score.Rd @@ -23,7 +23,9 @@ Later, the work of Bartlett A \emph{et al.} about bacterial pathogens infecting With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as: -\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} +\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{ + +\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} where: \itemize{ @@ -31,7 +33,7 @@ where: \item \eqn{n} is a taxonomic name (genus, species, and subspecies); \item \eqn{l_n} is the length of \eqn{n}; \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; -\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; +\item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below; \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. } diff --git a/pkgdown/extra.js b/pkgdown/extra.js index 3d6ac7e7..19d3fe0d 100644 --- a/pkgdown/extra.js +++ b/pkgdown/extra.js @@ -34,12 +34,14 @@ $(document).ready(function() { // Replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page $(".developers h2").text("Maintainers"); $(".citation h2:first").text("All contributors"); + $(".citation h2:second").text("How to Cite"); - // replace \donttest and \dontrun texts in Examples + // remove \donttest and \dontrun texts in Examples if ($("#ref-examples ~ div pre").length > 0) { - $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\donttest{", "")); - $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\dontrun{", "")); - $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# }", "")); + const regex1 = /# \\dont(test|run)\{(\n|
)*/ig; + const regex2 = /(\n|
)*# \}/ig; + $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex1, "")); + $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex2, "")); } // remove leading newline in code examples on changelog