Fix for adding MO's

This commit is contained in:
dr. M.S. (Matthijs) Berends 2023-01-14 17:10:10 +01:00
parent 33f269e2f4
commit ca79068604
13 changed files with 147 additions and 86 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 1.8.2.9091 Version: 1.8.2.9092
Date: 2023-01-12 Date: 2023-01-14
Title: Antimicrobial Resistance Data Analysis Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR) Description: Functions to simplify and standardise antimicrobial resistance (AMR)
data analysis and to work with microbial and antimicrobial properties by data analysis and to work with microbial and antimicrobial properties by

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9091 # AMR 1.8.2.9092
*(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)* *(this beta version will eventually become v2.0! We're happy to reach a new major milestone soon!)*

View File

@ -30,7 +30,7 @@
#' Add Custom Microorganisms to This Package #' Add Custom Microorganisms to This Package
#' #'
#' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package, such the non-taxonomic outcome of laboratory analysis. #' With [add_custom_microorganisms()] you can add your own custom microorganisms to the `AMR` package, such the non-taxonomic outcome of laboratory analysis.
#' @param x a [data.frame] resembling the [microorganisms] data set, at least containing columns "genus" and "species" #' @param x a [data.frame] resembling the [microorganisms] data set, at least containing column "genus" (case-insensitive)
#' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*. #' @details This function will fill in missing taxonomy for you, if specific taxonomic columns are missing, see *Examples*.
#' #'
#' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited. #' **Important:** Due to how \R works, the [add_custom_microorganisms()] function has to be run in every \R session - added microorganisms are not stored between sessions and are thus lost when \R is exited.
@ -39,7 +39,7 @@
#' #'
#' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method: #' **Method 1:** Save the microorganisms to a local or remote file (can even be the internet). To use this method:
#' #'
#' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least columns "genus" and "species") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location. #' 1. Create a data set in the structure of the [microorganisms] data set (containing at the very least column "genus") and save it with [saveRDS()] to a location of choice, e.g. `"~/my_custom_mo.rds"`, or any remote location.
#' #'
#' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file: #' 2. Set the file location to the `AMR_custom_mo` \R option: `options(AMR_custom_mo = "~/my_custom_mo.rds")`. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the `.Rprofile` file so that it will loaded on start-up of \R. To do this, open the `.Rprofile` file using e.g. `utils::file.edit("~/.Rprofile")`, add this text and save the file:
#' #'
@ -98,16 +98,27 @@
#' mo_gramstain("Enterobacter asburiae/cloacae") #' mo_gramstain("Enterobacter asburiae/cloacae")
#' #'
#' mo_info("Enterobacter asburiae/cloacae") #' mo_info("Enterobacter asburiae/cloacae")
#'
#'
#' # the function tries to be forgiving:
#' add_custom_microorganisms(
#' data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA",
#' SPECIES = "SPECIES"))
#' mo_name("ESCHERICHIA / KLEBSIELLA")
#' mo_family("Escherichia/Klebsiella")
#'
#' add_custom_microorganisms(
#' data.frame(genus = "Citrobacter", species = "freundii complex"))
#' mo_name("C. freundii complex")
#' mo_gramstain("C. freundii complex")
#' } #' }
add_custom_microorganisms <- function(x) { add_custom_microorganisms <- function(x) {
meet_criteria(x, allow_class = "data.frame") meet_criteria(x, allow_class = "data.frame")
stop_ifnot( stop_ifnot("genus" %in% tolower(colnames(x)), paste0("`x` must contain column 'genus'."))
all(c("genus", "species") %in% colnames(x)),
paste0("`x` must contain columns ", vector_and(c("genus", "species"), sort = FALSE), ".")
)
# remove any extra class/type, such as grouped tbl, or data.table: # remove any extra class/type, such as grouped tbl, or data.table:
x <- as.data.frame(x, stringsAsFactors = FALSE) x <- as.data.frame(x, stringsAsFactors = FALSE)
colnames(x) <- tolower(colnames(x))
# rename 'name' to 'fullname' if it's in the data set # rename 'name' to 'fullname' if it's in the data set
if ("name" %in% colnames(x) && !"fullname" %in% colnames(x)) { if ("name" %in% colnames(x) && !"fullname" %in% colnames(x)) {
colnames(x)[colnames(x) == "name"] <- "fullname" colnames(x)[colnames(x) == "name"] <- "fullname"
@ -116,21 +127,30 @@ add_custom_microorganisms <- function(x) {
x <- x[, colnames(AMR_env$MO_lookup)[colnames(AMR_env$MO_lookup) %in% colnames(x)], drop = FALSE] x <- x[, colnames(AMR_env$MO_lookup)[colnames(AMR_env$MO_lookup) %in% colnames(x)], drop = FALSE]
# clean the input ---- # clean the input ----
if (!"subspecies" %in% colnames(x)) { for (col in c("genus", "species", "subspecies")) {
x$subspecies <- NA_character_ if (!col %in% colnames(x)) {
x[, col] <- ""
}
if (is.factor(x[, col, drop = TRUE])) {
x[, col] <- as.character(x[, col, drop = TRUE])
}
col_ <- x[, col, drop = TRUE]
col_ <- tolower(trimws2(col_))
col_[col_ %like% "(sub)?species"] <- ""
col_ <- gsub(" *([/-]) *", "\\1", col_, perl = TRUE)
# groups are in our taxonomic table with a capital G, and complexes might be added by the user
col_ <- gsub(" group( |$)", " Group\\1", col_, perl = TRUE)
col_ <- gsub(" complex( |$)", " Complex\\1", col_, perl = TRUE)
col_[is.na(col_)] <- ""
if (col == "genus") {
substr(col_, 1, 1) <- toupper(substr(col_, 1, 1))
col_ <- gsub("/([a-z])", "/\\U\\1", col_, perl = TRUE)
stop_if(any(col_ == ""), "the 'genus' column cannot be empty")
stop_if(any(col_ %like% " "), "the 'genus' column must not contain spaces")
}
x[, col] <- col_
} }
x$genus <- trimws2(x$genus)
x$species <- trimws2(x$species)
x$subspecies <- trimws2(x$subspecies)
x$genus[is.na(x$genus)] <- ""
x$species[is.na(x$species)] <- ""
x$subspecies[is.na(x$subspecies)] <- ""
stop_if(any(x$genus %like% " "),
"the 'genus' column must not contain spaces")
stop_if(any(x$species %like% " "),
"the 'species' column must not contain spaces")
stop_if(any(x$subspecies %like% " "),
"the 'subspecies' column must not contain spaces")
if ("rank" %in% colnames(x)) { if ("rank" %in% colnames(x)) {
stop_ifnot(all(x$rank %in% AMR_env$MO_lookup$rank), stop_ifnot(all(x$rank %in% AMR_env$MO_lookup$rank),
@ -139,7 +159,7 @@ add_custom_microorganisms <- function(x) {
x$rank <- ifelse(x$subspecies != "", "subspecies", x$rank <- ifelse(x$subspecies != "", "subspecies",
ifelse(x$species != "", "species", ifelse(x$species != "", "species",
ifelse(x$genus != "", "genus", ifelse(x$genus != "", "genus",
stop("in add_custom_microorganisms(): the 'genus' column cannot be empty", stop("in add_custom_microorganisms(): only microorganisms up to the genus level can be added",
call. = FALSE)))) call. = FALSE))))
} }
x$source <- "Added by user" x$source <- "Added by user"
@ -158,22 +178,29 @@ add_custom_microorganisms <- function(x) {
x$family[is.na(x$family)] <- "" x$family[is.na(x$family)] <- ""
for (col in colnames(x)) { for (col in colnames(x)) {
if (is.factor(x[, col, drop = TRUE])) {
x[, col] <- as.character(x[, col, drop = TRUE])
}
if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) { if (is.list(AMR_env$MO_lookup[, col, drop = TRUE])) {
x[, col] <- as.list(x[, col, drop = TRUE]) x[, col] <- as.list(x[, col, drop = TRUE])
} }
} }
# fill in other columns # fill in taxonomy based on genus
genus_to_check <- gsub("^(.*)[^a-zA-Z].*", "\\1", x$genus, perl = TRUE)
x$kingdom[which(x$kingdom == "" & genus_to_check != "")] <- AMR_env$MO_lookup$kingdom[match(genus_to_check[which(x$kingdom == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$phylum[which(x$phylum == "" & genus_to_check != "")] <- AMR_env$MO_lookup$phylum[match(genus_to_check[which(x$phylum == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$class[which(x$class == "" & genus_to_check != "")] <- AMR_env$MO_lookup$class[match(genus_to_check[which(x$class == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$order[which(x$order == "" & genus_to_check != "")] <- AMR_env$MO_lookup$order[match(genus_to_check[which(x$order == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
x$family[which(x$family == "" & genus_to_check != "")] <- AMR_env$MO_lookup$family[match(genus_to_check[which(x$family == "" & genus_to_check != "")], AMR_env$MO_lookup$genus)]
# fill in other columns that are used in internal algorithms
x$status <- "accepted" x$status <- "accepted"
x$prevalence <- 1 x$prevalence <- 1
x$ref <- paste("Self-added,", format(Sys.Date(), "%Y"))
x$kingdom[which(x$kingdom == "" & x$genus != "")] <- AMR_env$MO_lookup$kingdom[match(x$genus[which(x$kingdom == "" & x$genus != "")], AMR_env$MO_lookup$genus)] x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(genus_to_check, AMR_env$MO_lookup$genus)]
x$phylum[which(x$phylum == "" & x$genus != "")] <- AMR_env$MO_lookup$phylum[match(x$genus[which(x$phylum == "" & x$genus != "")], AMR_env$MO_lookup$genus)] # complete missing kingdom index, so mo_matching_score() will not return NA
x$class[which(x$class == "" & x$genus != "")] <- AMR_env$MO_lookup$class[match(x$genus[which(x$class == "" & x$genus != "")], AMR_env$MO_lookup$genus)] x$kingdom_index[is.na(x$kingdom_index)] <- 1
x$order[which(x$order == "" & x$genus != "")] <- AMR_env$MO_lookup$order[match(x$genus[which(x$order == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
x$family[which(x$family == "" & x$genus != "")] <- AMR_env$MO_lookup$family[match(x$genus[which(x$family == "" & x$genus != "")], AMR_env$MO_lookup$genus)]
x$kingdom_index <- AMR_env$MO_lookup$kingdom_index[match(x$genus, AMR_env$MO_lookup$genus)]
x$fullname_lower <- tolower(x$fullname) x$fullname_lower <- tolower(x$fullname)
x$full_first <- substr(x$fullname_lower, 1, 1) x$full_first <- substr(x$fullname_lower, 1, 1)
x$species_first <- tolower(substr(x$species, 1, 1)) x$species_first <- tolower(substr(x$species, 1, 1))
@ -183,18 +210,19 @@ add_custom_microorganisms <- function(x) {
# create the mo code # create the mo code
x$mo <- NA_character_ x$mo <- NA_character_
} }
x$mo <- trimws2(x$mo) x$mo <- trimws2(as.character(x$mo))
x$mo[x$mo == ""] <- NA_character_ x$mo[x$mo == ""] <- NA_character_
current <- sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE)
x$mo[is.na(x$mo)] <- paste0("CUSTOM", x$mo[is.na(x$mo)] <- paste0("CUSTOM",
seq(from = sum(AMR_env$MO_lookup$source == "Added by user", na.rm = TRUE) + 1, to = nrow(x), by = 1), seq.int(from = current + 1, to = current + nrow(x), by = 1),
"_", "_",
toupper(unname(abbreviate(gsub(" +", " _ ", toupper(unname(abbreviate(gsub(" +", " _ ",
gsub("[^A-Za-z0-9-]", " ", gsub("[^A-Za-z0-9-]", " ",
trimws2(paste(x$genus, x$species, x$subspecies)))), trimws2(paste(x$genus, x$species, x$subspecies)))),
minlength = 10)))) minlength = 10))))
stop_if(anyDuplicated(c(as.character(AMR_env$MO_lookup$mo), x$mo)), "MO codes must be unique and not match existing MO codes of the AMR package")
# add to package ---- # add to package ----
AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo) AMR_env$custom_mo_codes <- c(AMR_env$custom_mo_codes, x$mo)
class(AMR_env$MO_lookup$mo) <- "character" class(AMR_env$MO_lookup$mo) <- "character"
@ -215,7 +243,11 @@ add_custom_microorganisms <- function(x) {
AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df)) AMR_env$MO_lookup <- unique(rbind(AMR_env$MO_lookup, new_df))
class(AMR_env$MO_lookup$mo) <- c("mo", "character") class(AMR_env$MO_lookup$mo) <- c("mo", "character")
message_("Added ", nr2char(nrow(x)), " record", ifelse(nrow(x) > 1, "s", ""), " to the internal `microorganisms` data set.") if (nrow(x) <= 3) {
message_("Added ", vector_and(italicise(x$fullname), quotes = FALSE), " to the internal `microorganisms` data set.")
} else {
message_("Added ", nr2char(nrow(x)), " records to the internal `microorganisms` data set.")
}
} }
#' @rdname add_custom_microorganisms #' @rdname add_custom_microorganisms

29
R/mo.R
View File

@ -183,7 +183,7 @@ as.mo <- function(x,
x_lower <- tolower(x) x_lower <- tolower(x)
complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"] complexes <- x[trimws2(x_lower) %like_case% " (complex|group)$"]
if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex())) { if (length(complexes) > 0 && identical(remove_from_input, mo_cleaning_regex()) && !any(AMR_env$MO_lookup$fullname[which(AMR_env$MO_lookup$source == "Added by user")] %like% "(group|complex)", na.rm = TRUE)) {
warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE) warning_("in `as.mo()`: 'complex' and 'group' were ignored from the input in ", length(complexes), " case", ifelse(length(complexes) > 1, "s", ""), ", as they are currently not supported.\nYou can add your own microorganism with `add_custom_microorganisms()`.", call = FALSE)
} }
@ -222,7 +222,7 @@ as.mo <- function(x,
if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) { if (isTRUE(info) && message_not_thrown_before("as.mo", old, new, entire_session = TRUE) && any(is.na(old) & !is.na(new), na.rm = TRUE)) {
message_( message_(
"Returning previously coerced value", ifelse(sum(is.na(old) & !is.na(new)) > 1, "s", ""), "Returning previously coerced value", ifelse(sum(is.na(old) & !is.na(new)) > 1, "s", ""),
" for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this." " for ", vector_and(x[is.na(old) & !is.na(new)]), ". Run `mo_reset_session()` to reset this. This note will be shown once per session for this input."
) )
} }
@ -236,8 +236,9 @@ as.mo <- function(x,
x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% translate_into_language("no .*growth", language = language)] <- NA_character_
x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") ")] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_into_language("no|not", language = language), ") ")] <- NA_character_
# groups are in our taxonomic table with a capital G # groups are in our taxonomic table with a capital G, and complexes might be added by the user
x <- gsub(" group ", " Group ", x, fixed = TRUE) x <- gsub(" group( |$)", " Group\\1", x, perl = TRUE)
x <- gsub(" complex( |$)", " Complex\\1", x, perl = TRUE)
# run over all unique leftovers # run over all unique leftovers
x_unique <- unique(x[is.na(out) & !is.na(x)]) x_unique <- unique(x[is.na(out) & !is.na(x)])
@ -281,11 +282,11 @@ as.mo <- function(x,
first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]") first_chars <- paste0("(^| )", "[", paste(substr(x_parts, 1, 1), collapse = ""), "]")
filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars) filtr <- which(AMR_env$MO_lookup$full_first %like_case% first_chars)
} else if (nchar(x_out) == 4) { } else if (nchar(x_out) == 4) {
# no space and 4 characters - probably a code such as STAU or ESCO! # no space and 4 characters - probably a code such as STAU or ESCO
msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE))) msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(substr(x_out, 1, 2), substr(x_out, 3, 4)), sort = FALSE)))
filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4))) filtr <- which(AMR_env$MO_lookup$fullname_lower %like_case% paste0("(^| )", substr(x_out, 1, 2), ".* ", substr(x_out, 3, 4)))
} else if (nchar(x_out) <= 6) { } else if (nchar(x_out) <= 6) {
# no space and 5-6 characters - probably a code such as STAAUR or ESCCOL! # no space and 5-6 characters - probably a code such as STAAUR or ESCCOL
first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3)) first_part <- paste0(substr(x_out, 1, 2), "[a-z]*", substr(x_out, 3, 3))
second_part <- substr(x_out, 4, nchar(x_out)) second_part <- substr(x_out, 4, nchar(x_out))
msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE))) msg <- c(msg, paste0("Input \"", x_search, "\" was assumed to be a microorganism code - tried to match on ", vector_and(c(gsub("[a-z]*", "(...)", first_part, fixed = TRUE), second_part), sort = FALSE)))
@ -992,6 +993,9 @@ italicise <- function(x) {
gsub("Salmonella ", "", x[x %like_case% "Salmonella [A-Z]"])) gsub("Salmonella ", "", x[x %like_case% "Salmonella [A-Z]"]))
out[x %like_case% "Streptococcus [A-Z]"] <- paste(font_italic("Streptococcus"), out[x %like_case% "Streptococcus [A-Z]"] <- paste(font_italic("Streptococcus"),
gsub("Streptococcus ", "", x[x %like_case% "Streptococcus [A-Z]"])) gsub("Streptococcus ", "", x[x %like_case% "Streptococcus [A-Z]"]))
if (has_colour()) {
out <- gsub("(Group|group|Complex|complex)(\033\\[23m)?", "\033[23m\\1", out, perl = TRUE)
}
out out
} }
@ -1009,9 +1013,9 @@ nr2char <- function(x) {
parse_and_convert <- function(x) { parse_and_convert <- function(x) {
if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) { if (tryCatch(is.character(x) && all(Encoding(x) == "unknown", na.rm = TRUE), error = function(e) FALSE)) {
return(trimws2(x)) out <- x
} } else {
tryCatch( out <- tryCatch(
{ {
if (!is.null(dim(x))) { if (!is.null(dim(x))) {
if (NCOL(x) > 2) { if (NCOL(x) > 2) {
@ -1030,12 +1034,15 @@ parse_and_convert <- function(x) {
parsed <- iconv(as.character(x), to = "UTF-8") parsed <- iconv(as.character(x), to = "UTF-8")
parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT") parsed[is.na(parsed) & !is.na(x)] <- iconv(x[is.na(parsed) & !is.na(x)], from = "Latin1", to = "ASCII//TRANSLIT")
parsed <- gsub('"', "", parsed, fixed = TRUE) parsed <- gsub('"', "", parsed, fixed = TRUE)
parsed <- gsub(" +", " ", parsed, perl = TRUE)
parsed parsed
}, },
error = function(e) stop(e$message, call. = FALSE) error = function(e) stop(e$message, call. = FALSE)
) # this will also be thrown when running `as.mo(no_existing_object)` ) # this will also be thrown when running `as.mo(no_existing_object)`
trimws2(parsed) }
out <- trimws2(out)
out <- gsub(" +", " ", out, perl = TRUE)
out <- gsub(" ?/ ? ", "/", out, perl = TRUE)
out
} }
replace_old_mo_codes <- function(x, property) { replace_old_mo_codes <- function(x, property) {

View File

@ -39,7 +39,9 @@
#' @section Matching Score for Microorganisms: #' @section Matching Score for Microorganisms:
#' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as: #' With ambiguous user input in [as.mo()] and all the [`mo_*`][mo_property()] functions, the returned results are chosen based on their matching score using [mo_matching_score()]. This matching score \eqn{m}, is calculated as:
#' #'
#' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} #' \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
#'
#' \ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
#' #'
#' where: #' where:
#' #'
@ -47,7 +49,7 @@
#' * \eqn{n} is a taxonomic name (genus, species, and subspecies); #' * \eqn{n} is a taxonomic name (genus, species, and subspecies);
#' * \eqn{l_n} is the length of \eqn{n}; #' * \eqn{l_n} is the length of \eqn{n};
#' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; #' * \eqn{lev} is the [Levenshtein distance function](https://en.wikipedia.org/wiki/Levenshtein_distance) (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
#' * \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; #' * \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
#' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. #' * \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
#' #'
#' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups: #' The grouping into human pathogenic prevalence \eqn{p} is based on recent work from Bartlett *et al.* (2022, \doi{10.1099/mic.0.001269}) who extensively studied medical-scientific literature to categorise all bacterial species into these groups:

View File

@ -777,6 +777,7 @@ mo_info <- function(x, language = get_AMR_locale(), keep_synonyms = getOption("A
info <- lapply(x, function(y) { info <- lapply(x, function(y) {
c( c(
list(identifier = x),
mo_taxonomy(y, language = language, keep_synonyms = keep_synonyms), mo_taxonomy(y, language = language, keep_synonyms = keep_synonyms),
list( list(
status = mo_status(y, language = language, keep_synonyms = keep_synonyms), status = mo_status(y, language = language, keep_synonyms = keep_synonyms),

View File

@ -15,11 +15,11 @@
### Introduction ### Introduction
The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. The `AMR` package is a [free and open-source](#copyright) R package with [zero dependencies](https://en.wikipedia.org/wiki/Dependency_hell) to simplify the analysis and prediction of Antimicrobial Resistance (AMR) and to work with microbial and antimicrobial data and properties, by using evidence-based methods. **Our aim is to provide a standard** for clean and reproducible AMR data analysis, that can therefore empower epidemiological analyses to continuously enable surveillance and treatment evaluation in any setting. [Many different researchers](./authors.html) from around the globe are continually helping us to make this a successful and durable project!
This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)). This work was published in the Journal of Statistical Software (Volume 104(3); [DOI 10.18637/jss.v104.i03](https://doi.org/10.18637/jss.v104.i03)) and formed the basis of two PhD theses ([DOI 10.33612/diss.177417131](https://doi.org/10.33612/diss.177417131) and [DOI 10.33612/diss.192486375](https://doi.org/10.33612/diss.192486375)).
After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl), and is being [actively and durably maintained](./news) by two public healthcare organisations in the Netherlands. After installing this package, R knows [**~52,000 distinct microbial species**](./reference/microorganisms.html) (updated December 2022) and all [**~600 antibiotic, antimycotic and antiviral drugs**](./reference/antibiotics.html) by name and code (including ATC, EARS-Net, ASIARS-Net, PubChem, LOINC and SNOMED CT), and knows all about valid R/SI and MIC values. The integral breakpoint guidelines from CLSI and EUCAST are included from the last 10 years. It supports and can read any data format, including WHONET data. This package works on Windows, macOS and Linux with all versions of R since R-3.0 (April 2013). **It was designed to work in any setting, including those with very limited resources**. It was created for both routine data analysis and academic research at the Faculty of Medical Sciences of the [University of Groningen](https://www.rug.nl), in collaboration with non-profit organisations [Certe Medical Diagnostics and Advice Foundation](https://www.certe.nl) and [University Medical Center Groningen](https://www.umcg.nl).
##### Used in 175 countries, translated to 16 languages ##### Used in 175 countries, translated to 16 languages
@ -27,7 +27,7 @@ After installing this package, R knows [**~52,000 distinct microbial species**](
Since its first public release in early 2018, this R package has been used in almost all countries in the world. Click the map to enlarge and to see the country names. Since its first public release in early 2018, this R package has been used in almost all countries in the world. Click the map to enlarge and to see the country names.
The `AMR` package is available in <img src="lang_en.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> English, <img src="lang_zh.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Chinese, <img src="lang_da.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Danish, <img src="lang_nl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Dutch, <img src="lang_fr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> French, <img src="lang_de.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> German, <img src="lang_el.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Greek, <img src="lang_it.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Italian, <img src="lang_ja.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Japanese, <img src="lang_pl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Polish, <img src="lang_pt.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Portuguese, <img src="lang_ru.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Russian, <img src="lang_es.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Spanish, <img src="lang_sv.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Swedish, <img src="lang_tr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Turkish, and <img src="lang_uk.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages. With the help of contributors from all corners of the world, the `AMR` package is available in <img src="lang_en.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> English, <img src="lang_zh.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Chinese, <img src="lang_da.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Danish, <img src="lang_nl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Dutch, <img src="lang_fr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> French, <img src="lang_de.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> German, <img src="lang_el.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Greek, <img src="lang_it.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Italian, <img src="lang_ja.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Japanese, <img src="lang_pl.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Polish, <img src="lang_pt.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Portuguese, <img src="lang_ru.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Russian, <img src="lang_es.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Spanish, <img src="lang_sv.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Swedish, <img src="lang_tr.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Turkish, and <img src="lang_uk.svg" style="height: 13px !important; border: 1px solid #cccccc; vertical-align: initial !important;"> Ukrainian. Antimicrobial drug (group) names and colloquial microorganism names are provided in these languages.
### Practical examples ### Practical examples

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_custom_antimicrobials.R % Please edit documentation in R/custom_antimicrobials.R
\name{add_custom_antimicrobials} \name{add_custom_antimicrobials}
\alias{add_custom_antimicrobials} \alias{add_custom_antimicrobials}
\alias{clear_custom_antimicrobials} \alias{clear_custom_antimicrobials}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_custom_microorganisms.R % Please edit documentation in R/custom_microorganisms.R
\name{add_custom_microorganisms} \name{add_custom_microorganisms}
\alias{add_custom_microorganisms} \alias{add_custom_microorganisms}
\alias{clear_custom_microorganisms} \alias{clear_custom_microorganisms}
@ -10,7 +10,7 @@ add_custom_microorganisms(x)
clear_custom_microorganisms() clear_custom_microorganisms()
} }
\arguments{ \arguments{
\item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing columns "genus" and "species"} \item{x}{a \link{data.frame} resembling the \link{microorganisms} data set, at least containing column "genus" (case-insensitive)}
} }
\description{ \description{
With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package, such the non-taxonomic outcome of laboratory analysis. With \code{\link[=add_custom_microorganisms]{add_custom_microorganisms()}} you can add your own custom microorganisms to the \code{AMR} package, such the non-taxonomic outcome of laboratory analysis.
@ -24,7 +24,7 @@ There are two ways to automate this process:
\strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method: \strong{Method 1:} Save the microorganisms to a local or remote file (can even be the internet). To use this method:
\enumerate{ \enumerate{
\item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least columns "genus" and "species") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location. \item Create a data set in the structure of the \link{microorganisms} data set (containing at the very least column "genus") and save it with \code{\link[=saveRDS]{saveRDS()}} to a location of choice, e.g. \code{"~/my_custom_mo.rds"}, or any remote location.
\item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file: \item Set the file location to the \code{AMR_custom_mo} \R option: \code{options(AMR_custom_mo = "~/my_custom_mo.rds")}. This can even be a remote file location, such as an https URL. Since options are not saved between \R sessions, it is best to save this option to the \code{.Rprofile} file so that it will loaded on start-up of \R. To do this, open the \code{.Rprofile} file using e.g. \code{utils::file.edit("~/.Rprofile")}, add this text and save the file:
\if{html}{\out{<div class="sourceCode r">}}\preformatted{# Add custom microorganism codes: \if{html}{\out{<div class="sourceCode r">}}\preformatted{# Add custom microorganism codes:
@ -79,6 +79,19 @@ mo_family("E. asburiae/cloacae")
mo_gramstain("Enterobacter asburiae/cloacae") mo_gramstain("Enterobacter asburiae/cloacae")
mo_info("Enterobacter asburiae/cloacae") mo_info("Enterobacter asburiae/cloacae")
# the function tries to be forgiving:
add_custom_microorganisms(
data.frame(GENUS = "ESCHERICHIA / KLEBSIELLA",
SPECIES = "SPECIES"))
mo_name("ESCHERICHIA / KLEBSIELLA")
mo_family("Escherichia/Klebsiella")
add_custom_microorganisms(
data.frame(genus = "Citrobacter", species = "freundii complex"))
mo_name("C. freundii complex")
mo_gramstain("C. freundii complex")
} }
} }
\seealso{ \seealso{

View File

@ -131,7 +131,9 @@ The coercion rules consider the prevalence of microorganisms in humans, which is
With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as: With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as:
\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
where: where:
\itemize{ \itemize{
@ -139,7 +141,7 @@ where:
\item \eqn{n} is a taxonomic name (genus, species, and subspecies); \item \eqn{n} is a taxonomic name (genus, species, and subspecies);
\item \eqn{l_n} is the length of \eqn{n}; \item \eqn{l_n} is the length of \eqn{n};
\item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; \item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
\item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
} }

View File

@ -23,7 +23,9 @@ Later, the work of Bartlett A \emph{et al.} about bacterial pathogens infecting
With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as: With ambiguous user input in \code{\link[=as.mo]{as.mo()}} and all the \code{\link[=mo_property]{mo_*}} functions, the returned results are chosen based on their matching score using \code{\link[=mo_matching_score]{mo_matching_score()}}. This matching score \eqn{m}, is calculated as:
\ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}} \ifelse{latex}{\deqn{m_{(x, n)} = \frac{l_{n} - 0.5 \cdot \min \begin{cases}l_{n} \\ \textrm{lev}(x, n)\end{cases}}{l_{n} \cdot p_{n} \cdot k_{n}}}}{
\ifelse{html}{\figure{mo_matching_score.png}{options: width="300" alt="mo matching score"}}{m(x, n) = ( l_n * min(l_n, lev(x, n) ) ) / ( l_n * p_n * k_n )}}
where: where:
\itemize{ \itemize{
@ -31,7 +33,7 @@ where:
\item \eqn{n} is a taxonomic name (genus, species, and subspecies); \item \eqn{n} is a taxonomic name (genus, species, and subspecies);
\item \eqn{l_n} is the length of \eqn{n}; \item \eqn{l_n} is the length of \eqn{n};
\item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n}; \item \eqn{lev} is the \href{https://en.wikipedia.org/wiki/Levenshtein_distance}{Levenshtein distance function} (counting any insertion as 1, and any deletion or substitution as 2) that is needed to change \eqn{x} into \eqn{n};
\item \eqn{p_{n}} is the human pathogenic prevalence group of \eqn{n}, as described below; \item \eqn{p_n} is the human pathogenic prevalence group of \eqn{n}, as described below;
\item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5. \item \eqn{k_n} is the taxonomic kingdom of \eqn{n}, set as Bacteria = 1, Fungi = 2, Protozoa = 3, Archaea = 4, others = 5.
} }

View File

@ -34,12 +34,14 @@ $(document).ready(function() {
// Replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page // Replace 'Developers' with 'Maintainers' on the main page, and "Contributors" on the Authors page
$(".developers h2").text("Maintainers"); $(".developers h2").text("Maintainers");
$(".citation h2:first").text("All contributors"); $(".citation h2:first").text("All contributors");
$(".citation h2:second").text("How to Cite");
// replace \donttest and \dontrun texts in Examples // remove \donttest and \dontrun texts in Examples
if ($("#ref-examples ~ div pre").length > 0) { if ($("#ref-examples ~ div pre").length > 0) {
$("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\donttest{", "")); const regex1 = /# \\dont(test|run)\{(\n|<br>)*/ig;
$("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# \\dontrun{", "")); const regex2 = /(\n|<br>)*# \}/ig;
$("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll("# }", "")); $("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex1, ""));
$("#ref-examples ~ div pre").html($("#ref-examples ~ div pre").html().replaceAll(regex2, ""));
} }
// remove leading newline in code examples on changelog // remove leading newline in code examples on changelog