1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-14 00:11:50 +01:00

(v2.1.1.9068) fix for mo_url() and as.mo() for synonyms

This commit is contained in:
dr. M.S. (Matthijs) Berends 2024-07-17 14:29:55 +02:00
parent 63f6790c58
commit 7258a491b9
6 changed files with 79 additions and 56 deletions

66
R/mo.R
View File

@ -99,10 +99,11 @@
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571} #' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801} #' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`. #' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
#' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`. #' 8. `r TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(TAXONOMY_VERSION$MycoBank$accessed_date)`.
#' 9. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`. #' 9. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
#' 10. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`> #' 10. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`.
#' 11. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269} #' 11. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
#' 12. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
#' @export #' @export
#' @return A [character] [vector] with additional class [`mo`] #' @return A [character] [vector] with additional class [`mo`]
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
@ -418,24 +419,10 @@ as.mo <- function(x,
} # end of loop over all yet unknowns } # end of loop over all yet unknowns
# Keep or replace synonyms ---- # Keep or replace synonyms ----
lpsn_matches <- AMR_env$MO_lookup$lpsn_renamed_to[match(out, AMR_env$MO_lookup$mo)] out_current <- synonym_mo_to_accepted_mo(out, fill_in_accepted = FALSE)
lpsn_matches[!lpsn_matches %in% AMR_env$MO_lookup$lpsn] <- NA AMR_env$mo_renamed <- list(old = out[!is.na(out_current)])
mycobank_matches <- AMR_env$MO_lookup$mycobank_renamed_to[match(out, AMR_env$MO_lookup$mo)]
mycobank_matches[!mycobank_matches %in% AMR_env$MO_lookup$mycobank] <- NA
# GBIF only for non-bacteria and non-fungi, since we use LPSN as primary source for bacteria and MycoBank for fungi
# (an example is Strep anginosus, renamed according to GBIF, not according to LPSN)
gbif_matches <- AMR_env$MO_lookup$gbif_renamed_to[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")][match(out, AMR_env$MO_lookup$mo[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")])]
gbif_matches[!gbif_matches %in% AMR_env$MO_lookup$gbif] <- NA
AMR_env$mo_renamed <- list(
old = out[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
mycobank_matches = mycobank_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)]
)
if (isFALSE(keep_synonyms)) { if (isFALSE(keep_synonyms)) {
out[which(!is.na(gbif_matches))] <- AMR_env$MO_lookup$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR_env$MO_lookup$gbif)] out[!is.na(out_current)] <- out_current[!is.na(out_current)]
out[which(!is.na(mycobank_matches))] <- AMR_env$MO_lookup$mo[match(mycobank_matches[which(!is.na(mycobank_matches))], AMR_env$MO_lookup$mycobank)]
out[which(!is.na(lpsn_matches))] <- AMR_env$MO_lookup$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR_env$MO_lookup$lpsn)]
if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) { if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
} }
@ -1257,29 +1244,36 @@ load_mo_uncertainties <- function(metadata) {
} }
synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) { synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) {
# `dataset` is an argument so that it can be used in the regeneration of the microorganisms data set
if (identical(dataset, AMR_env$MO_lookup)) { if (identical(dataset, AMR_env$MO_lookup)) {
add_MO_lookup_to_AMR_env() add_MO_lookup_to_AMR_env()
dataset <- AMR_env$MO_lookup dataset <- AMR_env$MO_lookup
} }
x_lpsn <- dataset$lpsn_renamed_to[match(x, dataset$mo)] %or% NA_character_
x_mycobank <- dataset$mycobank_renamed_to[match(x, dataset$mo)] %or% NA_character_
x_gbif <- dataset$gbif_renamed_to[match(x, dataset$mo)] %or% NA_character_
# Replace invalid values with NA out <- x
x_lpsn[!x_lpsn %in% dataset$lpsn] <- NA_character_ is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym"
x_mycobank[!x_mycobank %in% dataset$mycobank] <- NA_character_ limit <- 0
x_gbif[!x_gbif %in% dataset$gbif] <- NA_character_ while(any(is_still_synonym, na.rm = TRUE) && limit < 5) {
limit <- limit + 1
# Create output vector using vectorized operations # make sure to get the latest name, e.g. Fusarium pulicaris robiniae was first renamed to Fusarium roseum, then to Fusarium sambucinum
out <- rep(NA_character_, length(x)) # we need the MO of Fusarium pulicaris robiniae to return the MO of Fusarium sambucinum
out[is.na(out) & !is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[is.na(out) & !is.na(x_lpsn)], dataset$lpsn)] idx <- !is.na(is_still_synonym) & is_still_synonym
out[is.na(out) & !is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[is.na(out) & !is.na(x_mycobank)], dataset$mycobank)] x_gbif <- dataset$gbif_renamed_to[match(out[idx], dataset$mo)]
out[is.na(out) & !is.na(x_gbif)] <- dataset$mo[match(x_gbif[is.na(out) & !is.na(x_gbif)], dataset$gbif)] x_mycobank <- dataset$mycobank_renamed_to[match(out[idx], dataset$mo)]
x_lpsn <- dataset$lpsn_renamed_to[match(out[idx], dataset$mo)]
out[dataset$status[match(x, dataset$mo)] == "accepted"] <- NA_character_ out[idx][!is.na(x_gbif)] <- dataset$mo[match(x_gbif[idx][!is.na(x_gbif)], dataset$gbif)]
out[idx][!is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[idx][!is.na(x_mycobank)], dataset$mycobank)]
out[idx][!is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[idx][!is.na(x_lpsn)], dataset$lpsn)]
is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym"
}
x_no_synonym <- dataset$status[match(x, dataset$mo)] != "synonym"
out[x_no_synonym] <- NA_character_
if (isTRUE(fill_in_accepted)) { if (isTRUE(fill_in_accepted)) {
x_accepted <- which(dataset$status[match(x, dataset$mo)] == "accepted") out[!is.na(x_no_synonym) & x_no_synonym] <- x[!is.na(x_no_synonym) & x_no_synonym]
out[x_accepted] <- x[x_accepted]
} }
out[is.na(match(x, dataset$mo))] <- NA_character_ out[is.na(match(x, dataset$mo))] <- NA_character_

View File

@ -39,8 +39,8 @@
#' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017: #' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017:
#' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming) #' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming)
#' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated) #' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated)
#' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note) #' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note about the renaming)
#' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a warning) #' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a once-per-session warning that the name is outdated)
#' #'
#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. #' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
#' #'
@ -50,15 +50,15 @@
#' #'
#' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. #' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
#' #'
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`). #' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are yeasts that reproduce asexually through a process called budding, where a new cell develops from a small protrusion on the parent cell. Taxonomically, these are members of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes) or Pichiomycetes. *True yeasts* quite specifically refers to yeasts in the underlying order Saccharomycetales (such as *Saccharomyces cerevisiae*). Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes or Pichiomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
#' #'
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics). #' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
#' #'
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria. #' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
#' #'
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. [This MycoBank URL](`r TAXONOMY_VERSION$MycoBank$url`) will be used for fungi wherever available, [this LPSN URL](`r TAXONOMY_VERSION$LPSN$url`) for bacteria wherever available, and [this GBIF link](`r TAXONOMY_VERSION$GBIF$url`) otherwise.
#' #'
#' SNOMED codes ([mo_snomed()]) are from the version of `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info. #' SNOMED codes ([mo_snomed()]) were last updated on `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info.
#' #'
#' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names. #' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names.
#' #'
@ -71,8 +71,9 @@
#' @return #' @return
#' - An [integer] in case of [mo_year()] #' - An [integer] in case of [mo_year()]
#' - An [ordered factor][factor] in case of [mo_pathogenicity()] #' - An [ordered factor][factor] in case of [mo_pathogenicity()]
#' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()] and [mo_info()] #' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()], and [mo_info()]
#' - A named [character] in case of [mo_url()] #' - A [logical] in case of [mo_is_anaerobic()], [mo_is_gram_negative()], [mo_is_gram_positive()], [mo_is_intrinsic_resistant()], and [mo_is_yeast()]
#' - A named [character] in case of [mo_synonyms()] and [mo_url()]
#' - A [character] in all other cases #' - A [character] in all other cases
#' @export #' @export
#' @seealso Data set [microorganisms] #' @seealso Data set [microorganisms]
@ -107,8 +108,8 @@
#' mo_url("Klebsiella pneumoniae") #' mo_url("Klebsiella pneumoniae")
#' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")) #' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella"))
#' #'
#' mo_group_members("Streptococcus group A") #' mo_group_members(c("Streptococcus group A",
#' mo_group_members(c("Streptococcus group C", #' "Streptococcus group C",
#' "Streptococcus group G", #' "Streptococcus group G",
#' "Streptococcus group L")) #' "Streptococcus group L"))
#' #'
@ -118,10 +119,12 @@
#' mo_ref("Klebsiella aerogenes") #' mo_ref("Klebsiella aerogenes")
#' mo_authors("Klebsiella aerogenes") #' mo_authors("Klebsiella aerogenes")
#' mo_year("Klebsiella aerogenes") #' mo_year("Klebsiella aerogenes")
#' mo_synonyms("Klebsiella aerogenes")
#' mo_lpsn("Klebsiella aerogenes") #' mo_lpsn("Klebsiella aerogenes")
#' mo_gbif("Klebsiella aerogenes") #' mo_gbif("Klebsiella aerogenes")
#' mo_mycobank("Candida albicans") #' mo_mycobank("Candida albicans")
#' mo_synonyms("Klebsiella aerogenes") #' mo_mycobank("Candida krusei")
#' mo_mycobank("Candida krusei", keep_synonyms = TRUE)
#' #'
#' #'
#' # abbreviations known in the field ----------------------------------------- #' # abbreviations known in the field -----------------------------------------
@ -550,8 +553,7 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
load_mo_uncertainties(metadata) load_mo_uncertainties(metadata)
out <- rep(FALSE, length(x)) out <- x.mo == "F_YEAST" | (x.kingdom == "Fungi" & x.class %in% c("Saccharomycetes", "Pichiomycetes"))
out[x.kingdom == "Fungi" & x.class == "Saccharomycetes"] <- TRUE
out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
out out
} }
@ -818,7 +820,7 @@ mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
mo_current <- function(x, language = get_AMR_locale(), ...) { mo_current <- function(x, language = get_AMR_locale(), ...) {
meet_criteria(x, allow_NA = TRUE) meet_criteria(x, allow_NA = TRUE)
language <- validate_language(language) language <- validate_language(language)
x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, ...)) x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, info = FALSE, ...))
out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE) out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE)
mo_name(out, language = language) mo_name(out, language = language)
} }
@ -916,19 +918,23 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms =
x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)] x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)]
x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)] x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)]
x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)] x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)]
x.mycobank <- AMR_env$MO_lookup$mycobank[match(x.mo, AMR_env$MO_lookup$mo)]
x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)] x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)]
u <- character(length(x)) u <- character(length(x))
u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)]) u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)])
# overwrite with LPSN: # overwrite with LPSN:
u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE)) u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE))
# overwrite with MycoBank (bacteria from LPSN will not be overwritten since MycoBank has no bacteria)
u[!is.na(x.mycobank)] <- paste0(TAXONOMY_VERSION$MycoBank$url, "/name/", gsub(" ", "%20", tolower(x.name[!is.na(x.mycobank)]), fixed = TRUE))
names(u) <- x.name names(u) <- x.name
if (isTRUE(open)) { if (isTRUE(open)) {
if (length(u) > 1) { if (length(u) > 1) {
warning_("in `mo_url()`: only the first URL will be opened, as `browseURL()` only suports one string.") warning_("in `mo_url()`: only the first URL will be opened, as R's built-in function `browseURL()` only suports one string.")
} }
utils::browseURL(u[1L]) utils::browseURL(u[1L])
} }

View File

@ -559,6 +559,9 @@ taxonomy_mycobank <- taxonomy_mycobank %>%
TRUE ~ "") TRUE ~ "")
) )
# FOR 2025: use this to get all the genera with updated names from MO_PREVALENT_GENERA:
# AMR::microorganisms %>% filter(genus %in% MO_PREVALENT_GENERA) %>% pull(fullname) %>% mo_current() %>% mo_genus() %>% unique() %>% sort()
# keep only the relevant ones # keep only the relevant ones
taxonomy_mycobank <- taxonomy_mycobank %>% taxonomy_mycobank <- taxonomy_mycobank %>%
filter(order %in% include_fungal_orders | filter(order %in% include_fungal_orders |
@ -1078,6 +1081,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ lpsn[match(kingdom, fullname)], rank == "genus" ~ lpsn[match(kingdom, fullname)],
# species, always has a genus # species, always has a genus
rank == "species" ~ lpsn[match(genus, fullname)], rank == "species" ~ lpsn[match(genus, fullname)],
# subspecies, always has a genus + species
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
TRUE ~ NA_character_), TRUE ~ NA_character_),
mycobank_parent = case_when( mycobank_parent = case_when(
rank == "phylum" ~ mycobank[match(kingdom, fullname)], rank == "phylum" ~ mycobank[match(kingdom, fullname)],
@ -1101,6 +1106,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ mycobank[match(kingdom, fullname)], rank == "genus" ~ mycobank[match(kingdom, fullname)],
# species # species
rank == "species" ~ mycobank[match(genus, fullname)], rank == "species" ~ mycobank[match(genus, fullname)],
# subspecies
rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)],
TRUE ~ NA_character_), TRUE ~ NA_character_),
gbif_parent = case_when( gbif_parent = case_when(
rank == "phylum" ~ gbif[match(kingdom, fullname)], rank == "phylum" ~ gbif[match(kingdom, fullname)],
@ -1124,6 +1131,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ gbif[match(kingdom, fullname)], rank == "genus" ~ gbif[match(kingdom, fullname)],
# species # species
rank == "species" ~ gbif[match(genus, fullname)], rank == "species" ~ gbif[match(genus, fullname)],
# subspecies
rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)],
TRUE ~ NA_character_)) TRUE ~ NA_character_))
# these still have no record in our data set: # these still have no record in our data set:
@ -1699,6 +1708,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ lpsn[match(kingdom, fullname)], rank == "genus" ~ lpsn[match(kingdom, fullname)],
# species, always has a genus # species, always has a genus
rank == "species" ~ lpsn[match(genus, fullname)], rank == "species" ~ lpsn[match(genus, fullname)],
# subspecies, always has a genus + species
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
TRUE ~ NA_character_), TRUE ~ NA_character_),
mycobank_parent = case_when( mycobank_parent = case_when(
rank == "phylum" ~ mycobank[match(kingdom, fullname)], rank == "phylum" ~ mycobank[match(kingdom, fullname)],
@ -1722,6 +1733,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ mycobank[match(kingdom, fullname)], rank == "genus" ~ mycobank[match(kingdom, fullname)],
# species # species
rank == "species" ~ mycobank[match(genus, fullname)], rank == "species" ~ mycobank[match(genus, fullname)],
# subspecies
rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)],
TRUE ~ NA_character_), TRUE ~ NA_character_),
gbif_parent = case_when( gbif_parent = case_when(
rank == "phylum" ~ gbif[match(kingdom, fullname)], rank == "phylum" ~ gbif[match(kingdom, fullname)],
@ -1745,6 +1758,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ gbif[match(kingdom, fullname)], rank == "genus" ~ gbif[match(kingdom, fullname)],
# species # species
rank == "species" ~ gbif[match(genus, fullname)], rank == "species" ~ gbif[match(genus, fullname)],
# subspecies
rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)],
TRUE ~ NA_character_)) TRUE ~ NA_character_))
# check again # check again
@ -1810,6 +1825,8 @@ taxonomy <- taxonomy %>%
rank == "genus" ~ lpsn[match(kingdom, fullname)], rank == "genus" ~ lpsn[match(kingdom, fullname)],
# species, always has a genus # species, always has a genus
rank == "species" ~ lpsn[match(genus, fullname)], rank == "species" ~ lpsn[match(genus, fullname)],
# subspecies, always has a genus + species
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
TRUE ~ NA_character_)) TRUE ~ NA_character_))

Binary file not shown.

View File

@ -194,6 +194,10 @@ expect_equal(
mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")),
c(FALSE, TRUE, FALSE) c(FALSE, TRUE, FALSE)
) )
expect_equal(
mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")),
c(TRUE, FALSE, FALSE)
)
# is intrinsic resistant # is intrinsic resistant
expect_equal( expect_equal(
mo_is_intrinsic_resistant( mo_is_intrinsic_resistant(

View File

@ -129,10 +129,12 @@ The tab-separated text file and Microsoft Excel workbook both contain all SNOMED
### Source ### Source
This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF): This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the `r AMR:::TAXONOMY_VERSION$LPSN$name`, `r AMR:::TAXONOMY_VERSION$MycoBank$name`, and the `r AMR:::TAXONOMY_VERSION$GBIF$name`:
* `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`. * `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`.
* `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`. * `r AMR:::TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$MycoBank$accessed_date)`.
* `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`.
* `r AMR:::TAXONOMY_VERSION$BacDive$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$BacDive$accessed_date)`.
* `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`> * `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`>
### Example content ### Example content