mirror of
https://github.com/msberends/AMR.git
synced 2025-10-24 14:36:18 +02:00
(v2.1.1.9068) fix for mo_url() and as.mo() for synonyms
This commit is contained in:
70
R/mo.R
70
R/mo.R
@@ -99,10 +99,11 @@
|
|||||||
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
|
#' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571}
|
||||||
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
|
#' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019.** *Microorganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801}
|
||||||
#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
|
#' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`.
|
||||||
#' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
|
#' 8. `r TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(TAXONOMY_VERSION$MycoBank$accessed_date)`.
|
||||||
#' 9. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`.
|
#' 9. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`.
|
||||||
#' 10. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
|
#' 10. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`.
|
||||||
#' 11. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
|
#' 11. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`>
|
||||||
|
#' 12. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269}
|
||||||
#' @export
|
#' @export
|
||||||
#' @return A [character] [vector] with additional class [`mo`]
|
#' @return A [character] [vector] with additional class [`mo`]
|
||||||
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
|
#' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's.
|
||||||
@@ -418,24 +419,10 @@ as.mo <- function(x,
|
|||||||
} # end of loop over all yet unknowns
|
} # end of loop over all yet unknowns
|
||||||
|
|
||||||
# Keep or replace synonyms ----
|
# Keep or replace synonyms ----
|
||||||
lpsn_matches <- AMR_env$MO_lookup$lpsn_renamed_to[match(out, AMR_env$MO_lookup$mo)]
|
out_current <- synonym_mo_to_accepted_mo(out, fill_in_accepted = FALSE)
|
||||||
lpsn_matches[!lpsn_matches %in% AMR_env$MO_lookup$lpsn] <- NA
|
AMR_env$mo_renamed <- list(old = out[!is.na(out_current)])
|
||||||
mycobank_matches <- AMR_env$MO_lookup$mycobank_renamed_to[match(out, AMR_env$MO_lookup$mo)]
|
|
||||||
mycobank_matches[!mycobank_matches %in% AMR_env$MO_lookup$mycobank] <- NA
|
|
||||||
# GBIF only for non-bacteria and non-fungi, since we use LPSN as primary source for bacteria and MycoBank for fungi
|
|
||||||
# (an example is Strep anginosus, renamed according to GBIF, not according to LPSN)
|
|
||||||
gbif_matches <- AMR_env$MO_lookup$gbif_renamed_to[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")][match(out, AMR_env$MO_lookup$mo[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")])]
|
|
||||||
gbif_matches[!gbif_matches %in% AMR_env$MO_lookup$gbif] <- NA
|
|
||||||
AMR_env$mo_renamed <- list(
|
|
||||||
old = out[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
|
|
||||||
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
|
|
||||||
mycobank_matches = mycobank_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
|
|
||||||
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)]
|
|
||||||
)
|
|
||||||
if (isFALSE(keep_synonyms)) {
|
if (isFALSE(keep_synonyms)) {
|
||||||
out[which(!is.na(gbif_matches))] <- AMR_env$MO_lookup$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR_env$MO_lookup$gbif)]
|
out[!is.na(out_current)] <- out_current[!is.na(out_current)]
|
||||||
out[which(!is.na(mycobank_matches))] <- AMR_env$MO_lookup$mo[match(mycobank_matches[which(!is.na(mycobank_matches))], AMR_env$MO_lookup$mycobank)]
|
|
||||||
out[which(!is.na(lpsn_matches))] <- AMR_env$MO_lookup$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR_env$MO_lookup$lpsn)]
|
|
||||||
if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
|
if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
|
||||||
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
|
print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
|
||||||
}
|
}
|
||||||
@@ -1257,29 +1244,36 @@ load_mo_uncertainties <- function(metadata) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) {
|
synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) {
|
||||||
|
# `dataset` is an argument so that it can be used in the regeneration of the microorganisms data set
|
||||||
if (identical(dataset, AMR_env$MO_lookup)) {
|
if (identical(dataset, AMR_env$MO_lookup)) {
|
||||||
add_MO_lookup_to_AMR_env()
|
add_MO_lookup_to_AMR_env()
|
||||||
dataset <- AMR_env$MO_lookup
|
dataset <- AMR_env$MO_lookup
|
||||||
}
|
}
|
||||||
x_lpsn <- dataset$lpsn_renamed_to[match(x, dataset$mo)] %or% NA_character_
|
|
||||||
x_mycobank <- dataset$mycobank_renamed_to[match(x, dataset$mo)] %or% NA_character_
|
out <- x
|
||||||
x_gbif <- dataset$gbif_renamed_to[match(x, dataset$mo)] %or% NA_character_
|
is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym"
|
||||||
|
limit <- 0
|
||||||
|
while(any(is_still_synonym, na.rm = TRUE) && limit < 5) {
|
||||||
|
limit <- limit + 1
|
||||||
|
|
||||||
|
# make sure to get the latest name, e.g. Fusarium pulicaris robiniae was first renamed to Fusarium roseum, then to Fusarium sambucinum
|
||||||
|
# we need the MO of Fusarium pulicaris robiniae to return the MO of Fusarium sambucinum
|
||||||
|
idx <- !is.na(is_still_synonym) & is_still_synonym
|
||||||
|
x_gbif <- dataset$gbif_renamed_to[match(out[idx], dataset$mo)]
|
||||||
|
x_mycobank <- dataset$mycobank_renamed_to[match(out[idx], dataset$mo)]
|
||||||
|
x_lpsn <- dataset$lpsn_renamed_to[match(out[idx], dataset$mo)]
|
||||||
|
|
||||||
|
out[idx][!is.na(x_gbif)] <- dataset$mo[match(x_gbif[idx][!is.na(x_gbif)], dataset$gbif)]
|
||||||
|
out[idx][!is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[idx][!is.na(x_mycobank)], dataset$mycobank)]
|
||||||
|
out[idx][!is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[idx][!is.na(x_lpsn)], dataset$lpsn)]
|
||||||
|
|
||||||
|
is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym"
|
||||||
|
}
|
||||||
|
|
||||||
# Replace invalid values with NA
|
x_no_synonym <- dataset$status[match(x, dataset$mo)] != "synonym"
|
||||||
x_lpsn[!x_lpsn %in% dataset$lpsn] <- NA_character_
|
out[x_no_synonym] <- NA_character_
|
||||||
x_mycobank[!x_mycobank %in% dataset$mycobank] <- NA_character_
|
|
||||||
x_gbif[!x_gbif %in% dataset$gbif] <- NA_character_
|
|
||||||
|
|
||||||
# Create output vector using vectorized operations
|
|
||||||
out <- rep(NA_character_, length(x))
|
|
||||||
out[is.na(out) & !is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[is.na(out) & !is.na(x_lpsn)], dataset$lpsn)]
|
|
||||||
out[is.na(out) & !is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[is.na(out) & !is.na(x_mycobank)], dataset$mycobank)]
|
|
||||||
out[is.na(out) & !is.na(x_gbif)] <- dataset$mo[match(x_gbif[is.na(out) & !is.na(x_gbif)], dataset$gbif)]
|
|
||||||
|
|
||||||
out[dataset$status[match(x, dataset$mo)] == "accepted"] <- NA_character_
|
|
||||||
if (isTRUE(fill_in_accepted)) {
|
if (isTRUE(fill_in_accepted)) {
|
||||||
x_accepted <- which(dataset$status[match(x, dataset$mo)] == "accepted")
|
out[!is.na(x_no_synonym) & x_no_synonym] <- x[!is.na(x_no_synonym) & x_no_synonym]
|
||||||
out[x_accepted] <- x[x_accepted]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out[is.na(match(x, dataset$mo))] <- NA_character_
|
out[is.na(match(x, dataset$mo))] <- NA_character_
|
||||||
|
@@ -39,8 +39,8 @@
|
|||||||
#' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017:
|
#' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017:
|
||||||
#' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming)
|
#' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming)
|
||||||
#' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated)
|
#' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated)
|
||||||
#' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note)
|
#' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note about the renaming)
|
||||||
#' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a warning)
|
#' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a once-per-session warning that the name is outdated)
|
||||||
#'
|
#'
|
||||||
#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
|
#' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`.
|
||||||
#'
|
#'
|
||||||
@@ -50,15 +50,15 @@
|
|||||||
#'
|
#'
|
||||||
#' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
|
#' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria.
|
||||||
#'
|
#'
|
||||||
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
|
#' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are yeasts that reproduce asexually through a process called budding, where a new cell develops from a small protrusion on the parent cell. Taxonomically, these are members of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes) or Pichiomycetes. *True yeasts* quite specifically refers to yeasts in the underlying order Saccharomycetales (such as *Saccharomyces cerevisiae*). Thus, for all microorganisms that are members of the taxonomic class Saccharomycetes or Pichiomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`).
|
||||||
#'
|
#'
|
||||||
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
|
#' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics).
|
||||||
#'
|
#'
|
||||||
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
|
#' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria.
|
||||||
#'
|
#'
|
||||||
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species.
|
#' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. [This MycoBank URL](`r TAXONOMY_VERSION$MycoBank$url`) will be used for fungi wherever available, [this LPSN URL](`r TAXONOMY_VERSION$LPSN$url`) for bacteria wherever available, and [this GBIF link](`r TAXONOMY_VERSION$GBIF$url`) otherwise.
|
||||||
#'
|
#'
|
||||||
#' SNOMED codes ([mo_snomed()]) are from the version of `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info.
|
#' SNOMED codes ([mo_snomed()]) were last updated on `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info.
|
||||||
#'
|
#'
|
||||||
#' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names.
|
#' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names.
|
||||||
#'
|
#'
|
||||||
@@ -71,8 +71,9 @@
|
|||||||
#' @return
|
#' @return
|
||||||
#' - An [integer] in case of [mo_year()]
|
#' - An [integer] in case of [mo_year()]
|
||||||
#' - An [ordered factor][factor] in case of [mo_pathogenicity()]
|
#' - An [ordered factor][factor] in case of [mo_pathogenicity()]
|
||||||
#' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()] and [mo_info()]
|
#' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()], and [mo_info()]
|
||||||
#' - A named [character] in case of [mo_url()]
|
#' - A [logical] in case of [mo_is_anaerobic()], [mo_is_gram_negative()], [mo_is_gram_positive()], [mo_is_intrinsic_resistant()], and [mo_is_yeast()]
|
||||||
|
#' - A named [character] in case of [mo_synonyms()] and [mo_url()]
|
||||||
#' - A [character] in all other cases
|
#' - A [character] in all other cases
|
||||||
#' @export
|
#' @export
|
||||||
#' @seealso Data set [microorganisms]
|
#' @seealso Data set [microorganisms]
|
||||||
@@ -107,8 +108,8 @@
|
|||||||
#' mo_url("Klebsiella pneumoniae")
|
#' mo_url("Klebsiella pneumoniae")
|
||||||
#' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella"))
|
#' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella"))
|
||||||
#'
|
#'
|
||||||
#' mo_group_members("Streptococcus group A")
|
#' mo_group_members(c("Streptococcus group A",
|
||||||
#' mo_group_members(c("Streptococcus group C",
|
#' "Streptococcus group C",
|
||||||
#' "Streptococcus group G",
|
#' "Streptococcus group G",
|
||||||
#' "Streptococcus group L"))
|
#' "Streptococcus group L"))
|
||||||
#'
|
#'
|
||||||
@@ -118,11 +119,13 @@
|
|||||||
#' mo_ref("Klebsiella aerogenes")
|
#' mo_ref("Klebsiella aerogenes")
|
||||||
#' mo_authors("Klebsiella aerogenes")
|
#' mo_authors("Klebsiella aerogenes")
|
||||||
#' mo_year("Klebsiella aerogenes")
|
#' mo_year("Klebsiella aerogenes")
|
||||||
|
#' mo_synonyms("Klebsiella aerogenes")
|
||||||
#' mo_lpsn("Klebsiella aerogenes")
|
#' mo_lpsn("Klebsiella aerogenes")
|
||||||
#' mo_gbif("Klebsiella aerogenes")
|
#' mo_gbif("Klebsiella aerogenes")
|
||||||
#' mo_mycobank("Candida albicans")
|
#' mo_mycobank("Candida albicans")
|
||||||
#' mo_synonyms("Klebsiella aerogenes")
|
#' mo_mycobank("Candida krusei")
|
||||||
#'
|
#' mo_mycobank("Candida krusei", keep_synonyms = TRUE)
|
||||||
|
#'
|
||||||
#'
|
#'
|
||||||
#' # abbreviations known in the field -----------------------------------------
|
#' # abbreviations known in the field -----------------------------------------
|
||||||
#'
|
#'
|
||||||
@@ -550,8 +553,7 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
|
|||||||
|
|
||||||
load_mo_uncertainties(metadata)
|
load_mo_uncertainties(metadata)
|
||||||
|
|
||||||
out <- rep(FALSE, length(x))
|
out <- x.mo == "F_YEAST" | (x.kingdom == "Fungi" & x.class %in% c("Saccharomycetes", "Pichiomycetes"))
|
||||||
out[x.kingdom == "Fungi" & x.class == "Saccharomycetes"] <- TRUE
|
|
||||||
out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
|
out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
@@ -818,7 +820,7 @@ mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio
|
|||||||
mo_current <- function(x, language = get_AMR_locale(), ...) {
|
mo_current <- function(x, language = get_AMR_locale(), ...) {
|
||||||
meet_criteria(x, allow_NA = TRUE)
|
meet_criteria(x, allow_NA = TRUE)
|
||||||
language <- validate_language(language)
|
language <- validate_language(language)
|
||||||
x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, ...))
|
x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, info = FALSE, ...))
|
||||||
out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE)
|
out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE)
|
||||||
mo_name(out, language = language)
|
mo_name(out, language = language)
|
||||||
}
|
}
|
||||||
@@ -916,19 +918,23 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms =
|
|||||||
|
|
||||||
x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)]
|
x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||||
x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)]
|
x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||||
|
|
||||||
x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)]
|
x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||||
|
x.mycobank <- AMR_env$MO_lookup$mycobank[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||||
x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)]
|
x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)]
|
||||||
|
|
||||||
u <- character(length(x))
|
u <- character(length(x))
|
||||||
u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)])
|
u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)])
|
||||||
# overwrite with LPSN:
|
# overwrite with LPSN:
|
||||||
u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE))
|
u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE))
|
||||||
|
# overwrite with MycoBank (bacteria from LPSN will not be overwritten since MycoBank has no bacteria)
|
||||||
|
u[!is.na(x.mycobank)] <- paste0(TAXONOMY_VERSION$MycoBank$url, "/name/", gsub(" ", "%20", tolower(x.name[!is.na(x.mycobank)]), fixed = TRUE))
|
||||||
|
|
||||||
names(u) <- x.name
|
names(u) <- x.name
|
||||||
|
|
||||||
if (isTRUE(open)) {
|
if (isTRUE(open)) {
|
||||||
if (length(u) > 1) {
|
if (length(u) > 1) {
|
||||||
warning_("in `mo_url()`: only the first URL will be opened, as `browseURL()` only suports one string.")
|
warning_("in `mo_url()`: only the first URL will be opened, as R's built-in function `browseURL()` only suports one string.")
|
||||||
}
|
}
|
||||||
utils::browseURL(u[1L])
|
utils::browseURL(u[1L])
|
||||||
}
|
}
|
||||||
|
@@ -559,6 +559,9 @@ taxonomy_mycobank <- taxonomy_mycobank %>%
|
|||||||
TRUE ~ "")
|
TRUE ~ "")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# FOR 2025: use this to get all the genera with updated names from MO_PREVALENT_GENERA:
|
||||||
|
# AMR::microorganisms %>% filter(genus %in% MO_PREVALENT_GENERA) %>% pull(fullname) %>% mo_current() %>% mo_genus() %>% unique() %>% sort()
|
||||||
|
|
||||||
# keep only the relevant ones
|
# keep only the relevant ones
|
||||||
taxonomy_mycobank <- taxonomy_mycobank %>%
|
taxonomy_mycobank <- taxonomy_mycobank %>%
|
||||||
filter(order %in% include_fungal_orders |
|
filter(order %in% include_fungal_orders |
|
||||||
@@ -1078,6 +1081,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
||||||
# species, always has a genus
|
# species, always has a genus
|
||||||
rank == "species" ~ lpsn[match(genus, fullname)],
|
rank == "species" ~ lpsn[match(genus, fullname)],
|
||||||
|
# subspecies, always has a genus + species
|
||||||
|
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_),
|
TRUE ~ NA_character_),
|
||||||
mycobank_parent = case_when(
|
mycobank_parent = case_when(
|
||||||
rank == "phylum" ~ mycobank[match(kingdom, fullname)],
|
rank == "phylum" ~ mycobank[match(kingdom, fullname)],
|
||||||
@@ -1101,6 +1106,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ mycobank[match(kingdom, fullname)],
|
rank == "genus" ~ mycobank[match(kingdom, fullname)],
|
||||||
# species
|
# species
|
||||||
rank == "species" ~ mycobank[match(genus, fullname)],
|
rank == "species" ~ mycobank[match(genus, fullname)],
|
||||||
|
# subspecies
|
||||||
|
rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_),
|
TRUE ~ NA_character_),
|
||||||
gbif_parent = case_when(
|
gbif_parent = case_when(
|
||||||
rank == "phylum" ~ gbif[match(kingdom, fullname)],
|
rank == "phylum" ~ gbif[match(kingdom, fullname)],
|
||||||
@@ -1124,6 +1131,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ gbif[match(kingdom, fullname)],
|
rank == "genus" ~ gbif[match(kingdom, fullname)],
|
||||||
# species
|
# species
|
||||||
rank == "species" ~ gbif[match(genus, fullname)],
|
rank == "species" ~ gbif[match(genus, fullname)],
|
||||||
|
# subspecies
|
||||||
|
rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_))
|
TRUE ~ NA_character_))
|
||||||
|
|
||||||
# these still have no record in our data set:
|
# these still have no record in our data set:
|
||||||
@@ -1699,6 +1708,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
||||||
# species, always has a genus
|
# species, always has a genus
|
||||||
rank == "species" ~ lpsn[match(genus, fullname)],
|
rank == "species" ~ lpsn[match(genus, fullname)],
|
||||||
|
# subspecies, always has a genus + species
|
||||||
|
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_),
|
TRUE ~ NA_character_),
|
||||||
mycobank_parent = case_when(
|
mycobank_parent = case_when(
|
||||||
rank == "phylum" ~ mycobank[match(kingdom, fullname)],
|
rank == "phylum" ~ mycobank[match(kingdom, fullname)],
|
||||||
@@ -1722,6 +1733,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ mycobank[match(kingdom, fullname)],
|
rank == "genus" ~ mycobank[match(kingdom, fullname)],
|
||||||
# species
|
# species
|
||||||
rank == "species" ~ mycobank[match(genus, fullname)],
|
rank == "species" ~ mycobank[match(genus, fullname)],
|
||||||
|
# subspecies
|
||||||
|
rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_),
|
TRUE ~ NA_character_),
|
||||||
gbif_parent = case_when(
|
gbif_parent = case_when(
|
||||||
rank == "phylum" ~ gbif[match(kingdom, fullname)],
|
rank == "phylum" ~ gbif[match(kingdom, fullname)],
|
||||||
@@ -1745,6 +1758,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ gbif[match(kingdom, fullname)],
|
rank == "genus" ~ gbif[match(kingdom, fullname)],
|
||||||
# species
|
# species
|
||||||
rank == "species" ~ gbif[match(genus, fullname)],
|
rank == "species" ~ gbif[match(genus, fullname)],
|
||||||
|
# subspecies
|
||||||
|
rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_))
|
TRUE ~ NA_character_))
|
||||||
|
|
||||||
# check again
|
# check again
|
||||||
@@ -1810,6 +1825,8 @@ taxonomy <- taxonomy %>%
|
|||||||
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
rank == "genus" ~ lpsn[match(kingdom, fullname)],
|
||||||
# species, always has a genus
|
# species, always has a genus
|
||||||
rank == "species" ~ lpsn[match(genus, fullname)],
|
rank == "species" ~ lpsn[match(genus, fullname)],
|
||||||
|
# subspecies, always has a genus + species
|
||||||
|
rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)],
|
||||||
TRUE ~ NA_character_))
|
TRUE ~ NA_character_))
|
||||||
|
|
||||||
|
|
||||||
|
Binary file not shown.
@@ -194,6 +194,10 @@ expect_equal(
|
|||||||
mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")),
|
mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")),
|
||||||
c(FALSE, TRUE, FALSE)
|
c(FALSE, TRUE, FALSE)
|
||||||
)
|
)
|
||||||
|
expect_equal(
|
||||||
|
mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")),
|
||||||
|
c(TRUE, FALSE, FALSE)
|
||||||
|
)
|
||||||
# is intrinsic resistant
|
# is intrinsic resistant
|
||||||
expect_equal(
|
expect_equal(
|
||||||
mo_is_intrinsic_resistant(
|
mo_is_intrinsic_resistant(
|
||||||
|
@@ -129,10 +129,12 @@ The tab-separated text file and Microsoft Excel workbook both contain all SNOMED
|
|||||||
|
|
||||||
### Source
|
### Source
|
||||||
|
|
||||||
This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF):
|
This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the `r AMR:::TAXONOMY_VERSION$LPSN$name`, `r AMR:::TAXONOMY_VERSION$MycoBank$name`, and the `r AMR:::TAXONOMY_VERSION$GBIF$name`:
|
||||||
|
|
||||||
* `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`.
|
* `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`.
|
||||||
* `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`.
|
* `r AMR:::TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$MycoBank$accessed_date)`.
|
||||||
|
* `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`.
|
||||||
|
* `r AMR:::TAXONOMY_VERSION$BacDive$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$BacDive$accessed_date)`.
|
||||||
* `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`>
|
* `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`>
|
||||||
|
|
||||||
### Example content
|
### Example content
|
||||||
|
Reference in New Issue
Block a user