mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-25 02:36:20 +02:00 
			
		
		
		
	(v2.1.1.9068) fix for mo_url() and as.mo() for synonyms
This commit is contained in:
		
							
								
								
									
										66
									
								
								R/mo.R
									
									
									
									
									
								
							
							
						
						
									
										66
									
								
								R/mo.R
									
									
									
									
									
								
							| @@ -99,10 +99,11 @@ | |||||||
| #' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571} | #' 5. Lancefield RC (1933). **A serological differentiation of human and other groups of hemolytic streptococci.** *J Exp Med.* 57(4): 571-95; \doi{10.1084/jem.57.4.571} | ||||||
| #' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019/** *Micro.rganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801} | #' 6. Berends MS *et al.* (2022). **Trends in Occurrence and Phenotypic Resistance of Coagulase-Negative Staphylococci (CoNS) Found in Human Blood in the Northern Netherlands between 2013 and 2019/** *Micro.rganisms* 10(9), 1801; \doi{10.3390/microorganisms10091801} | ||||||
| #' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`. | #' 7. `r TAXONOMY_VERSION$LPSN$citation` Accessed from <`r TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(TAXONOMY_VERSION$LPSN$accessed_date)`. | ||||||
| #' 8. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`. | #' 8. `r TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(TAXONOMY_VERSION$MycoBank$accessed_date)`. | ||||||
| #' 9. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`. | #' 9. `r TAXONOMY_VERSION$GBIF$citation` Accessed from <`r TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(TAXONOMY_VERSION$GBIF$accessed_date)`. | ||||||
| #' 10. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`> | #' 10. `r TAXONOMY_VERSION$BacDive$citation` Accessed from <`r TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(TAXONOMY_VERSION$BacDive$accessed_date)`. | ||||||
| #' 11. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269} | #' 11. `r TAXONOMY_VERSION$SNOMED$citation` URL: <`r TAXONOMY_VERSION$SNOMED$url`> | ||||||
|  | #' 12. Bartlett A *et al.* (2022). **A comprehensive list of bacterial pathogens infecting humans** *Microbiology* 168:001269; \doi{10.1099/mic.0.001269} | ||||||
| #' @export | #' @export | ||||||
| #' @return A [character] [vector] with additional class [`mo`] | #' @return A [character] [vector] with additional class [`mo`] | ||||||
| #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. | #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. | ||||||
| @@ -418,24 +419,10 @@ as.mo <- function(x, | |||||||
|   } # end of loop over all yet unknowns |   } # end of loop over all yet unknowns | ||||||
|    |    | ||||||
|   # Keep or replace synonyms ---- |   # Keep or replace synonyms ---- | ||||||
|   lpsn_matches <- AMR_env$MO_lookup$lpsn_renamed_to[match(out, AMR_env$MO_lookup$mo)] |   out_current <- synonym_mo_to_accepted_mo(out, fill_in_accepted = FALSE) | ||||||
|   lpsn_matches[!lpsn_matches %in% AMR_env$MO_lookup$lpsn] <- NA |   AMR_env$mo_renamed <- list(old = out[!is.na(out_current)]) | ||||||
|   mycobank_matches <- AMR_env$MO_lookup$mycobank_renamed_to[match(out, AMR_env$MO_lookup$mo)] |  | ||||||
|   mycobank_matches[!mycobank_matches %in% AMR_env$MO_lookup$mycobank] <- NA |  | ||||||
|   # GBIF only for non-bacteria and non-fungi, since we use LPSN as primary source for bacteria and MycoBank for fungi |  | ||||||
|   # (an example is Strep anginosus, renamed according to GBIF, not according to LPSN) |  | ||||||
|   gbif_matches <- AMR_env$MO_lookup$gbif_renamed_to[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")][match(out, AMR_env$MO_lookup$mo[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")])] |  | ||||||
|   gbif_matches[!gbif_matches %in% AMR_env$MO_lookup$gbif] <- NA |  | ||||||
|   AMR_env$mo_renamed <- list( |  | ||||||
|     old = out[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)], |  | ||||||
|     gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)], |  | ||||||
|     mycobank_matches = mycobank_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)], |  | ||||||
|     lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)] |  | ||||||
|   ) |  | ||||||
|   if (isFALSE(keep_synonyms)) { |   if (isFALSE(keep_synonyms)) { | ||||||
|     out[which(!is.na(gbif_matches))] <- AMR_env$MO_lookup$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR_env$MO_lookup$gbif)] |     out[!is.na(out_current)] <- out_current[!is.na(out_current)] | ||||||
|     out[which(!is.na(mycobank_matches))] <- AMR_env$MO_lookup$mo[match(mycobank_matches[which(!is.na(mycobank_matches))], AMR_env$MO_lookup$mycobank)] |  | ||||||
|     out[which(!is.na(lpsn_matches))] <- AMR_env$MO_lookup$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR_env$MO_lookup$lpsn)] |  | ||||||
|     if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) { |     if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) { | ||||||
|       print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") |       print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)") | ||||||
|     } |     } | ||||||
| @@ -1257,29 +1244,36 @@ load_mo_uncertainties <- function(metadata) { | |||||||
| } | } | ||||||
|  |  | ||||||
| synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) { | synonym_mo_to_accepted_mo <- function(x, fill_in_accepted = FALSE, dataset = AMR_env$MO_lookup) { | ||||||
|  |   # `dataset` is an argument so that it can be used in the regeneration of the microorganisms data set | ||||||
|   if (identical(dataset, AMR_env$MO_lookup)) { |   if (identical(dataset, AMR_env$MO_lookup)) { | ||||||
|     add_MO_lookup_to_AMR_env() |     add_MO_lookup_to_AMR_env() | ||||||
|     dataset <- AMR_env$MO_lookup |     dataset <- AMR_env$MO_lookup | ||||||
|   } |   } | ||||||
|   x_lpsn <- dataset$lpsn_renamed_to[match(x, dataset$mo)] %or% NA_character_ |  | ||||||
|   x_mycobank <- dataset$mycobank_renamed_to[match(x, dataset$mo)] %or% NA_character_ |  | ||||||
|   x_gbif <- dataset$gbif_renamed_to[match(x, dataset$mo)] %or% NA_character_ |  | ||||||
|  |  | ||||||
|   # Replace invalid values with NA |   out <- x | ||||||
|   x_lpsn[!x_lpsn %in% dataset$lpsn] <- NA_character_ |   is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym" | ||||||
|   x_mycobank[!x_mycobank %in% dataset$mycobank] <- NA_character_ |   limit <- 0 | ||||||
|   x_gbif[!x_gbif %in% dataset$gbif] <- NA_character_ |   while(any(is_still_synonym, na.rm = TRUE) && limit < 5) { | ||||||
|  |     limit <- limit + 1 | ||||||
|      |      | ||||||
|   # Create output vector using vectorized operations |     # make sure to get the latest name, e.g. Fusarium pulicaris robiniae was first renamed to Fusarium roseum, then to Fusarium sambucinum | ||||||
|   out <- rep(NA_character_, length(x)) |     # we need the MO of Fusarium pulicaris robiniae to return the MO of Fusarium sambucinum | ||||||
|   out[is.na(out) & !is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[is.na(out) & !is.na(x_lpsn)], dataset$lpsn)] |     idx <- !is.na(is_still_synonym) & is_still_synonym | ||||||
|   out[is.na(out) & !is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[is.na(out) & !is.na(x_mycobank)], dataset$mycobank)] |     x_gbif <- dataset$gbif_renamed_to[match(out[idx], dataset$mo)] | ||||||
|   out[is.na(out) & !is.na(x_gbif)] <- dataset$mo[match(x_gbif[is.na(out) & !is.na(x_gbif)], dataset$gbif)] |     x_mycobank <- dataset$mycobank_renamed_to[match(out[idx], dataset$mo)] | ||||||
|  |     x_lpsn <- dataset$lpsn_renamed_to[match(out[idx], dataset$mo)] | ||||||
|  |  | ||||||
|   out[dataset$status[match(x, dataset$mo)] == "accepted"] <- NA_character_ |     out[idx][!is.na(x_gbif)] <- dataset$mo[match(x_gbif[idx][!is.na(x_gbif)], dataset$gbif)] | ||||||
|  |     out[idx][!is.na(x_mycobank)] <- dataset$mo[match(x_mycobank[idx][!is.na(x_mycobank)], dataset$mycobank)] | ||||||
|  |     out[idx][!is.na(x_lpsn)] <- dataset$mo[match(x_lpsn[idx][!is.na(x_lpsn)], dataset$lpsn)] | ||||||
|  |  | ||||||
|  |     is_still_synonym <- dataset$status[match(out, dataset$mo)] == "synonym" | ||||||
|  |   } | ||||||
|  |    | ||||||
|  |   x_no_synonym <- dataset$status[match(x, dataset$mo)] != "synonym" | ||||||
|  |   out[x_no_synonym] <- NA_character_ | ||||||
|   if (isTRUE(fill_in_accepted)) { |   if (isTRUE(fill_in_accepted)) { | ||||||
|     x_accepted <- which(dataset$status[match(x, dataset$mo)] == "accepted") |     out[!is.na(x_no_synonym) & x_no_synonym] <- x[!is.na(x_no_synonym) & x_no_synonym] | ||||||
|     out[x_accepted] <- x[x_accepted] |  | ||||||
|   } |   } | ||||||
|    |    | ||||||
|   out[is.na(match(x, dataset$mo))] <- NA_character_ |   out[is.na(match(x, dataset$mo))] <- NA_character_ | ||||||
|   | |||||||
| @@ -39,8 +39,8 @@ | |||||||
| #' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017: | #' @details All functions will, at default, **not** keep old taxonomic properties, as synonyms are automatically replaced with the current taxonomy. Take for example *Enterobacter aerogenes*, which was initially named in 1960 but renamed to *Klebsiella aerogenes* in 2017: | ||||||
| #' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming) | #' - `mo_genus("Enterobacter aerogenes")` will return `"Klebsiella"` (with a note about the renaming) | ||||||
| #' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated) | #' - `mo_genus("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Enterobacter"` (with a once-per-session warning that the name is outdated) | ||||||
| #' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note) | #' - `mo_ref("Enterobacter aerogenes")` will return `"Tindall et al., 2017"` (with a note about the renaming) | ||||||
| #' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a warning) | #' - `mo_ref("Enterobacter aerogenes", keep_synonyms = TRUE)` will return `"Hormaeche et al., 1960"` (with a once-per-session warning that the name is outdated) | ||||||
| #' | #' | ||||||
| #' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. | #' The short name ([mo_shortname()]) returns the first character of the genus and the full species, such as `"E. coli"`, for species and subspecies. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. As a result, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. | ||||||
| #' | #' | ||||||
| @@ -50,15 +50,15 @@ | |||||||
| #' | #' | ||||||
| #' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. | #' Determination of the Gram stain ([mo_gramstain()]) will be based on the taxonomic kingdom and phylum. Originally, Cavalier-Smith defined the so-called subkingdoms Negibacteria and Posibacteria (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318/)), and only considered these phyla as Posibacteria: Actinobacteria, Chloroflexi, Firmicutes, and Tenericutes. These phyla were later renamed to Actinomycetota, Chloroflexota, Bacillota, and Mycoplasmatota (2021, [PMID 34694987](https://pubmed.ncbi.nlm.nih.gov/34694987/)). Bacteria in these phyla are considered Gram-positive in this `AMR` package, except for members of the class Negativicutes (within phylum Bacillota) which are Gram-negative. All other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. 
Functions [mo_is_gram_negative()] and [mo_is_gram_positive()] always return `TRUE` or `FALSE` (or `NA` when the input is `NA` or the MO code is `UNKNOWN`), thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. | ||||||
| #' | #' | ||||||
| #' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are fungi of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes). *True yeasts* are aggregated into the underlying order Saccharomycetales. Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`). | #' Determination of yeasts ([mo_is_yeast()]) will be based on the taxonomic kingdom and class. *Budding yeasts* are yeasts that reproduce asexually through a process called budding, where a new cell develops from a small protrusion on the parent cell. Taxonomically, these are members of the phylum Ascomycota, class Saccharomycetes (also called Hemiascomycetes) or Pichiomycetes. *True yeasts* quite specifically refers to yeasts in the underlying order Saccharomycetales (such as *Saccharomyces cerevisiae*). Thus, for all microorganisms that are member of the taxonomic class Saccharomycetes or Pichiomycetes, the function will return `TRUE`. It returns `FALSE` otherwise (or `NA` when the input is `NA` or the MO code is `UNKNOWN`). | ||||||
| #' | #' | ||||||
| #' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics). | #' Determination of intrinsic resistance ([mo_is_intrinsic_resistant()]) will be based on the [intrinsic_resistant] data set, which is based on `r format_eucast_version_nr(3.3)`. The [mo_is_intrinsic_resistant()] function can be vectorised over both argument `x` (input for microorganisms) and `ab` (input for antibiotics). | ||||||
| #'  | #'  | ||||||
| #' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria. | #' Determination of bacterial oxygen tolerance ([mo_oxygen_tolerance()]) will be based on BacDive, see *Source*. The function [mo_is_anaerobic()] only returns `TRUE` if the oxygen tolerance is `"anaerobe"`, indicating an obligate anaerobic species or genus. It always returns `FALSE` for species outside the taxonomic kingdom of Bacteria. | ||||||
| #' | #' | ||||||
| #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. | #' The function [mo_url()] will return the direct URL to the online database entry, which also shows the scientific reference of the concerned species. [This MycoBank URL](`r TAXONOMY_VERSION$MycoBank$url`) will be used for fungi wherever available, [this LPSN URL](`r TAXONOMY_VERSION$LPSN$url`) for bacteria wherever available, and [this GBIF link](`r TAXONOMY_VERSION$GBIF$url`) otherwise. | ||||||
| #' | #' | ||||||
| #' SNOMED codes ([mo_snomed()]) are from the version of `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info. | #' SNOMED codes ([mo_snomed()]) were last updated on `r documentation_date(TAXONOMY_VERSION$SNOMED$accessed_date)`. See *Source* and the [microorganisms] data set for more info. | ||||||
| #' | #' | ||||||
| #' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names. | #' Old taxonomic names (so-called 'synonyms') can be retrieved with [mo_synonyms()] (which will have the scientific reference as [name][base::names()]), the current taxonomic name can be retrieved with [mo_current()]. Both functions return full names. | ||||||
| #' | #' | ||||||
| @@ -71,8 +71,9 @@ | |||||||
| #' @return | #' @return | ||||||
| #' - An [integer] in case of [mo_year()] | #' - An [integer] in case of [mo_year()] | ||||||
| #' - An [ordered factor][factor] in case of [mo_pathogenicity()] | #' - An [ordered factor][factor] in case of [mo_pathogenicity()] | ||||||
| #' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()] and [mo_info()] | #' - A [list] in case of [mo_taxonomy()], [mo_synonyms()], [mo_snomed()], and [mo_info()] | ||||||
| #' - A named [character] in case of [mo_url()] | #' - A [logical] in case of [mo_is_anaerobic()], [mo_is_gram_negative()], [mo_is_gram_positive()], [mo_is_intrinsic_resistant()], and [mo_is_yeast()] | ||||||
|  | #' - A named [character] in case of [mo_synonyms()] and [mo_url()] | ||||||
| #' - A [character] in all other cases | #' - A [character] in all other cases | ||||||
| #' @export | #' @export | ||||||
| #' @seealso Data set [microorganisms] | #' @seealso Data set [microorganisms] | ||||||
| @@ -107,8 +108,8 @@ | |||||||
| #' mo_url("Klebsiella pneumoniae") | #' mo_url("Klebsiella pneumoniae") | ||||||
| #' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")) | #' mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")) | ||||||
| #'  | #'  | ||||||
| #' mo_group_members("Streptococcus group A") | #' mo_group_members(c("Streptococcus group A", | ||||||
| #' mo_group_members(c("Streptococcus group C", | #'                    "Streptococcus group C", | ||||||
| #'                    "Streptococcus group G", | #'                    "Streptococcus group G", | ||||||
| #'                    "Streptococcus group L")) | #'                    "Streptococcus group L")) | ||||||
| #'  | #'  | ||||||
| @@ -118,10 +119,12 @@ | |||||||
| #' mo_ref("Klebsiella aerogenes") | #' mo_ref("Klebsiella aerogenes") | ||||||
| #' mo_authors("Klebsiella aerogenes") | #' mo_authors("Klebsiella aerogenes") | ||||||
| #' mo_year("Klebsiella aerogenes") | #' mo_year("Klebsiella aerogenes") | ||||||
|  | #' mo_synonyms("Klebsiella aerogenes") | ||||||
| #' mo_lpsn("Klebsiella aerogenes") | #' mo_lpsn("Klebsiella aerogenes") | ||||||
| #' mo_gbif("Klebsiella aerogenes") | #' mo_gbif("Klebsiella aerogenes") | ||||||
| #' mo_mycobank("Candida albicans") | #' mo_mycobank("Candida albicans") | ||||||
| #' mo_synonyms("Klebsiella aerogenes") | #' mo_mycobank("Candida krusei") | ||||||
|  | #' mo_mycobank("Candida krusei", keep_synonyms = TRUE) | ||||||
| #'  | #'  | ||||||
| #' | #' | ||||||
| #' # abbreviations known in the field ----------------------------------------- | #' # abbreviations known in the field ----------------------------------------- | ||||||
| @@ -550,8 +553,7 @@ mo_is_yeast <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio | |||||||
|  |  | ||||||
|   load_mo_uncertainties(metadata) |   load_mo_uncertainties(metadata) | ||||||
|  |  | ||||||
|   out <- rep(FALSE, length(x)) |   out <- x.mo == "F_YEAST" | (x.kingdom == "Fungi" & x.class %in% c("Saccharomycetes", "Pichiomycetes")) | ||||||
|   out[x.kingdom == "Fungi" & x.class == "Saccharomycetes"] <- TRUE |  | ||||||
|   out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA |   out[x.mo %in% c(NA_character_, "UNKNOWN")] <- NA | ||||||
|   out |   out | ||||||
| } | } | ||||||
| @@ -818,7 +820,7 @@ mo_synonyms <- function(x, language = get_AMR_locale(), keep_synonyms = getOptio | |||||||
| mo_current <- function(x, language = get_AMR_locale(), ...) { | mo_current <- function(x, language = get_AMR_locale(), ...) { | ||||||
|   meet_criteria(x, allow_NA = TRUE) |   meet_criteria(x, allow_NA = TRUE) | ||||||
|   language <- validate_language(language) |   language <- validate_language(language) | ||||||
|   x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, ...)) |   x.mo <- suppressWarnings(as.mo(x, keep_synonyms = TRUE, info = FALSE, ...)) | ||||||
|   out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE) |   out <- synonym_mo_to_accepted_mo(x.mo, fill_in_accepted = TRUE) | ||||||
|   mo_name(out, language = language) |   mo_name(out, language = language) | ||||||
| } | } | ||||||
| @@ -916,19 +918,23 @@ mo_url <- function(x, open = FALSE, language = get_AMR_locale(), keep_synonyms = | |||||||
|  |  | ||||||
|   x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)] |   x.rank <- AMR_env$MO_lookup$rank[match(x.mo, AMR_env$MO_lookup$mo)] | ||||||
|   x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)] |   x.name <- AMR_env$MO_lookup$fullname[match(x.mo, AMR_env$MO_lookup$mo)] | ||||||
|  |    | ||||||
|   x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)] |   x.lpsn <- AMR_env$MO_lookup$lpsn[match(x.mo, AMR_env$MO_lookup$mo)] | ||||||
|  |   x.mycobank <- AMR_env$MO_lookup$mycobank[match(x.mo, AMR_env$MO_lookup$mo)] | ||||||
|   x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)] |   x.gbif <- AMR_env$MO_lookup$gbif[match(x.mo, AMR_env$MO_lookup$mo)] | ||||||
|  |  | ||||||
|   u <- character(length(x)) |   u <- character(length(x)) | ||||||
|   u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)]) |   u[!is.na(x.gbif)] <- paste0(TAXONOMY_VERSION$GBIF$url, "/species/", x.gbif[!is.na(x.gbif)]) | ||||||
|   # overwrite with LPSN: |   # overwrite with LPSN: | ||||||
|   u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE)) |   u[!is.na(x.lpsn)] <- paste0(TAXONOMY_VERSION$LPSN$url, "/", x.rank[!is.na(x.lpsn)], "/", gsub(" ", "-", tolower(x.name[!is.na(x.lpsn)]), fixed = TRUE)) | ||||||
|  |   # overwrite with MycoBank (bacteria from LPSN will not be overwritten since MycoBank has no bacteria) | ||||||
|  |   u[!is.na(x.mycobank)] <- paste0(TAXONOMY_VERSION$MycoBank$url, "/name/", gsub(" ", "%20", tolower(x.name[!is.na(x.mycobank)]), fixed = TRUE)) | ||||||
|  |  | ||||||
|   names(u) <- x.name |   names(u) <- x.name | ||||||
|  |  | ||||||
|   if (isTRUE(open)) { |   if (isTRUE(open)) { | ||||||
|     if (length(u) > 1) { |     if (length(u) > 1) { | ||||||
|       warning_("in `mo_url()`: only the first URL will be opened, as `browseURL()` only suports one string.") |       warning_("in `mo_url()`: only the first URL will be opened, as R's built-in function `browseURL()` only suports one string.") | ||||||
|     } |     } | ||||||
|     utils::browseURL(u[1L]) |     utils::browseURL(u[1L]) | ||||||
|   } |   } | ||||||
|   | |||||||
| @@ -559,6 +559,9 @@ taxonomy_mycobank <- taxonomy_mycobank %>% | |||||||
|                              TRUE ~ "") |                              TRUE ~ "") | ||||||
|   ) |   ) | ||||||
|  |  | ||||||
|  | # FOR 2025: use this to get all the genera with updated names from MO_PREVALENT_GENERA: | ||||||
|  | # AMR::microorganisms %>% filter(genus %in% MO_PREVALENT_GENERA) %>% pull(fullname) %>% mo_current() %>% mo_genus() %>% unique() %>% sort() | ||||||
|  |  | ||||||
| # keep only the relevant ones | # keep only the relevant ones | ||||||
| taxonomy_mycobank <- taxonomy_mycobank %>%  | taxonomy_mycobank <- taxonomy_mycobank %>%  | ||||||
|   filter(order %in% include_fungal_orders | |   filter(order %in% include_fungal_orders | | ||||||
| @@ -1078,6 +1081,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ lpsn[match(kingdom, fullname)], |       rank == "genus" ~ lpsn[match(kingdom, fullname)], | ||||||
|       # species, always has a genus |       # species, always has a genus | ||||||
|       rank == "species" ~ lpsn[match(genus, fullname)], |       rank == "species" ~ lpsn[match(genus, fullname)], | ||||||
|  |       # subspecies, always has a genus + species | ||||||
|  |       rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_), |       TRUE ~ NA_character_), | ||||||
|     mycobank_parent = case_when( |     mycobank_parent = case_when( | ||||||
|       rank == "phylum" ~ mycobank[match(kingdom, fullname)], |       rank == "phylum" ~ mycobank[match(kingdom, fullname)], | ||||||
| @@ -1101,6 +1106,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ mycobank[match(kingdom, fullname)], |       rank == "genus" ~ mycobank[match(kingdom, fullname)], | ||||||
|       # species |       # species | ||||||
|       rank == "species" ~ mycobank[match(genus, fullname)], |       rank == "species" ~ mycobank[match(genus, fullname)], | ||||||
|  |       # subspecies | ||||||
|  |       rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_), |       TRUE ~ NA_character_), | ||||||
|     gbif_parent = case_when( |     gbif_parent = case_when( | ||||||
|       rank == "phylum" ~ gbif[match(kingdom, fullname)], |       rank == "phylum" ~ gbif[match(kingdom, fullname)], | ||||||
| @@ -1124,6 +1131,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ gbif[match(kingdom, fullname)], |       rank == "genus" ~ gbif[match(kingdom, fullname)], | ||||||
|       # species |       # species | ||||||
|       rank == "species" ~ gbif[match(genus, fullname)], |       rank == "species" ~ gbif[match(genus, fullname)], | ||||||
|  |       # subspecies | ||||||
|  |       rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_)) |       TRUE ~ NA_character_)) | ||||||
|  |  | ||||||
| # these still have no record in our data set: | # these still have no record in our data set: | ||||||
| @@ -1699,6 +1708,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ lpsn[match(kingdom, fullname)], |       rank == "genus" ~ lpsn[match(kingdom, fullname)], | ||||||
|       # species, always has a genus |       # species, always has a genus | ||||||
|       rank == "species" ~ lpsn[match(genus, fullname)], |       rank == "species" ~ lpsn[match(genus, fullname)], | ||||||
|  |       # subspecies, always has a genus + species | ||||||
|  |       rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_), |       TRUE ~ NA_character_), | ||||||
|     mycobank_parent = case_when( |     mycobank_parent = case_when( | ||||||
|       rank == "phylum" ~ mycobank[match(kingdom, fullname)], |       rank == "phylum" ~ mycobank[match(kingdom, fullname)], | ||||||
| @@ -1722,6 +1733,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ mycobank[match(kingdom, fullname)], |       rank == "genus" ~ mycobank[match(kingdom, fullname)], | ||||||
|       # species |       # species | ||||||
|       rank == "species" ~ mycobank[match(genus, fullname)], |       rank == "species" ~ mycobank[match(genus, fullname)], | ||||||
|  |       # subspecies | ||||||
|  |       rank == "subspecies" ~ mycobank[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_), |       TRUE ~ NA_character_), | ||||||
|     gbif_parent = case_when( |     gbif_parent = case_when( | ||||||
|       rank == "phylum" ~ gbif[match(kingdom, fullname)], |       rank == "phylum" ~ gbif[match(kingdom, fullname)], | ||||||
| @@ -1745,6 +1758,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ gbif[match(kingdom, fullname)], |       rank == "genus" ~ gbif[match(kingdom, fullname)], | ||||||
|       # species |       # species | ||||||
|       rank == "species" ~ gbif[match(genus, fullname)], |       rank == "species" ~ gbif[match(genus, fullname)], | ||||||
|  |       # subspecies | ||||||
|  |       rank == "subspecies" ~ gbif[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_)) |       TRUE ~ NA_character_)) | ||||||
|  |  | ||||||
| # check again | # check again | ||||||
| @@ -1810,6 +1825,8 @@ taxonomy <- taxonomy %>% | |||||||
|       rank == "genus" ~ lpsn[match(kingdom, fullname)], |       rank == "genus" ~ lpsn[match(kingdom, fullname)], | ||||||
|       # species, always has a genus |       # species, always has a genus | ||||||
|       rank == "species" ~ lpsn[match(genus, fullname)], |       rank == "species" ~ lpsn[match(genus, fullname)], | ||||||
|  |       # subspecies, always has a genus + species | ||||||
|  |       rank == "subspecies" ~ lpsn[match(paste(genus, species), fullname)], | ||||||
|       TRUE ~ NA_character_)) |       TRUE ~ NA_character_)) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							| @@ -194,6 +194,10 @@ expect_equal( | |||||||
|   mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), |   mo_is_gram_positive(c("Escherichia coli", "Staphylococcus aureus", "Candida albicans")), | ||||||
|   c(FALSE, TRUE, FALSE) |   c(FALSE, TRUE, FALSE) | ||||||
| ) | ) | ||||||
|  | expect_equal( | ||||||
|  |   mo_is_yeast(c("Candida", "Trichophyton", "Klebsiella")), | ||||||
|  |   c(TRUE, FALSE, FALSE) | ||||||
|  | ) | ||||||
| # is intrinsic resistant | # is intrinsic resistant | ||||||
| expect_equal( | expect_equal( | ||||||
|   mo_is_intrinsic_resistant( |   mo_is_intrinsic_resistant( | ||||||
|   | |||||||
| @@ -129,10 +129,12 @@ The tab-separated text file and Microsoft Excel workbook both contain all SNOMED | |||||||
|  |  | ||||||
| ### Source | ### Source | ||||||
|  |  | ||||||
| This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the List of Prokaryotic names with Standing in Nomenclature (LPSN) and the Global Biodiversity Information Facility (GBIF): | This data set contains the full microbial taxonomy of `r AMR:::nr2char(length(unique(AMR::microorganisms$kingdom[!AMR::microorganisms$kingdom %like% "unknown"])))` kingdoms from the `r AMR:::TAXONOMY_VERSION$LPSN$name`, `r AMR:::TAXONOMY_VERSION$MycoBank$name`, and the `r AMR:::TAXONOMY_VERSION$GBIF$name`: | ||||||
|  |  | ||||||
| * `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`. | * `r AMR:::TAXONOMY_VERSION$LPSN$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$LPSN$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$LPSN$accessed_date)`. | ||||||
| * `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r AMR:::documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`. | * `r AMR:::TAXONOMY_VERSION$MycoBank$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$MycoBank$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$MycoBank$accessed_date)`. | ||||||
|  | * `r AMR:::TAXONOMY_VERSION$GBIF$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$GBIF$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$GBIF$accessed_date)`. | ||||||
|  | * `r AMR:::TAXONOMY_VERSION$BacDive$citation` Accessed from <`r AMR:::TAXONOMY_VERSION$BacDive$url`> on `r documentation_date(AMR:::TAXONOMY_VERSION$BacDive$accessed_date)`. | ||||||
| * `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`> | * `r AMR:::TAXONOMY_VERSION$SNOMED$citation` URL: <`r AMR:::TAXONOMY_VERSION$SNOMED$url`> | ||||||
|  |  | ||||||
| ### Example content | ### Example content | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user