mirror of
https://github.com/msberends/AMR.git
synced 2025-07-08 11:11:54 +02:00
updated taxonomy
This commit is contained in:
@ -1 +1 @@
|
||||
aa07f3422b8bba417bc0331f4bb34449
|
||||
33ff38f7d52ae39f951f60b73db29630
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1 +1 @@
|
||||
0a4241916334341aa70923aac880997d
|
||||
9d0367aa37e7f7d6923caae506ea434d
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
@ -50,13 +50,17 @@ int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R
|
||||
|
||||
# remove lab drugs
|
||||
untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE]
|
||||
int_resis2 <- int_resis2 %>%
|
||||
filter(!ab %in% untreatable) %>%
|
||||
arrange(mo, ab)
|
||||
# takes ages with filter()..., weird
|
||||
int_resis3 <- int_resis2[which(!int_resis2$ab %in% untreatable), ]
|
||||
class(int_resis3$ab) <- c("ab", "character")
|
||||
int_resis3
|
||||
|
||||
intrinsic_resistant <- as.data.frame(int_resis2, stringsAsFactors = FALSE)
|
||||
all(int_resis3$mo %in% microorganisms$mo)
|
||||
all(int_resis3$ab %in% antibiotics$ab)
|
||||
|
||||
intrinsic_resistant <- df_remove_nonASCII(int_resis3)
|
||||
usethis::use_data(intrinsic_resistant, internal = FALSE, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(intrinsic_resistant)
|
||||
|
||||
# AFTER THIS:
|
||||
# DO NOT FORGET TO UPDATE THE VERSION NUMBER IN mo_is_intrinsic_resistant()
|
||||
# DO NOT FORGET TO UPDATE THE VERSION NUMBER IN mo_is_intrinsic_resistant() AND R/data.R
|
||||
|
@ -31,10 +31,10 @@
|
||||
# (at least 10 GB will be used by the R session for the size of the files)
|
||||
|
||||
# 1. Go to https://doi.org/10.15468/39omei and find the download link for the
|
||||
# latest GBIF backbone taxonony ZIP file - unpack Taxon.tsv from it (~2.2 GB)
|
||||
# latest GBIF backbone taxonony under "Endpoints" and unpack Taxon.tsv from it (~2.2 GB)
|
||||
# ALSO BE SURE to get the date of release and update R/aa_globals.R later!
|
||||
# 2. Go to https://lpsn.dsmz.de/downloads (register first) and download the latest
|
||||
# CSV file. It should be (re)named "taxonomy.csv". Their API unfortunately does
|
||||
# CSV file (~12,5 MB) as "taxonomy.csv". Their API unfortunately does
|
||||
# not include the full taxonomy and is currently (2022) pretty worthless.
|
||||
# 3. Set this folder_location to the path where these two files are:
|
||||
folder_location <- "~/Downloads/backbone/"
|
||||
@ -47,7 +47,9 @@ if (!file.exists(file_gbif)) stop("GBIF file not found")
|
||||
if (!file.exists(file_lpsn)) stop("LPSN file not found")
|
||||
|
||||
library(dplyr)
|
||||
library(vroom)
|
||||
library(vroom) # to import files
|
||||
library(rvest) # to scape LPSN website
|
||||
library(progress) # to show progress bars
|
||||
library(AMR)
|
||||
# also requires 'rvest' and 'progress' packages
|
||||
|
||||
@ -55,7 +57,7 @@ library(AMR)
|
||||
|
||||
get_author_year <- function(ref) {
|
||||
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
|
||||
|
||||
|
||||
authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
|
||||
authors2 <- gsub(" ?\\(Approved Lists [0-9]+\\) ?", " () ", authors2)
|
||||
authors2 <- gsub(" [)(]+ $", "", authors2)
|
||||
@ -63,21 +65,21 @@ get_author_year <- function(ref) {
|
||||
authors2 <- trimws(gsub("^[(](.*)[)]$", "\\1", authors2))
|
||||
# only take part after brackets if there's a name
|
||||
authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
|
||||
gsub(".*[)] (.*)", "\\1", authors2),
|
||||
authors2
|
||||
gsub(".*[)] (.*)", "\\1", authors2),
|
||||
authors2
|
||||
)
|
||||
# replace parentheses with emend. to get the latest authors
|
||||
authors2 <- gsub("(", " emend. ", authors2, fixed = TRUE)
|
||||
authors2 <- gsub(")", "", authors2, fixed = TRUE)
|
||||
authors2 <- gsub(" +", " ", authors2)
|
||||
authors2 <- trimws(authors2)
|
||||
|
||||
|
||||
# get year from last 4 digits
|
||||
lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
|
||||
# can never be later than now
|
||||
lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
|
||||
NA,
|
||||
lastyear
|
||||
NA,
|
||||
lastyear
|
||||
)
|
||||
# get authors without last year
|
||||
authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
|
||||
@ -109,8 +111,8 @@ get_author_year <- function(ref) {
|
||||
authors[nchar(authors) <= 3] <- ""
|
||||
# combine author and year if year is available
|
||||
ref <- ifelse(!is.na(lastyear),
|
||||
paste0(authors, ", ", lastyear),
|
||||
authors
|
||||
paste0(authors, ", ", lastyear),
|
||||
authors
|
||||
)
|
||||
# fix beginning and ending
|
||||
ref <- gsub(", $", "", ref)
|
||||
@ -118,7 +120,7 @@ get_author_year <- function(ref) {
|
||||
ref <- gsub("^(emend|et al.,?)", "", ref)
|
||||
ref <- trimws(ref)
|
||||
ref <- gsub("'", "", ref)
|
||||
|
||||
|
||||
# a lot start with a lowercase character - fix that
|
||||
ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
|
||||
# specific one for the French that are named dOrbigny
|
||||
@ -158,15 +160,15 @@ abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE
|
||||
# to retrieve LPSN and authors from LPSN website
|
||||
get_lpsn_and_author <- function(rank, name) {
|
||||
url <- paste0("https://lpsn.dsmz.de/", tolower(rank), "/", tolower(name))
|
||||
page_txt <- tryCatch(rvest::read_html(url), error = function(e) NULL)
|
||||
page_txt <- tryCatch(read_html(url), error = function(e) NULL)
|
||||
if (is.null(page_txt)) {
|
||||
warning("No LPSN found for ", tolower(rank), " '", name, "'")
|
||||
lpsn <- NA_character_
|
||||
ref <- NA_character_
|
||||
} else {
|
||||
page_txt <- page_txt %>%
|
||||
rvest::html_element("#detail-page") %>%
|
||||
rvest::html_text()
|
||||
html_element("#detail-page") %>%
|
||||
html_text()
|
||||
lpsn <- gsub(".*Record number:[\r\n\t ]*([0-9]+).*", "\\1", page_txt, perl = FALSE)
|
||||
ref <- page_txt %>%
|
||||
gsub(".*?Name: (.*[0-9]{4}?).*", "\\1", ., perl = FALSE) %>%
|
||||
@ -204,6 +206,23 @@ get_lpsn_and_author <- function(rank, name) {
|
||||
# Read GBIF data ----------------------------------------------------------
|
||||
|
||||
taxonomy_gbif.bak <- vroom(file_gbif)
|
||||
|
||||
include_fungal_orders <- c(
|
||||
"Eurotiales", "Microascales", "Mucorales", "Saccharomycetales",
|
||||
"Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales"
|
||||
)
|
||||
# get latest taxonomic names of these fungal orders
|
||||
include_fungal_orders_ids <- taxonomy_gbif.bak %>%
|
||||
filter(order %in% include_fungal_orders)
|
||||
include_fungal_orders <- taxonomy_gbif.bak %>%
|
||||
filter(taxonID %in% c(include_fungal_orders_ids$taxonID, include_fungal_orders_ids$acceptedNameUsageID)) %>%
|
||||
distinct(order) %>%
|
||||
pull(order)
|
||||
|
||||
# check some columns to validate below filters
|
||||
sort(table(taxonomy_gbif.bak$taxonomicStatus))
|
||||
sort(table(taxonomy_gbif.bak$taxonRank))
|
||||
|
||||
taxonomy_gbif <- taxonomy_gbif.bak %>%
|
||||
# immediately filter rows we really never want
|
||||
filter(
|
||||
@ -273,10 +292,15 @@ taxonomy_gbif <- taxonomy_gbif.bak %>%
|
||||
sort(table(taxonomy_gbif$rank))
|
||||
sort(table(taxonomy_gbif$status))
|
||||
|
||||
taxonomy_gbif
|
||||
|
||||
# Read LPSN data ----------------------------------------------------------
|
||||
|
||||
taxonomy_lpsn.bak <- vroom(file_lpsn)
|
||||
|
||||
# check some columns to validate below filters
|
||||
sort(table(is.na(taxonomy_lpsn.bak$record_lnk))) # accepted = TRUE
|
||||
|
||||
taxonomy_lpsn <- taxonomy_lpsn.bak %>%
|
||||
transmute(
|
||||
genus = genus_name,
|
||||
@ -306,42 +330,44 @@ taxonomy_lpsn_missing <- tibble(
|
||||
genus = character(0)
|
||||
)
|
||||
for (page in LETTERS) {
|
||||
message("Downloading page ", page, appendLF = FALSE)
|
||||
# this will not alter `taxonomy_lpsn` yet
|
||||
message("Downloading page ", page, "...", appendLF = FALSE)
|
||||
url <- paste0("https://lpsn.dsmz.de/genus?page=", page)
|
||||
x <- xml2::read_html(url) %>%
|
||||
x <- read_html(url) %>%
|
||||
# class "main-list" is the main table
|
||||
rvest::html_element(".main-list") %>%
|
||||
html_element(".main-list") %>%
|
||||
# get every list element with a set <id> attribute
|
||||
rvest::html_elements("li[id]")
|
||||
html_elements("li[id]")
|
||||
for (i in seq_len(length(x))) {
|
||||
if (i %% 25 == 0) {
|
||||
message(".", appendLF = FALSE)
|
||||
}
|
||||
elements <- x[[i]] %>% rvest::html_elements("a")
|
||||
hrefs <- elements %>% rvest::html_attr("href")
|
||||
elements <- x[[i]] %>% html_elements("a")
|
||||
hrefs <- elements %>% html_attr("href")
|
||||
ranks <- hrefs %>% gsub(".*/(.*?)/.*", "\\1", .)
|
||||
names <- elements %>%
|
||||
rvest::html_text() %>%
|
||||
html_text() %>%
|
||||
gsub('"', "", ., fixed = TRUE)
|
||||
# no species, this must be until genus level
|
||||
hrefs <- hrefs[ranks != "species"]
|
||||
names <- names[ranks != "species"]
|
||||
ranks <- ranks[ranks != "species"]
|
||||
ranks[ranks == "domain"] <- "kingdom"
|
||||
|
||||
|
||||
df <- names %>%
|
||||
tibble() %>%
|
||||
t() %>%
|
||||
as_tibble() %>%
|
||||
as_tibble(.name_repair = "unique") %>%
|
||||
setNames(ranks) %>%
|
||||
# no candidates please
|
||||
filter(genus %unlike% "^(Candidatus|\\[)")
|
||||
|
||||
|
||||
taxonomy_lpsn_missing <- taxonomy_lpsn_missing %>%
|
||||
bind_rows(df)
|
||||
}
|
||||
message(length(x), " entries incl. candidates (cleaned total: ", nrow(taxonomy_lpsn_missing), ")")
|
||||
}
|
||||
taxonomy_lpsn_missing
|
||||
|
||||
taxonomy_lpsn <- taxonomy_lpsn %>%
|
||||
left_join(taxonomy_lpsn_missing, by = "genus") %>%
|
||||
@ -350,8 +376,8 @@ taxonomy_lpsn <- taxonomy_lpsn %>%
|
||||
mutate_all(function(x) ifelse(x %like_case% " no ", NA_character_, x))
|
||||
|
||||
taxonomy_lpsn.bak2 <- taxonomy_lpsn
|
||||
# add family
|
||||
pb <- progress::progress_bar$new(total = length(unique(taxonomy_lpsn$family)))
|
||||
# download family directly from LPSN website using scraping
|
||||
pb <- progress_bar$new(total = length(unique(taxonomy_lpsn$family)))
|
||||
for (f in unique(taxonomy_lpsn$family)) {
|
||||
pb$tick()
|
||||
if (is.na(f)) next
|
||||
@ -370,8 +396,8 @@ for (f in unique(taxonomy_lpsn$family)) {
|
||||
ref = unname(tax_info["ref"])
|
||||
))
|
||||
}
|
||||
# add order
|
||||
pb <- progress::progress_bar$new(total = length(unique(taxonomy_lpsn$order)))
|
||||
# download order directly from LPSN website using scraping
|
||||
pb <- progress_bar$new(total = length(unique(taxonomy_lpsn$order)))
|
||||
for (o in unique(taxonomy_lpsn$order)) {
|
||||
pb$tick()
|
||||
if (is.na(o)) next
|
||||
@ -389,8 +415,8 @@ for (o in unique(taxonomy_lpsn$order)) {
|
||||
ref = unname(tax_info["ref"])
|
||||
))
|
||||
}
|
||||
# add class
|
||||
pb <- progress::progress_bar$new(total = length(unique(taxonomy_lpsn$class)))
|
||||
# download class directly from LPSN website using scraping
|
||||
pb <- progress_bar$new(total = length(unique(taxonomy_lpsn$class)))
|
||||
for (cc in unique(taxonomy_lpsn$class)) {
|
||||
pb$tick()
|
||||
if (is.na(cc)) next
|
||||
@ -407,8 +433,8 @@ for (cc in unique(taxonomy_lpsn$class)) {
|
||||
ref = unname(tax_info["ref"])
|
||||
))
|
||||
}
|
||||
# add phylum
|
||||
pb <- progress::progress_bar$new(total = length(unique(taxonomy_lpsn$phylum)))
|
||||
# download phylum directly from LPSN website using scraping
|
||||
pb <- progress_bar$new(total = length(unique(taxonomy_lpsn$phylum)))
|
||||
for (p in unique(taxonomy_lpsn$phylum)) {
|
||||
pb$tick()
|
||||
if (is.na(p)) next
|
||||
@ -424,8 +450,8 @@ for (p in unique(taxonomy_lpsn$phylum)) {
|
||||
ref = unname(tax_info["ref"])
|
||||
))
|
||||
}
|
||||
# add kingdom
|
||||
pb <- progress::progress_bar$new(total = length(unique(taxonomy_lpsn$kingdom)))
|
||||
# download kingdom directly from LPSN website using scraping
|
||||
pb <- progress_bar$new(total = length(unique(taxonomy_lpsn$kingdom)))
|
||||
for (k in unique(taxonomy_lpsn$kingdom)) {
|
||||
pb$tick()
|
||||
if (is.na(k)) next
|
||||
@ -450,41 +476,41 @@ sort(table(taxonomy_lpsn$status))
|
||||
|
||||
saveRDS(taxonomy_gbif, "data-raw/taxonomy_gbif.rds", version = 2)
|
||||
saveRDS(taxonomy_lpsn, "data-raw/taxonomy_lpsn.rds", version = 2)
|
||||
|
||||
# this allows to always get back to this point by simply loading the files from data-raw/.
|
||||
|
||||
# Add full names ----------------------------------------------------------
|
||||
|
||||
taxonomy_gbif <- taxonomy_gbif %>%
|
||||
# clean NAs and add fullname
|
||||
mutate(across(kingdom:subspecies, function(x) ifelse(is.na(x), "", x)),
|
||||
fullname = trimws(case_when(
|
||||
rank == "family" ~ family,
|
||||
rank == "order" ~ order,
|
||||
rank == "class" ~ class,
|
||||
rank == "phylum" ~ phylum,
|
||||
rank == "kingdom" ~ kingdom,
|
||||
TRUE ~ paste(genus, species, subspecies)
|
||||
)), .before = 1
|
||||
fullname = trimws(case_when(
|
||||
rank == "family" ~ family,
|
||||
rank == "order" ~ order,
|
||||
rank == "class" ~ class,
|
||||
rank == "phylum" ~ phylum,
|
||||
rank == "kingdom" ~ kingdom,
|
||||
TRUE ~ paste(genus, species, subspecies)
|
||||
)), .before = 1
|
||||
) %>%
|
||||
# keep only one GBIF taxon ID per full name
|
||||
arrange(fullname, gbif) %>%
|
||||
distinct(kingdom, fullname, .keep_all = TRUE)
|
||||
distinct(kingdom, rank, fullname, .keep_all = TRUE)
|
||||
|
||||
taxonomy_lpsn <- taxonomy_lpsn %>%
|
||||
# clean NAs and add fullname
|
||||
mutate(across(kingdom:subspecies, function(x) ifelse(is.na(x), "", x)),
|
||||
fullname = trimws(case_when(
|
||||
rank == "family" ~ family,
|
||||
rank == "order" ~ order,
|
||||
rank == "class" ~ class,
|
||||
rank == "phylum" ~ phylum,
|
||||
rank == "kingdom" ~ kingdom,
|
||||
TRUE ~ paste(genus, species, subspecies)
|
||||
)), .before = 1
|
||||
fullname = trimws(case_when(
|
||||
rank == "family" ~ family,
|
||||
rank == "order" ~ order,
|
||||
rank == "class" ~ class,
|
||||
rank == "phylum" ~ phylum,
|
||||
rank == "kingdom" ~ kingdom,
|
||||
TRUE ~ paste(genus, species, subspecies)
|
||||
)), .before = 1
|
||||
) %>%
|
||||
# keep only one LPSN record ID per full name
|
||||
arrange(fullname, lpsn) %>%
|
||||
distinct(kingdom, fullname, .keep_all = TRUE)
|
||||
distinct(kingdom, rank, fullname, .keep_all = TRUE)
|
||||
|
||||
# set parent LPSN IDs, requires full name
|
||||
taxonomy_lpsn$lpsn_parent[taxonomy_lpsn$rank == "phylum"] <- taxonomy_lpsn$lpsn[match(taxonomy_lpsn$kingdom[taxonomy_lpsn$rank == "phylum"], taxonomy_lpsn$fullname)]
|
||||
@ -502,17 +528,18 @@ taxonomy_lpsn$lpsn_parent[taxonomy_lpsn$rank == "subspecies"] <- taxonomy_lpsn$l
|
||||
taxonomy <- taxonomy_lpsn %>%
|
||||
# join GBIF identifiers to them
|
||||
left_join(taxonomy_gbif %>% select(kingdom, fullname, starts_with("gbif")),
|
||||
by = c("kingdom", "fullname")
|
||||
by = c("kingdom", "fullname")
|
||||
)
|
||||
|
||||
# for everything else, add the GBIF data
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(taxonomy_gbif %>%
|
||||
filter(!paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname))) %>%
|
||||
filter(!paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname))) %>%
|
||||
arrange(fullname) %>%
|
||||
filter(fullname != "")
|
||||
|
||||
# fix rank
|
||||
table(taxonomy$rank, useNA = "always")
|
||||
taxonomy <- taxonomy %>%
|
||||
mutate(rank = case_when(
|
||||
subspecies != "" ~ "subspecies",
|
||||
@ -525,7 +552,6 @@ taxonomy <- taxonomy %>%
|
||||
kingdom != "" ~ "kingdom",
|
||||
TRUE ~ NA_character_
|
||||
))
|
||||
|
||||
table(taxonomy$rank, useNA = "always")
|
||||
|
||||
# get the latest upper taxonomy from LPSN to update the GBIF data
|
||||
@ -572,13 +598,14 @@ taxonomy <- taxonomy %>%
|
||||
) %>%
|
||||
filter(!paste(kingdom, rank) %in% paste(taxonomy$kingdom, taxonomy$rank)) %>%
|
||||
left_join(current_gbif %>%
|
||||
select(kingdom, rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank")
|
||||
select(kingdom, rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank")
|
||||
) %>%
|
||||
mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
)
|
||||
|
||||
# 2 = phylum ... 6 = genus
|
||||
taxonomy_all_missing <- NULL
|
||||
for (i in 2:6) {
|
||||
i_name <- colnames(taxonomy)[i + 1]
|
||||
message("Adding missing: ", i_name, "... ", appendLF = FALSE)
|
||||
@ -592,84 +619,77 @@ for (i in 2:6) {
|
||||
status = "accepted",
|
||||
source = "manually added"
|
||||
) %>%
|
||||
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank)) # %>%
|
||||
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank)) %>%
|
||||
# get GBIF identifier where available
|
||||
# left_join(current_gbif %>%
|
||||
# select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
# by = c("kingdom", "rank", i_name)
|
||||
# ) %>%
|
||||
# mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
left_join(current_gbif %>%
|
||||
select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank", i_name)
|
||||
) %>%
|
||||
mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
message("n = ", nrow(to_add))
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(to_add)
|
||||
}
|
||||
|
||||
# FIX LATER: added missings after finding out still some taxonomic levels were missing
|
||||
# this should not be needed - it was the only part that was required after last update
|
||||
# can now be removed? Check with next update!
|
||||
new_df <- AMR::microorganisms[0, ]
|
||||
for (tax in c("phylum", "class", "order", "family", "genus")) {
|
||||
print(tax)
|
||||
out <- AMR::microorganisms %>%
|
||||
pull(tax) %>%
|
||||
unique()
|
||||
missing <- vapply(FUN.VALUE = logical(1), out, function(x) length(which(AMR::microorganisms[[tax]] == x & AMR::microorganisms$rank == tax)) == 0)
|
||||
missing <- names(missing)[which(missing == TRUE & names(missing) != "" & names(missing) %unlike% "unknown")]
|
||||
out <- microorganisms %>%
|
||||
filter(.[[tax]] %in% missing) %>%
|
||||
distinct(.[[tax]], .keep_all = TRUE) %>%
|
||||
mutate_at(vars((which(colnames(.) == tax) + 1):subspecies), ~"") %>%
|
||||
mutate_at(vars(lpsn:gbif_renamed_to), ~NA_character_) %>%
|
||||
mutate(
|
||||
rank = tax,
|
||||
ref = NA_character_,
|
||||
status = "accepted",
|
||||
fullname = .[[tax]],
|
||||
source = "manually added",
|
||||
snomed = rep(list(character(0)), nrow(.))
|
||||
)
|
||||
new_df <- bind_rows(new_df, out)
|
||||
if (".[[tax]]" %in% colnames(new_df)) {
|
||||
new_df <- new_df %>% select(-`.[[tax]]`)
|
||||
if (is.null(taxonomy_all_missing)) {
|
||||
taxonomy_all_missing <- to_add
|
||||
} else {
|
||||
taxonomy_all_missing <- taxonomy_all_missing %>%
|
||||
bind_rows(to_add)
|
||||
}
|
||||
}
|
||||
new_df <- new_df %>%
|
||||
mutate(mo = as.character(mo))
|
||||
taxonomy_all_missing %>% View()
|
||||
|
||||
new_mo <- new_df %>%
|
||||
filter(rank == "family") %>%
|
||||
mutate(
|
||||
mo_rank_new8 = abbreviate_mo(family, minlength = 8, prefix = "[FAM]_"),
|
||||
mo_rank_new9 = abbreviate_mo(family, minlength = 9, prefix = "[FAM]_"),
|
||||
mo_rank_new = mo_rank_new8,
|
||||
mo_duplicated = duplicated(mo_rank_new),
|
||||
mo_rank_new = ifelse(mo_duplicated, mo_rank_new9, mo_rank_new),
|
||||
mo_duplicated = duplicated(mo_rank_new)
|
||||
) %>%
|
||||
transmute(fullname, mo_rank_new = paste0(gsub("_.*", "_", as.character(mo)), mo_rank_new))
|
||||
any(new_mo$mo_rank_new %in% microorganisms$mo)
|
||||
new_df[which(new_df$fullname %in% new_mo$fullname), "mo"] <- new_mo$mo_rank_new
|
||||
|
||||
|
||||
# species (requires combination with genus)
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(taxonomy %>%
|
||||
filter(species != "") %>%
|
||||
distinct(kingdom, genus, species, .keep_all = TRUE) %>%
|
||||
select(kingdom:species) %>%
|
||||
mutate(
|
||||
fullname = paste(genus, species),
|
||||
rank = "species",
|
||||
status = "accepted",
|
||||
source = "manually added"
|
||||
) %>%
|
||||
filter(!paste(kingdom, genus, species, rank) %in% paste(taxonomy$kingdom, taxonomy$genus, taxonomy$species, taxonomy$rank)) %>%
|
||||
# get GBIF identifier where available
|
||||
left_join(current_gbif %>%
|
||||
select(kingdom, genus, species = specificEpithet, rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank", "genus", "species")
|
||||
) %>%
|
||||
mutate(source = ifelse(!is.na(gbif), "GBIF", source)))
|
||||
bind_rows(taxonomy_all_missing)
|
||||
|
||||
# we need to fix parent GBIF identifiers
|
||||
taxonomy$gbif_parent[taxonomy$rank == "phylum" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$kingdom[taxonomy$rank == "phylum" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "class" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$phylum[taxonomy$rank == "class" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "order" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$class[taxonomy$rank == "order" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "family" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$order[taxonomy$rank == "family" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "genus" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$family[taxonomy$rank == "genus" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "species" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(taxonomy$genus[taxonomy$rank == "species" & !is.na(taxonomy$gbif)], taxonomy$fullname)]
|
||||
taxonomy$gbif_parent[taxonomy$rank == "subspecies" & !is.na(taxonomy$gbif)] <- taxonomy$gbif[match(paste(taxonomy$genus[taxonomy$rank == "subspecies" & !is.na(taxonomy$gbif)], taxonomy$species[taxonomy$rank == "subspecies" & !is.na(taxonomy$gbif)]), taxonomy$fullname)]
|
||||
|
||||
# these still have no record in our data set:
|
||||
all(taxonomy$lpsn_parent %in% taxonomy$lpsn)
|
||||
all(taxonomy$gbif_parent %in% taxonomy$gbif)
|
||||
|
||||
# fix for duplicate fullnames within a kingdom (such as Nitrospira which is the name of the genus AND its class)
|
||||
taxonomy <- taxonomy %>%
|
||||
mutate(rank_index = case_when(rank == "subspecies" ~ 1,
|
||||
rank == "species" ~ 2,
|
||||
rank == "genus" ~ 3,
|
||||
rank == "family" ~ 4,
|
||||
rank == "order" ~ 5,
|
||||
rank == "class" ~ 6,
|
||||
TRUE ~ 7),
|
||||
fullname_rank = paste0(fullname, " {", rank, "}")) %>%
|
||||
arrange(kingdom, fullname, rank_index) %>%
|
||||
group_by(kingdom, fullname) %>%
|
||||
mutate(fullname = if_else(row_number() > 1, fullname_rank, fullname)) %>%
|
||||
ungroup() %>%
|
||||
select(-fullname_rank) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# now also add missing species (requires combination with genus)
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(
|
||||
taxonomy %>%
|
||||
filter(species != "") %>%
|
||||
distinct(kingdom, genus, species, .keep_all = TRUE) %>%
|
||||
select(kingdom:species) %>%
|
||||
mutate(
|
||||
fullname = paste(genus, species),
|
||||
rank = "species",
|
||||
status = "accepted",
|
||||
source = "manually added"
|
||||
) %>%
|
||||
filter(!paste(kingdom, genus, species, rank) %in% paste(taxonomy$kingdom, taxonomy$genus, taxonomy$species, taxonomy$rank)) %>%
|
||||
# get GBIF identifier where available
|
||||
left_join(current_gbif %>%
|
||||
select(kingdom, genus, species = specificEpithet, rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
|
||||
by = c("kingdom", "rank", "genus", "species")
|
||||
) %>%
|
||||
mutate(source = ifelse(!is.na(gbif), "GBIF", source))
|
||||
)
|
||||
|
||||
|
||||
# remove NAs from taxonomy again, and keep unique full names
|
||||
@ -678,15 +698,16 @@ taxonomy <- taxonomy %>%
|
||||
distinct(kingdom, fullname, .keep_all = TRUE) %>%
|
||||
filter(kingdom != "")
|
||||
|
||||
|
||||
# Save intermediate results -----------------------------------------------
|
||||
|
||||
saveRDS(taxonomy, "data-raw/taxonomy.rds")
|
||||
saveRDS(taxonomy, "data-raw/taxonomy1.rds")
|
||||
|
||||
|
||||
# Get previously manually added entries -----------------------------------
|
||||
|
||||
manually_added <- AMR::microorganisms %>%
|
||||
filter(source == "manually added", !fullname %in% taxonomy$fullname) %>%
|
||||
filter(source == "manually added", !paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname)) %>%
|
||||
select(fullname:subspecies, ref, source, rank)
|
||||
|
||||
# get latest taxonomy for those entries
|
||||
@ -703,15 +724,14 @@ for (cc in unique(manually_added$class[manually_added$class != "" & manually_add
|
||||
manually_added$phylum[which(manually_added$class == cc)] <- taxonomy$phylum[which(taxonomy$class == cc & is.na(taxonomy$lpsn))][1]
|
||||
}
|
||||
|
||||
# previously required:
|
||||
taxonomy$ref[which(taxonomy$genus == "Streptococcus" & taxonomy$species %like% "group")] <- "Lancefield, 1933"
|
||||
manually_added <- manually_added %>%
|
||||
mutate(
|
||||
status = "accepted",
|
||||
rank = ifelse(fullname %like% "unknown", "(unknown rank)", rank)
|
||||
)
|
||||
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(manually_added %>%
|
||||
mutate(
|
||||
status = "accepted",
|
||||
rank = ifelse(fullname %like% "unknown", "(unknown rank)", rank)
|
||||
)) %>%
|
||||
bind_rows(manually_added) %>%
|
||||
arrange(fullname)
|
||||
|
||||
table(taxonomy$rank, useNA = "always")
|
||||
@ -741,11 +761,63 @@ taxonomy <- taxonomy %>%
|
||||
"Actinobacteria", # old, now Actinomycetota
|
||||
"Actinomycetota"
|
||||
) |
|
||||
genus %in% MO_PREVALENT_GENERA)
|
||||
genus %in% AMR:::MO_PREVALENT_GENERA)
|
||||
~ 2,
|
||||
TRUE ~ 3
|
||||
))
|
||||
table(taxonomy$prevalence, useNA = "always")
|
||||
# (a lot will be removed further below)
|
||||
|
||||
|
||||
# Add old entries that must be kept ---------------------------------------
|
||||
|
||||
# these are bacteria now removed, but have an renamed-to identifier to a current record, so add them
|
||||
old_to_keep <- microorganisms %>%
|
||||
filter(!paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname),
|
||||
kingdom == "Bacteria",
|
||||
(gbif_renamed_to %in% taxonomy$gbif & !is.na(gbif_renamed_to)) | (lpsn_renamed_to %in% taxonomy$lpsn & !is.na(lpsn_renamed_to)),
|
||||
rank %in% c("genus", "species", "subspecies")) %>%
|
||||
select(-mo, -snomed)
|
||||
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(old_to_keep) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# and these had prevalence = 1, why do they miss now?
|
||||
old_to_keep2 <- microorganisms %>%
|
||||
filter(!paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname),
|
||||
prevalence == 1,
|
||||
!is.na(gbif_parent) & gbif_parent %in% taxonomy$gbif,
|
||||
rank %in% c("genus", "species", "subspecies")) %>%
|
||||
select(-mo, -snomed)
|
||||
|
||||
taxonomy <- taxonomy %>%
|
||||
bind_rows(old_to_keep2) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# strangly, Trichomonas is no longer in GBIF?
|
||||
old_to_keep3 <- microorganisms %>%
|
||||
filter(fullname %like% "^trichomona") %>%
|
||||
select(-mo, -snomed)
|
||||
|
||||
taxonomy <- taxonomy %>%
|
||||
filter(!fullname %in% old_to_keep3$fullname) %>%
|
||||
bind_rows(old_to_keep3) %>%
|
||||
arrange(fullname)
|
||||
|
||||
# fix rank
|
||||
taxonomy <- taxonomy %>%
|
||||
mutate(rank = case_when(
|
||||
subspecies != "" ~ "subspecies",
|
||||
species != "" ~ "species",
|
||||
genus != "" ~ "genus",
|
||||
family != "" ~ "family",
|
||||
order != "" ~ "order",
|
||||
class != "" ~ "class",
|
||||
phylum != "" ~ "phylum",
|
||||
kingdom != "" ~ "kingdom",
|
||||
TRUE ~ NA_character_
|
||||
))
|
||||
|
||||
|
||||
# Add microbial IDs -------------------------------------------------------
|
||||
@ -773,12 +845,12 @@ mo_phylum <- taxonomy %>%
|
||||
filter(rank == "phylum") %>%
|
||||
distinct(kingdom, phylum) %>%
|
||||
left_join(AMR::microorganisms %>%
|
||||
filter(rank == "phylum") %>%
|
||||
transmute(kingdom,
|
||||
phylum = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "phylum")
|
||||
filter(rank == "phylum") %>%
|
||||
transmute(kingdom,
|
||||
phylum = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "phylum")
|
||||
) %>%
|
||||
group_by(kingdom) %>%
|
||||
mutate(
|
||||
@ -799,12 +871,12 @@ mo_class <- taxonomy %>%
|
||||
filter(rank == "class") %>%
|
||||
distinct(kingdom, class) %>%
|
||||
left_join(AMR::microorganisms %>%
|
||||
filter(rank == "class") %>%
|
||||
transmute(kingdom,
|
||||
class = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "class")
|
||||
filter(rank == "class") %>%
|
||||
transmute(kingdom,
|
||||
class = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "class")
|
||||
) %>%
|
||||
group_by(kingdom) %>%
|
||||
mutate(
|
||||
@ -825,12 +897,12 @@ mo_order <- taxonomy %>%
|
||||
filter(rank == "order") %>%
|
||||
distinct(kingdom, order) %>%
|
||||
left_join(AMR::microorganisms %>%
|
||||
filter(rank == "order") %>%
|
||||
transmute(kingdom,
|
||||
order = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "order")
|
||||
filter(rank == "order") %>%
|
||||
transmute(kingdom,
|
||||
order = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "order")
|
||||
) %>%
|
||||
group_by(kingdom) %>%
|
||||
mutate(
|
||||
@ -851,12 +923,12 @@ mo_family <- taxonomy %>%
|
||||
filter(rank == "family") %>%
|
||||
distinct(kingdom, family) %>%
|
||||
left_join(AMR::microorganisms %>%
|
||||
filter(rank == "family") %>%
|
||||
transmute(kingdom,
|
||||
family = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "family")
|
||||
filter(rank == "family") %>%
|
||||
transmute(kingdom,
|
||||
family = fullname,
|
||||
mo_old = gsub("[A-Z]{1,2}_", "", as.character(mo))
|
||||
),
|
||||
by = c("kingdom", "family")
|
||||
) %>%
|
||||
group_by(kingdom) %>%
|
||||
mutate(
|
||||
@ -878,10 +950,10 @@ mo_genus <- taxonomy %>%
|
||||
distinct(kingdom, genus) %>%
|
||||
# get available old MO codes
|
||||
left_join(AMR::microorganisms %>%
|
||||
filter(rank == "genus") %>%
|
||||
transmute(mo_genus_old = gsub("^[A-Z]+_", "", as.character(mo)), kingdom, genus) %>%
|
||||
distinct(kingdom, genus, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus")
|
||||
filter(rank == "genus") %>%
|
||||
transmute(mo_genus_old = gsub("^[A-Z]+_", "", as.character(mo)), kingdom, genus) %>%
|
||||
distinct(kingdom, genus, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus")
|
||||
) %>%
|
||||
distinct(kingdom, genus, .keep_all = TRUE) %>%
|
||||
# since kingdom is part of the code, genus abbreviations may be duplicated between kingdoms
|
||||
@ -924,11 +996,11 @@ mo_species <- taxonomy %>%
|
||||
filter(rank == "species") %>%
|
||||
distinct(kingdom, genus, species) %>%
|
||||
left_join(microorganisms %>%
|
||||
filter(rank == "species") %>%
|
||||
transmute(mo_species_old = gsub("^[A-Z]+_[A-Z]+_", "", as.character(mo)), kingdom, genus, species) %>%
|
||||
filter(mo_species_old %unlike% "-") %>%
|
||||
distinct(kingdom, genus, species, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus", "species")
|
||||
filter(rank == "species") %>%
|
||||
transmute(mo_species_old = gsub("^[A-Z]+_[A-Z]+_", "", as.character(mo)), kingdom, genus, species) %>%
|
||||
filter(mo_species_old %unlike% "-") %>%
|
||||
distinct(kingdom, genus, species, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus", "species")
|
||||
) %>%
|
||||
distinct(kingdom, genus, species, .keep_all = TRUE) %>%
|
||||
group_by(kingdom, genus) %>%
|
||||
@ -972,11 +1044,11 @@ mo_subspecies <- taxonomy %>%
|
||||
filter(rank == "subspecies") %>%
|
||||
distinct(kingdom, genus, species, subspecies) %>%
|
||||
left_join(microorganisms %>%
|
||||
filter(rank %in% c("subspecies", "subsp.", "infraspecies")) %>%
|
||||
transmute(mo_subspecies_old = gsub("^[A-Z]+_[A-Z]+_[A-Z]+_", "", as.character(mo)), kingdom, genus, species, subspecies) %>%
|
||||
filter(mo_subspecies_old %unlike% "-") %>%
|
||||
distinct(kingdom, genus, species, subspecies, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus", "species", "subspecies")
|
||||
filter(rank %in% c("subspecies", "subsp.", "infraspecies")) %>%
|
||||
transmute(mo_subspecies_old = gsub("^[A-Z]+_[A-Z]+_[A-Z]+_", "", as.character(mo)), kingdom, genus, species, subspecies) %>%
|
||||
filter(mo_subspecies_old %unlike% "-") %>%
|
||||
distinct(kingdom, genus, species, subspecies, .keep_all = TRUE),
|
||||
by = c("kingdom", "genus", "species", "subspecies")
|
||||
) %>%
|
||||
distinct(kingdom, genus, species, subspecies, .keep_all = TRUE) %>%
|
||||
group_by(kingdom, genus, species) %>%
|
||||
@ -1050,12 +1122,29 @@ taxonomy <- taxonomy %>%
|
||||
arrange(fullname)
|
||||
|
||||
# now check these - e.g. Nitrospira is the name of a genus AND its class
|
||||
taxonomy %>% filter(fullname %in% .[duplicated(fullname), "fullname", drop = TRUE])
|
||||
taxonomy %>% filter(fullname %in% .[duplicated(fullname), "fullname", drop = TRUE]) %>% View()
|
||||
taxonomy <- taxonomy %>%
|
||||
distinct(fullname, .keep_all = TRUE)
|
||||
mutate(rank_index = case_when(kingdom == "Bacteria" ~ 1,
|
||||
kingdom == "Fungi" ~ 2,
|
||||
kingdom == "Protozoa" ~ 3,
|
||||
kingdom == "Archaea" ~ 4,
|
||||
TRUE ~ 5)) %>%
|
||||
arrange(fullname, rank_index) %>%
|
||||
distinct(fullname, .keep_all = TRUE) %>%
|
||||
select(-rank_index) %>%
|
||||
filter(mo != "")
|
||||
|
||||
# This must not exist:
|
||||
taxonomy %>% filter(mo %like% "__")
|
||||
# this must not exist:
|
||||
taxonomy %>% filter(mo %like% "__") %>% View()
|
||||
taxonomy <- taxonomy %>% filter(mo %unlike% "__")
|
||||
|
||||
# this must be empty of course
|
||||
taxonomy %>% filter(mo %in% .[duplicated(mo), "mo", drop = TRUE]) %>% View()
|
||||
taxonomy <- taxonomy %>% distinct(mo, .keep_all = TRUE)
|
||||
|
||||
# Save intermediate results -----------------------------------------------
|
||||
|
||||
saveRDS(taxonomy, "data-raw/taxonomy2.rds")
|
||||
|
||||
|
||||
# Remove unwanted taxonomic entries from Protoza/Fungi --------------------
|
||||
@ -1067,15 +1156,33 @@ taxonomy <- taxonomy %>%
|
||||
!(phylum %in% c("Choanozoa", "Mycetozoa") & prevalence == 3),
|
||||
# Fungi:
|
||||
!(phylum %in% c("Ascomycota", "Zygomycota", "Basidiomycota") & prevalence == 3),
|
||||
!(genus %in% c("Phoma", "Leptosphaeria") & rank %in% c("species", "subspecies")), # only genus of this rare fungus, with resp. 1300 and 800 species
|
||||
!(genus %in% c("Phoma", "Leptosphaeria", "Physarum") & rank %in% c("species", "subspecies")), # only genus of this rare fungus, with resp. 1300 and 800 species
|
||||
# (leave Alternaria in there, part of human mycobiome and opportunistic pathogen)
|
||||
# Animalia:
|
||||
!genus %in% c("Lucilia", "Lumbricus"),
|
||||
!(class == "Insecta" & rank %in% c("species", "subspecies")), # keep only genus of insects
|
||||
!(genus == "Amoeba" & kingdom == "Animalia"),
|
||||
!(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species
|
||||
kingdom != "Plantae"
|
||||
) # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
|
||||
|
||||
message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n")
|
||||
# no ghost families, orders classes, phyla
|
||||
taxonomy <- taxonomy %>%
|
||||
group_by(kingdom, family) %>% filter(n() > 1 | fullname %like% "unknown" | rank == "kingdom") %>%
|
||||
group_by(kingdom, order) %>% filter(n() > 1 | fullname %like% "unknown" | rank == "kingdom") %>%
|
||||
group_by(kingdom, class) %>% filter(n() > 1 | fullname %like% "unknown" | rank == "kingdom") %>%
|
||||
group_by(kingdom, phylum) %>% filter(n() > 1 | fullname %like% "unknown" | rank == "kingdom") %>%
|
||||
ungroup()
|
||||
|
||||
|
||||
message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n",
|
||||
"This was ", format(nrow(microorganisms), big.mark = ","), " rows.\n")
|
||||
|
||||
# these are the new ones:
|
||||
taxonomy %>% filter(!paste(kingdom, fullname) %in% paste(microorganisms$kingdom, microorganisms$fullname)) %>% View()
|
||||
# these were removed:
|
||||
microorganisms %>% filter(!paste(kingdom, fullname) %in% paste(taxonomy$kingdom, taxonomy$fullname)) %>% View()
|
||||
microorganisms %>% filter(!fullname %in% taxonomy$fullname) %>% View()
|
||||
|
||||
|
||||
# Add SNOMED CT -----------------------------------------------------------
|
||||
@ -1121,15 +1228,13 @@ taxonomy <- taxonomy %>%
|
||||
# set class <mo>
|
||||
class(taxonomy$mo) <- c("mo", "character")
|
||||
|
||||
# Moraxella catarrhalis was named Branhamella catarrhalis (Catlin, 1970), but this is unaccepted in clinical microbiology
|
||||
# we keep them both
|
||||
taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")]
|
||||
taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")]
|
||||
taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")] <- "accepted"
|
||||
taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")] <- NA_character_
|
||||
|
||||
taxonomy <- taxonomy %>%
|
||||
AMR:::dataset_UTF8_to_ASCII()
|
||||
### this was previously needed?? Since 2022 M. catarrhalis seems to be "accepted" again
|
||||
# # Moraxella catarrhalis was named Branhamella catarrhalis (Catlin, 1970), but this is unaccepted in clinical microbiology
|
||||
# # we keep them both
|
||||
# taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")]
|
||||
# taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")]
|
||||
# taxonomy$status[which(taxonomy$fullname == "Moraxella catarrhalis")] <- "accepted"
|
||||
# taxonomy$lpsn_renamed_to[which(taxonomy$fullname == "Moraxella catarrhalis")] <- NA_character_
|
||||
|
||||
|
||||
# Save to package ---------------------------------------------------------
|
||||
@ -1138,7 +1243,7 @@ microorganisms <- taxonomy
|
||||
usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(microorganisms)
|
||||
|
||||
# DON'T FORGET TO UPDATE R/globals.R!
|
||||
# DON'T FORGET TO UPDATE R/_globals.R!
|
||||
|
||||
|
||||
# Test updates ------------------------------------------------------------
|
||||
@ -1154,29 +1259,34 @@ devtools::load_all(".")
|
||||
|
||||
|
||||
# reset previously changed mo codes
|
||||
rsi_translation$mo <- as.mo(rsi_translation$mo, language = NULL)
|
||||
usethis::use_data(rsi_translation, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(rsi_translation)
|
||||
if (!identical(rsi_translation$mo, as.mo(rsi_translation$mo, language = NULL))) {
|
||||
rsi_translation$mo <- as.mo(rsi_translation$mo, language = NULL)
|
||||
usethis::use_data(rsi_translation, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(rsi_translation)
|
||||
}
|
||||
|
||||
microorganisms.codes$mo <- as.mo(microorganisms.codes$mo, language = NULL)
|
||||
# new NAs introduced?
|
||||
any(is.na(microorganisms.codes$mo))
|
||||
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(microorganisms.codes)
|
||||
if (!identical(microorganisms.codes$mo, as.mo(microorganisms.codes$mo, language = NULL))) {
|
||||
microorganisms.codes <- microorganisms.codes %>% filter(mo %in% microorganisms$mo)
|
||||
microorganisms.codes$mo <- as.mo(microorganisms.codes$mo, language = NULL)
|
||||
usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(microorganisms.codes)
|
||||
}
|
||||
|
||||
example_isolates$mo <- as.mo(example_isolates$mo, language = NULL)
|
||||
usethis::use_data(example_isolates, overwrite = TRUE, version = 2)
|
||||
rm(example_isolates)
|
||||
|
||||
intrinsic_resistant$microorganism <- suppressMessages(mo_name(intrinsic_resistant$microorganism))
|
||||
usethis::use_data(intrinsic_resistant, overwrite = TRUE, version = 2)
|
||||
rm(intrinsic_resistant)
|
||||
if (!identical(example_isolates$mo, as.mo(example_isolates$mo, language = NULL))) {
|
||||
example_isolates$mo <- as.mo(example_isolates$mo, language = NULL)
|
||||
usethis::use_data(example_isolates, overwrite = TRUE, version = 2)
|
||||
rm(example_isolates)
|
||||
}
|
||||
|
||||
# load new data sets again
|
||||
devtools::load_all(".")
|
||||
source("data-raw/_pre_commit_hook.R")
|
||||
devtools::load_all(".")
|
||||
|
||||
if (!identical(intrinsic_resistant$mo, as.mo(intrinsic_resistant$mo, language = NULL))) {
|
||||
stop("Run data-raw/reproduction_of_intrinsic_resistant.R again")
|
||||
}
|
||||
|
||||
|
||||
# run the unit tests
|
||||
Sys.setenv(NOT_CRAN = "true")
|
||||
|
Binary file not shown.
BIN
data-raw/taxonomy1.rds
Normal file
BIN
data-raw/taxonomy1.rds
Normal file
Binary file not shown.
BIN
data-raw/taxonomy2.rds
Normal file
BIN
data-raw/taxonomy2.rds
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user