1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-12 17:01:57 +02:00

completely updated antibiotics

This commit is contained in:
2022-10-30 14:31:45 +01:00
parent 3d9faf89cf
commit d40e0ef20b
60 changed files with 102028 additions and 95116 deletions

View File

@ -592,13 +592,13 @@ for (i in 2:6) {
status = "accepted",
source = "manually added"
) %>%
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank))# %>%
# get GBIF identifier where available
# left_join(current_gbif %>%
# select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
# by = c("kingdom", "rank", i_name)
# ) %>%
# mutate(source = ifelse(!is.na(gbif), "GBIF", source))
filter(!paste(kingdom, .[[ncol(.) - 4]], rank) %in% paste(taxonomy$kingdom, taxonomy[[i + 1]], taxonomy$rank)) # %>%
# get GBIF identifier where available
# left_join(current_gbif %>%
# select(kingdom, all_of(i_name), rank = taxonRank, ref = scientificNameAuthorship, gbif = taxonID, gbif_parent = parentNameUsageID),
# by = c("kingdom", "rank", i_name)
# ) %>%
# mutate(source = ifelse(!is.na(gbif), "GBIF", source))
message("n = ", nrow(to_add))
taxonomy <- taxonomy %>%
bind_rows(to_add)
@ -610,7 +610,9 @@ for (i in 2:6) {
new_df <- AMR::microorganisms[0, ]
for (tax in c("phylum", "class", "order", "family", "genus")) {
print(tax)
out <- AMR::microorganisms %>% pull(tax) %>% unique()
out <- AMR::microorganisms %>%
pull(tax) %>%
unique()
missing <- vapply(FUN.VALUE = logical(1), out, function(x) length(which(AMR::microorganisms[[tax]] == x & AMR::microorganisms$rank == tax)) == 0)
missing <- names(missing)[which(missing == TRUE & names(missing) != "" & names(missing) %unlike% "unknown")]
out <- microorganisms %>%
@ -618,18 +620,20 @@ for (tax in c("phylum", "class", "order", "family", "genus")) {
distinct(.[[tax]], .keep_all = TRUE) %>%
mutate_at(vars((which(colnames(.) == tax) + 1):subspecies), ~"") %>%
mutate_at(vars(lpsn:gbif_renamed_to), ~NA_character_) %>%
mutate(rank = tax,
ref = NA_character_,
status = "accepted",
fullname = .[[tax]],
source = "manually added",
snomed = rep(list(character(0)), nrow(.)))
mutate(
rank = tax,
ref = NA_character_,
status = "accepted",
fullname = .[[tax]],
source = "manually added",
snomed = rep(list(character(0)), nrow(.))
)
new_df <- bind_rows(new_df, out)
if (".[[tax]]" %in% colnames(new_df)) {
new_df <- new_df %>% select(-`.[[tax]]`)
}
}
new_df <- new_df %>%
new_df <- new_df %>%
mutate(mo = as.character(mo))
new_mo <- new_df %>%
@ -645,7 +649,7 @@ new_mo <- new_df %>%
transmute(fullname, mo_rank_new = paste0(gsub("_.*", "_", as.character(mo)), mo_rank_new))
any(new_mo$mo_rank_new %in% microorganisms$mo)
new_df[which(new_df$fullname %in% new_mo$fullname), "mo"] <- new_mo$mo_rank_new
# species (requires combination with genus)
taxonomy <- taxonomy %>%
@ -1041,11 +1045,11 @@ taxonomy <- taxonomy %>%
.before = 1
) %>%
select(!starts_with("mo_")) %>%
arrange(fullname)
arrange(fullname)
# now check these - e.g. Nitrospira is the name of a genus AND its class
taxonomy %>% filter(fullname %in% .[duplicated(fullname), "fullname", drop = TRUE])
taxonomy <- taxonomy %>%
taxonomy <- taxonomy %>%
distinct(fullname, .keep_all = TRUE)
# This must not exist: