1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-12 21:41:55 +02:00

add oxygen tolerance

This commit is contained in:
2023-05-11 21:56:27 +02:00
parent bf08d136a0
commit 91fa73dedf
28 changed files with 52310 additions and 52203 deletions

View File

@ -1342,11 +1342,13 @@ bacdive <- vroom::vroom("data-raw/bacdive.csv", skip = 2) %>%
bacdive <- bacdive %>%
# fill in missing species from previous rows
mutate(species = ifelse(is.na(species), lag(species), species)) %>%
filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant")
filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant", species %unlike% "unclassified") %>%
mutate(mo = as.mo(species, keep_synonyms = FALSE))
bacdive <- bacdive %>%
# now determine type per species
group_by(species) %>%
summarise(oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
group_by(mo) %>%
summarise(species = first(species),
oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
all(oxygen == "microaerophile") ~ "microaerophile",
all(oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "anaerobe",
all(oxygen %in% c("anaerobe", "obligate anaerobe", "microaerophile")) ~ "anaerobe/microaerophile",
@ -1354,10 +1356,25 @@ bacdive <- bacdive %>%
all(!oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "aerobe",
all(c("aerobe", "anaerobe") %in% oxygen) ~ "facultative anaerobe",
TRUE ~ NA_character_))
# now find all synonyms and copy them from their current taxonomic names
synonyms <- as.mo(unique(unlist(mo_synonyms(bacdive$mo, keep_synonyms = TRUE))),
keep_synonyms = TRUE)
syns <- tibble(species = synonyms,
mo = synonyms %>% mo_current() %>% as.mo()) %>%
filter(species != mo) %>%
mutate(species = mo_name(species, keep_synonyms = TRUE)) %>%
left_join(bacdive %>% select(mo, oxygen_tolerance)) %>%
# set mo to mo of the synonym
mutate(mo = as.mo(species, keep_synonyms = TRUE)) %>%
select(all_of(colnames(bacdive)))
bacdive <- bacdive %>%
bind_rows(syns) %>%
distinct()
bacdive_genus <- bacdive %>%
mutate(genus = gsub("^([A-Za-z]+) .*", "\\1", species), oxygen = oxygen_tolerance) %>%
group_by(species = genus) %>%
mutate(oxygen = oxygen_tolerance) %>%
group_by(species = mo_genus(mo)) %>%
summarise(oxygen_tolerance = case_when(any(oxygen == "facultative anaerobe") ~ "facultative anaerobe",
any(oxygen == "anaerobe/microaerophile") ~ "anaerobe/microaerophile",
all(oxygen == "microaerophile") ~ "microaerophile",
@ -1369,7 +1386,7 @@ bacdive <- bacdive %>%
filter(species %unlike% " sp[.]") %>%
bind_rows(bacdive_genus) %>%
arrange(species) %>%
mutate(mo = as.mo(species, keep_synonyms = FALSE))
mutate(mo = as.mo(species, keep_synonyms = TRUE))
other_species <- microorganisms %>%
filter(kingdom == "Bacteria", rank == "species", !mo %in% bacdive$mo, genus %in% bacdive$species) %>%
@ -1378,22 +1395,20 @@ other_species <- microorganisms %>%
mutate(oxygen_tolerance = ifelse(oxygen_tolerance %in% c("aerobe", "anaerobe", "microaerophile", "anaerobe/microaerophile"),
oxygen_tolerance,
paste("likely", oxygen_tolerance))) %>%
select(species, oxygen_tolerance, mo = mo2)
select(species, oxygen_tolerance, mo = mo2) %>%
distinct(species, .keep_all = TRUE)
bacdive <- bacdive %>%
bind_rows(other_species) %>%
arrange(species)
arrange(species) %>%
distinct(mo, .keep_all = TRUE) %>%
select(-species)
taxonomy <- taxonomy %>%
left_join(
bacdive %>%
select(-species),
by = "mo") %>%
left_join(bacdive, by = "mo") %>%
relocate(oxygen_tolerance, .after = ref)
# TODO look up synonyms and fill them in as well
# Clean data set ----------------------------------------------------------
# format to tibble and check again for invalid characters