mirror of
https://github.com/msberends/AMR.git
synced 2024-12-27 12:46:12 +01:00
fix coercing NA
to custom codes, fixes #107
This commit is contained in:
parent
9de19fdc49
commit
bf08d136a0
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 2.0.0.9013
|
Version: 2.0.0.9014
|
||||||
Date: 2023-04-21
|
Date: 2023-05-08
|
||||||
Title: Antimicrobial Resistance Data Analysis
|
Title: Antimicrobial Resistance Data Analysis
|
||||||
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
|
||||||
data analysis and to work with microbial and antimicrobial properties by
|
data analysis and to work with microbial and antimicrobial properties by
|
||||||
|
3
NEWS.md
3
NEWS.md
@ -1,10 +1,11 @@
|
|||||||
# AMR 2.0.0.9013
|
# AMR 2.0.0.9014
|
||||||
|
|
||||||
## Changed
|
## Changed
|
||||||
* formatting fix for `sir_interpretation_history()`
|
* formatting fix for `sir_interpretation_history()`
|
||||||
* Fixed some WHONET codes for microorganisms and consequently a couple of entries in `clinical_breakpoints`
|
* Fixed some WHONET codes for microorganisms and consequently a couple of entries in `clinical_breakpoints`
|
||||||
* Added microbial codes for Gram-negative/positive anaerobic bacteria
|
* Added microbial codes for Gram-negative/positive anaerobic bacteria
|
||||||
* `mo_rank()` now returns `NA` for 'unknown' microorganisms (`B_ANAER`, `B_ANAER-NEG`, `B_ANAER-POS`, `B_GRAMN`, `B_GRAMP`, `F_FUNGUS`, `F_YEAST`, and `UNKNOWN`)
|
* `mo_rank()` now returns `NA` for 'unknown' microorganisms (`B_ANAER`, `B_ANAER-NEG`, `B_ANAER-POS`, `B_GRAMN`, `B_GRAMP`, `F_FUNGUS`, `F_YEAST`, and `UNKNOWN`)
|
||||||
|
* Fixed a bug for `as.mo()` that led to coercion of `NA` values when using custom microorganism codes
|
||||||
|
|
||||||
|
|
||||||
# AMR 2.0.0
|
# AMR 2.0.0
|
||||||
|
8
R/mo.R
8
R/mo.R
@ -214,10 +214,10 @@ as.mo <- function(x,
|
|||||||
# From known codes ----
|
# From known codes ----
|
||||||
out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
out[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code] <- AMR::microorganisms.codes$mo[match(toupper(x)[is.na(out) & toupper(x) %in% AMR::microorganisms.codes$code], AMR::microorganisms.codes$code)]
|
||||||
# From SNOMED ----
|
# From SNOMED ----
|
||||||
if (any(is.na(out) & !is.na(x)) && any(is.na(out) & x %in% unlist(AMR_env$MO_lookup$snomed), na.rm = TRUE)) {
|
# based on this extremely fast gem: https://stackoverflow.com/a/11002456/4575331
|
||||||
# found this extremely fast gem here: https://stackoverflow.com/a/11002456/4575331
|
snomeds <- unlist(AMR_env$MO_lookup$snomed)
|
||||||
out[is.na(out) & x %in% unlist(AMR_env$MO_lookup$snomed)] <- AMR_env$MO_lookup$mo[rep(seq_along(AMR_env$MO_lookup$snomed), vapply(FUN.VALUE = double(1), AMR_env$MO_lookup$snomed, length))[match(x[is.na(out) & x %in% unlist(AMR_env$MO_lookup$snomed)], unlist(AMR_env$MO_lookup$snomed))]]
|
snomeds <- snomeds[!is.na(snomeds)]
|
||||||
}
|
out[is.na(out) & x %in% snomeds] <- AMR_env$MO_lookup$mo[rep(seq_along(AMR_env$MO_lookup$snomed), vapply(FUN.VALUE = double(1), AMR_env$MO_lookup$snomed, length))[match(x[is.na(out) & x %in% snomeds], snomeds)]]
|
||||||
# From other familiar output ----
|
# From other familiar output ----
|
||||||
# such as Salmonella groups, colloquial names, etc.
|
# such as Salmonella groups, colloquial names, etc.
|
||||||
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
|
out[is.na(out)] <- convert_colloquial_input(x[is.na(out)])
|
||||||
|
BIN
R/sysdata.rda
BIN
R/sysdata.rda
Binary file not shown.
@ -31,7 +31,7 @@
|
|||||||
# source("data-raw/_pre_commit_hook.R")
|
# source("data-raw/_pre_commit_hook.R")
|
||||||
|
|
||||||
library(dplyr, warn.conflicts = FALSE)
|
library(dplyr, warn.conflicts = FALSE)
|
||||||
try(detach("package:data.table", unload = TRUE), silent = TRUE)
|
try(detach("package:data.table", unload = TRUE), silent = TRUE) # to prevent like() to precede over AMR::like
|
||||||
devtools::load_all(quiet = TRUE)
|
devtools::load_all(quiet = TRUE)
|
||||||
|
|
||||||
suppressMessages(set_AMR_locale("English"))
|
suppressMessages(set_AMR_locale("English"))
|
||||||
@ -165,12 +165,12 @@ MO_PREVALENT_GENERA <- c(
|
|||||||
"Halococcus", "Hendersonula", "Heterophyes", "Histomonas", "Histoplasma", "Hymenolepis", "Hypomyces",
|
"Halococcus", "Hendersonula", "Heterophyes", "Histomonas", "Histoplasma", "Hymenolepis", "Hypomyces",
|
||||||
"Hysterothylacium", "Leishmania", "Malassezia", "Malbranchea", "Metagonimus", "Meyerozyma", "Microsporidium",
|
"Hysterothylacium", "Leishmania", "Malassezia", "Malbranchea", "Metagonimus", "Meyerozyma", "Microsporidium",
|
||||||
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Necator", "Nectria", "Ochroconis", "Oesophagostomum",
|
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Necator", "Nectria", "Ochroconis", "Oesophagostomum",
|
||||||
"Oidiodendron", "Opisthorchis", "Pediculus", "Phlebotomus", "Phoma", "Pichia", "Piedraia", "Pithomyces",
|
"Oidiodendron", "Opisthorchis", "Pediculus", "Penicillium", "Phlebotomus", "Phoma", "Pichia", "Piedraia", "Pithomyces",
|
||||||
"Pityrosporum", "Pneumocystis", "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus",
|
"Pityrosporum", "Pneumocystis", "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus",
|
||||||
"Rhodotorula", "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Spirometra",
|
"Rhodotorula", "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Spirometra",
|
||||||
"Sporobolomyces", "Stachybotrys", "Strongyloides", "Syngamus", "Taenia", "Toxocara", "Trichinella", "Trichobilharzia",
|
"Sporobolomyces", "Stachybotrys", "Strongyloides", "Syngamus", "Taenia", "Talaromyces", "Toxocara", "Trichinella",
|
||||||
"Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus", "Trichuris", "Tritirachium",
|
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus", "Trichuris",
|
||||||
"Trombicula", "Trypanosoma", "Tunga", "Wuchereria"
|
"Tritirachium", "Trombicula", "Trypanosoma", "Tunga", "Wuchereria"
|
||||||
)
|
)
|
||||||
|
|
||||||
# antibiotic groups
|
# antibiotic groups
|
||||||
|
19616
data-raw/bacdive.csv
Normal file
19616
data-raw/bacdive.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -147,23 +147,6 @@ df_remove_nonASCII <- function(df) {
|
|||||||
AMR:::dataset_UTF8_to_ASCII()
|
AMR:::dataset_UTF8_to_ASCII()
|
||||||
}
|
}
|
||||||
|
|
||||||
abbreviate_mo <- function(x, minlength = 5, prefix = "", hyphen_as_space = FALSE, ...) {
|
|
||||||
if (hyphen_as_space == TRUE) {
|
|
||||||
x <- gsub("-", " ", x, fixed = TRUE)
|
|
||||||
}
|
|
||||||
# keep a starting Latin ae
|
|
||||||
suppressWarnings(
|
|
||||||
gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
|
|
||||||
abbreviate(
|
|
||||||
minlength = minlength,
|
|
||||||
use.classes = TRUE,
|
|
||||||
method = "both.sides", ...
|
|
||||||
) %>%
|
|
||||||
paste0(prefix, .) %>%
|
|
||||||
toupper() %>%
|
|
||||||
gsub("(\u00C6|\u00E6)+", "AE", .)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# to retrieve LPSN and authors from LPSN website
|
# to retrieve LPSN and authors from LPSN website
|
||||||
get_lpsn_and_author <- function(rank, name) {
|
get_lpsn_and_author <- function(rank, name) {
|
||||||
@ -936,8 +919,8 @@ mo_phylum <- taxonomy %>%
|
|||||||
) %>%
|
) %>%
|
||||||
group_by(kingdom) %>%
|
group_by(kingdom) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_phylum8 = abbreviate_mo(phylum, minlength = 8, prefix = "[PHL]_"),
|
mo_phylum8 = AMR:::abbreviate_mo(phylum, minlength = 8, prefix = "[PHL]_"),
|
||||||
mo_phylum9 = abbreviate_mo(phylum, minlength = 9, prefix = "[PHL]_"),
|
mo_phylum9 = AMR:::abbreviate_mo(phylum, minlength = 9, prefix = "[PHL]_"),
|
||||||
mo_phylum = ifelse(!is.na(mo_old), mo_old, mo_phylum8),
|
mo_phylum = ifelse(!is.na(mo_old), mo_old, mo_phylum8),
|
||||||
mo_duplicated = duplicated(mo_phylum),
|
mo_duplicated = duplicated(mo_phylum),
|
||||||
mo_phylum = ifelse(mo_duplicated, mo_phylum9, mo_phylum),
|
mo_phylum = ifelse(mo_duplicated, mo_phylum9, mo_phylum),
|
||||||
@ -963,8 +946,8 @@ mo_class <- taxonomy %>%
|
|||||||
) %>%
|
) %>%
|
||||||
group_by(kingdom) %>%
|
group_by(kingdom) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_class8 = abbreviate_mo(class, minlength = 8, prefix = "[CLS]_"),
|
mo_class8 = AMR:::abbreviate_mo(class, minlength = 8, prefix = "[CLS]_"),
|
||||||
mo_class9 = abbreviate_mo(class, minlength = 9, prefix = "[CLS]_"),
|
mo_class9 = AMR:::abbreviate_mo(class, minlength = 9, prefix = "[CLS]_"),
|
||||||
mo_class = ifelse(!is.na(mo_old), mo_old, mo_class8),
|
mo_class = ifelse(!is.na(mo_old), mo_old, mo_class8),
|
||||||
mo_duplicated = duplicated(mo_class),
|
mo_duplicated = duplicated(mo_class),
|
||||||
mo_class = ifelse(mo_duplicated, mo_class9, mo_class),
|
mo_class = ifelse(mo_duplicated, mo_class9, mo_class),
|
||||||
@ -990,8 +973,8 @@ mo_order <- taxonomy %>%
|
|||||||
) %>%
|
) %>%
|
||||||
group_by(kingdom) %>%
|
group_by(kingdom) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_order8 = abbreviate_mo(order, minlength = 8, prefix = "[ORD]_"),
|
mo_order8 = AMR:::abbreviate_mo(order, minlength = 8, prefix = "[ORD]_"),
|
||||||
mo_order9 = abbreviate_mo(order, minlength = 9, prefix = "[ORD]_"),
|
mo_order9 = AMR:::abbreviate_mo(order, minlength = 9, prefix = "[ORD]_"),
|
||||||
mo_order = ifelse(!is.na(mo_old), mo_old, mo_order8),
|
mo_order = ifelse(!is.na(mo_old), mo_old, mo_order8),
|
||||||
mo_duplicated = duplicated(mo_order),
|
mo_duplicated = duplicated(mo_order),
|
||||||
mo_order = ifelse(mo_duplicated, mo_order9, mo_order),
|
mo_order = ifelse(mo_duplicated, mo_order9, mo_order),
|
||||||
@ -1017,8 +1000,8 @@ mo_family <- taxonomy %>%
|
|||||||
) %>%
|
) %>%
|
||||||
group_by(kingdom) %>%
|
group_by(kingdom) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_family8 = abbreviate_mo(family, minlength = 8, prefix = "[FAM]_"),
|
mo_family8 = AMR:::abbreviate_mo(family, minlength = 8, prefix = "[FAM]_"),
|
||||||
mo_family9 = abbreviate_mo(family, minlength = 9, prefix = "[FAM]_"),
|
mo_family9 = AMR:::abbreviate_mo(family, minlength = 9, prefix = "[FAM]_"),
|
||||||
mo_family = ifelse(!is.na(mo_old), mo_old, mo_family8),
|
mo_family = ifelse(!is.na(mo_old), mo_old, mo_family8),
|
||||||
mo_duplicated = duplicated(mo_family),
|
mo_duplicated = duplicated(mo_family),
|
||||||
mo_family = ifelse(mo_duplicated, mo_family9, mo_family),
|
mo_family = ifelse(mo_duplicated, mo_family9, mo_family),
|
||||||
@ -1046,11 +1029,11 @@ mo_genus <- taxonomy %>%
|
|||||||
group_by(kingdom) %>%
|
group_by(kingdom) %>%
|
||||||
# generate new MO codes for genus and set the right one
|
# generate new MO codes for genus and set the right one
|
||||||
mutate(
|
mutate(
|
||||||
mo_genus_new5 = abbreviate_mo(genus, 5),
|
mo_genus_new5 = AMR:::abbreviate_mo(genus, 5),
|
||||||
mo_genus_new5b = paste0(abbreviate_mo(genus, 5), 1),
|
mo_genus_new5b = paste0(AMR:::abbreviate_mo(genus, 5), 1),
|
||||||
mo_genus_new6 = abbreviate_mo(genus, 6),
|
mo_genus_new6 = AMR:::abbreviate_mo(genus, 6),
|
||||||
mo_genus_new7 = abbreviate_mo(genus, 7),
|
mo_genus_new7 = AMR:::abbreviate_mo(genus, 7),
|
||||||
mo_genus_new8 = abbreviate_mo(genus, 8),
|
mo_genus_new8 = AMR:::abbreviate_mo(genus, 8),
|
||||||
mo_genus_new = case_when(
|
mo_genus_new = case_when(
|
||||||
!is.na(mo_genus_old) ~ mo_genus_old,
|
!is.na(mo_genus_old) ~ mo_genus_old,
|
||||||
!mo_genus_new5 %in% mo_genus_old ~ mo_genus_new5,
|
!mo_genus_new5 %in% mo_genus_old ~ mo_genus_new5,
|
||||||
@ -1092,12 +1075,12 @@ mo_species <- taxonomy %>%
|
|||||||
distinct(kingdom, genus, species, .keep_all = TRUE) %>%
|
distinct(kingdom, genus, species, .keep_all = TRUE) %>%
|
||||||
group_by(kingdom, genus) %>%
|
group_by(kingdom, genus) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_species_new4 = abbreviate_mo(species, 4, hyphen_as_space = TRUE),
|
mo_species_new4 = AMR:::abbreviate_mo(species, 4, hyphen_as_space = TRUE),
|
||||||
mo_species_new5 = abbreviate_mo(species, 5, hyphen_as_space = TRUE),
|
mo_species_new5 = AMR:::abbreviate_mo(species, 5, hyphen_as_space = TRUE),
|
||||||
mo_species_new5b = paste0(abbreviate_mo(species, 5, hyphen_as_space = TRUE), 1),
|
mo_species_new5b = paste0(AMR:::abbreviate_mo(species, 5, hyphen_as_space = TRUE), 1),
|
||||||
mo_species_new6 = abbreviate_mo(species, 6, hyphen_as_space = TRUE),
|
mo_species_new6 = AMR:::abbreviate_mo(species, 6, hyphen_as_space = TRUE),
|
||||||
mo_species_new7 = abbreviate_mo(species, 7, hyphen_as_space = TRUE),
|
mo_species_new7 = AMR:::abbreviate_mo(species, 7, hyphen_as_space = TRUE),
|
||||||
mo_species_new8 = abbreviate_mo(species, 8, hyphen_as_space = TRUE),
|
mo_species_new8 = AMR:::abbreviate_mo(species, 8, hyphen_as_space = TRUE),
|
||||||
mo_species_new = case_when(
|
mo_species_new = case_when(
|
||||||
!is.na(mo_species_old) ~ mo_species_old,
|
!is.na(mo_species_old) ~ mo_species_old,
|
||||||
!mo_species_new4 %in% mo_species_old ~ mo_species_new4,
|
!mo_species_new4 %in% mo_species_old ~ mo_species_new4,
|
||||||
@ -1141,12 +1124,12 @@ mo_subspecies <- taxonomy %>%
|
|||||||
distinct(kingdom, genus, species, subspecies, .keep_all = TRUE) %>%
|
distinct(kingdom, genus, species, subspecies, .keep_all = TRUE) %>%
|
||||||
group_by(kingdom, genus, species) %>%
|
group_by(kingdom, genus, species) %>%
|
||||||
mutate(
|
mutate(
|
||||||
mo_subspecies_new4 = abbreviate_mo(subspecies, 4, hyphen_as_space = TRUE),
|
mo_subspecies_new4 = AMR:::abbreviate_mo(subspecies, 4, hyphen_as_space = TRUE),
|
||||||
mo_subspecies_new5 = abbreviate_mo(subspecies, 5, hyphen_as_space = TRUE),
|
mo_subspecies_new5 = AMR:::abbreviate_mo(subspecies, 5, hyphen_as_space = TRUE),
|
||||||
mo_subspecies_new5b = paste0(abbreviate_mo(subspecies, 5, hyphen_as_space = TRUE), 1),
|
mo_subspecies_new5b = paste0(AMR:::abbreviate_mo(subspecies, 5, hyphen_as_space = TRUE), 1),
|
||||||
mo_subspecies_new6 = abbreviate_mo(subspecies, 6, hyphen_as_space = TRUE),
|
mo_subspecies_new6 = AMR:::abbreviate_mo(subspecies, 6, hyphen_as_space = TRUE),
|
||||||
mo_subspecies_new7 = abbreviate_mo(subspecies, 7, hyphen_as_space = TRUE),
|
mo_subspecies_new7 = AMR:::abbreviate_mo(subspecies, 7, hyphen_as_space = TRUE),
|
||||||
mo_subspecies_new8 = abbreviate_mo(subspecies, 8, hyphen_as_space = TRUE),
|
mo_subspecies_new8 = AMR:::abbreviate_mo(subspecies, 8, hyphen_as_space = TRUE),
|
||||||
mo_subspecies_new = case_when(
|
mo_subspecies_new = case_when(
|
||||||
!is.na(mo_subspecies_old) ~ mo_subspecies_old,
|
!is.na(mo_subspecies_old) ~ mo_subspecies_old,
|
||||||
!mo_subspecies_new4 %in% mo_subspecies_old ~ mo_subspecies_new4,
|
!mo_subspecies_new4 %in% mo_subspecies_old ~ mo_subspecies_new4,
|
||||||
@ -1348,6 +1331,69 @@ taxonomy <- taxonomy %>%
|
|||||||
left_join(snomed, by = "fullname")
|
left_join(snomed, by = "fullname")
|
||||||
|
|
||||||
|
|
||||||
|
# Add oxygen tolerance (aerobe/anaerobe) ----------------------------------
|
||||||
|
|
||||||
|
# We will use the BacDive data base for this:
|
||||||
|
# - go to https://bacdive.dsmz.de/advsearch and filter 'Oxygen tolerance' on "*"
|
||||||
|
# - click on the 'Download tabel as CSV' button
|
||||||
|
#
|
||||||
|
bacdive <- vroom::vroom("data-raw/bacdive.csv", skip = 2) %>%
|
||||||
|
select(species, oxygen = `Oxygen tolerance`)
|
||||||
|
bacdive <- bacdive %>%
|
||||||
|
# fill in missing species from previous rows
|
||||||
|
mutate(species = ifelse(is.na(species), lag(species), species)) %>%
|
||||||
|
filter(!is.na(species), !is.na(oxygen), oxygen %unlike% "tolerant")
|
||||||
|
bacdive <- bacdive %>%
|
||||||
|
# now determine type per species
|
||||||
|
group_by(species) %>%
|
||||||
|
summarise(oxygen_tolerance = case_when(any(oxygen %like% "facultative") ~ "facultative anaerobe",
|
||||||
|
all(oxygen == "microaerophile") ~ "microaerophile",
|
||||||
|
all(oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "anaerobe",
|
||||||
|
all(oxygen %in% c("anaerobe", "obligate anaerobe", "microaerophile")) ~ "anaerobe/microaerophile",
|
||||||
|
all(oxygen %in% c("aerobe", "obligate aerobe")) ~ "aerobe",
|
||||||
|
all(!oxygen %in% c("anaerobe", "obligate anaerobe")) ~ "aerobe",
|
||||||
|
all(c("aerobe", "anaerobe") %in% oxygen) ~ "facultative anaerobe",
|
||||||
|
TRUE ~ NA_character_))
|
||||||
|
|
||||||
|
bacdive_genus <- bacdive %>%
|
||||||
|
mutate(genus = gsub("^([A-Za-z]+) .*", "\\1", species), oxygen = oxygen_tolerance) %>%
|
||||||
|
group_by(species = genus) %>%
|
||||||
|
summarise(oxygen_tolerance = case_when(any(oxygen == "facultative anaerobe") ~ "facultative anaerobe",
|
||||||
|
any(oxygen == "anaerobe/microaerophile") ~ "anaerobe/microaerophile",
|
||||||
|
all(oxygen == "microaerophile") ~ "microaerophile",
|
||||||
|
all(oxygen == "anaerobe") ~ "anaerobe",
|
||||||
|
all(oxygen == "aerobe") ~ "aerobe",
|
||||||
|
TRUE ~ "facultative anaerobe"))
|
||||||
|
|
||||||
|
bacdive <- bacdive %>%
|
||||||
|
filter(species %unlike% " sp[.]") %>%
|
||||||
|
bind_rows(bacdive_genus) %>%
|
||||||
|
arrange(species) %>%
|
||||||
|
mutate(mo = as.mo(species, keep_synonyms = FALSE))
|
||||||
|
|
||||||
|
other_species <- microorganisms %>%
|
||||||
|
filter(kingdom == "Bacteria", rank == "species", !mo %in% bacdive$mo, genus %in% bacdive$species) %>%
|
||||||
|
select(species = fullname, genus, mo2 = mo) %>%
|
||||||
|
left_join(bacdive, by = c("genus" = "species")) %>%
|
||||||
|
mutate(oxygen_tolerance = ifelse(oxygen_tolerance %in% c("aerobe", "anaerobe", "microaerophile", "anaerobe/microaerophile"),
|
||||||
|
oxygen_tolerance,
|
||||||
|
paste("likely", oxygen_tolerance))) %>%
|
||||||
|
select(species, oxygen_tolerance, mo = mo2)
|
||||||
|
|
||||||
|
bacdive <- bacdive %>%
|
||||||
|
bind_rows(other_species) %>%
|
||||||
|
arrange(species)
|
||||||
|
|
||||||
|
taxonomy <- taxonomy %>%
|
||||||
|
left_join(
|
||||||
|
bacdive %>%
|
||||||
|
select(-species),
|
||||||
|
by = "mo") %>%
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# TODO look up synonyms and fill them in as well
|
||||||
|
|
||||||
# Clean data set ----------------------------------------------------------
|
# Clean data set ----------------------------------------------------------
|
||||||
|
|
||||||
# format to tibble and check again for invalid characters
|
# format to tibble and check again for invalid characters
|
||||||
|
@ -160,7 +160,7 @@ Furthermore,
|
|||||||
\item Any genus present in the \strong{established} list also has \code{prevalence = 1.0} in the \link{microorganisms} data set;
|
\item Any genus present in the \strong{established} list also has \code{prevalence = 1.0} in the \link{microorganisms} data set;
|
||||||
\item Any other genus present in the \strong{putative} list has \code{prevalence = 1.25} in the \link{microorganisms} data set;
|
\item Any other genus present in the \strong{putative} list has \code{prevalence = 1.25} in the \link{microorganisms} data set;
|
||||||
\item Any other species or subspecies of which the genus is present in the two aforementioned groups, has \code{prevalence = 1.5} in the \link{microorganisms} data set;
|
\item Any other species or subspecies of which the genus is present in the two aforementioned groups, has \code{prevalence = 1.5} in the \link{microorganisms} data set;
|
||||||
\item Any \emph{non-bacterial} genus, species or subspecies of which the genus is present in the following list, has \code{prevalence = 1.5} in the \link{microorganisms} data set: \emph{Absidia}, \emph{Acanthamoeba}, \emph{Acremonium}, \emph{Aedes}, \emph{Alternaria}, \emph{Amoeba}, \emph{Ancylostoma}, \emph{Angiostrongylus}, \emph{Anisakis}, \emph{Anopheles}, \emph{Apophysomyces}, \emph{Aspergillus}, \emph{Aureobasidium}, \emph{Basidiobolus}, \emph{Beauveria}, \emph{Blastocystis}, \emph{Blastomyces}, \emph{Candida}, \emph{Capillaria}, \emph{Chaetomium}, \emph{Chrysonilia}, \emph{Cladophialophora}, \emph{Cladosporium}, \emph{Conidiobolus}, \emph{Contracaecum}, \emph{Cordylobia}, \emph{Cryptococcus}, \emph{Curvularia}, \emph{Demodex}, \emph{Dermatobia}, \emph{Dientamoeba}, \emph{Diphyllobothrium}, \emph{Dirofilaria}, \emph{Echinostoma}, \emph{Entamoeba}, \emph{Enterobius}, \emph{Exophiala}, \emph{Exserohilum}, \emph{Fasciola}, \emph{Fonsecaea}, \emph{Fusarium}, \emph{Giardia}, \emph{Haloarcula}, \emph{Halobacterium}, \emph{Halococcus}, \emph{Hendersonula}, \emph{Heterophyes}, \emph{Histomonas}, \emph{Histoplasma}, \emph{Hymenolepis}, \emph{Hypomyces}, \emph{Hysterothylacium}, \emph{Leishmania}, \emph{Malassezia}, \emph{Malbranchea}, \emph{Metagonimus}, \emph{Meyerozyma}, \emph{Microsporidium}, \emph{Microsporum}, \emph{Mortierella}, \emph{Mucor}, \emph{Mycocentrospora}, \emph{Necator}, \emph{Nectria}, \emph{Ochroconis}, \emph{Oesophagostomum}, \emph{Oidiodendron}, \emph{Opisthorchis}, \emph{Pediculus}, \emph{Phlebotomus}, \emph{Phoma}, \emph{Pichia}, \emph{Piedraia}, \emph{Pithomyces}, \emph{Pityrosporum}, \emph{Pneumocystis}, \emph{Pseudallescheria}, \emph{Pseudoterranova}, \emph{Pulex}, \emph{Rhizomucor}, \emph{Rhizopus}, \emph{Rhodotorula}, \emph{Saccharomyces}, \emph{Sarcoptes}, \emph{Scolecobasidium}, \emph{Scopulariopsis}, \emph{Scytalidium}, \emph{Spirometra}, \emph{Sporobolomyces}, \emph{Stachybotrys}, \emph{Strongyloides}, \emph{Syngamus}, \emph{Taenia}, \emph{Toxocara}, \emph{Trichinella}, \emph{Trichobilharzia}, \emph{Trichoderma}, \emph{Trichomonas}, \emph{Trichophyton}, \emph{Trichosporon}, \emph{Trichostrongylus}, \emph{Trichuris}, \emph{Tritirachium}, \emph{Trombicula}, \emph{Trypanosoma}, \emph{Tunga}, or \emph{Wuchereria};
|
\item Any \emph{non-bacterial} genus, species or subspecies of which the genus is present in the following list, has \code{prevalence = 1.5} in the \link{microorganisms} data set: \emph{Absidia}, \emph{Acanthamoeba}, \emph{Acremonium}, \emph{Aedes}, \emph{Alternaria}, \emph{Amoeba}, \emph{Ancylostoma}, \emph{Angiostrongylus}, \emph{Anisakis}, \emph{Anopheles}, \emph{Apophysomyces}, \emph{Aspergillus}, \emph{Aureobasidium}, \emph{Basidiobolus}, \emph{Beauveria}, \emph{Blastocystis}, \emph{Blastomyces}, \emph{Candida}, \emph{Capillaria}, \emph{Chaetomium}, \emph{Chrysonilia}, \emph{Cladophialophora}, \emph{Cladosporium}, \emph{Conidiobolus}, \emph{Contracaecum}, \emph{Cordylobia}, \emph{Cryptococcus}, \emph{Curvularia}, \emph{Demodex}, \emph{Dermatobia}, \emph{Dientamoeba}, \emph{Diphyllobothrium}, \emph{Dirofilaria}, \emph{Echinostoma}, \emph{Entamoeba}, \emph{Enterobius}, \emph{Exophiala}, \emph{Exserohilum}, \emph{Fasciola}, \emph{Fonsecaea}, \emph{Fusarium}, \emph{Giardia}, \emph{Haloarcula}, \emph{Halobacterium}, \emph{Halococcus}, \emph{Hendersonula}, \emph{Heterophyes}, \emph{Histomonas}, \emph{Histoplasma}, \emph{Hymenolepis}, \emph{Hypomyces}, \emph{Hysterothylacium}, \emph{Leishmania}, \emph{Malassezia}, \emph{Malbranchea}, \emph{Metagonimus}, \emph{Meyerozyma}, \emph{Microsporidium}, \emph{Microsporum}, \emph{Mortierella}, \emph{Mucor}, \emph{Mycocentrospora}, \emph{Necator}, \emph{Nectria}, \emph{Ochroconis}, \emph{Oesophagostomum}, \emph{Oidiodendron}, \emph{Opisthorchis}, \emph{Pediculus}, \emph{Penicillium}, \emph{Phlebotomus}, \emph{Phoma}, \emph{Pichia}, \emph{Piedraia}, \emph{Pithomyces}, \emph{Pityrosporum}, \emph{Pneumocystis}, \emph{Pseudallescheria}, \emph{Pseudoterranova}, \emph{Pulex}, \emph{Rhizomucor}, \emph{Rhizopus}, \emph{Rhodotorula}, \emph{Saccharomyces}, \emph{Sarcoptes}, \emph{Scolecobasidium}, \emph{Scopulariopsis}, \emph{Scytalidium}, \emph{Spirometra}, \emph{Sporobolomyces}, \emph{Stachybotrys}, \emph{Strongyloides}, \emph{Syngamus}, \emph{Taenia}, \emph{Talaromyces}, \emph{Toxocara}, \emph{Trichinella}, \emph{Trichobilharzia}, \emph{Trichoderma}, \emph{Trichomonas}, \emph{Trichophyton}, \emph{Trichosporon}, \emph{Trichostrongylus}, \emph{Trichuris}, \emph{Tritirachium}, \emph{Trombicula}, \emph{Trypanosoma}, \emph{Tunga}, or \emph{Wuchereria};
|
||||||
\item All other records have \code{prevalence = 2.0} in the \link{microorganisms} data set.
|
\item All other records have \code{prevalence = 2.0} in the \link{microorganisms} data set.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ Furthermore,
|
|||||||
\item Any genus present in the \strong{established} list also has \code{prevalence = 1.0} in the \link{microorganisms} data set;
|
\item Any genus present in the \strong{established} list also has \code{prevalence = 1.0} in the \link{microorganisms} data set;
|
||||||
\item Any other genus present in the \strong{putative} list has \code{prevalence = 1.25} in the \link{microorganisms} data set;
|
\item Any other genus present in the \strong{putative} list has \code{prevalence = 1.25} in the \link{microorganisms} data set;
|
||||||
\item Any other species or subspecies of which the genus is present in the two aforementioned groups, has \code{prevalence = 1.5} in the \link{microorganisms} data set;
|
\item Any other species or subspecies of which the genus is present in the two aforementioned groups, has \code{prevalence = 1.5} in the \link{microorganisms} data set;
|
||||||
\item Any \emph{non-bacterial} genus, species or subspecies of which the genus is present in the following list, has \code{prevalence = 1.5} in the \link{microorganisms} data set: \emph{Absidia}, \emph{Acanthamoeba}, \emph{Acremonium}, \emph{Aedes}, \emph{Alternaria}, \emph{Amoeba}, \emph{Ancylostoma}, \emph{Angiostrongylus}, \emph{Anisakis}, \emph{Anopheles}, \emph{Apophysomyces}, \emph{Aspergillus}, \emph{Aureobasidium}, \emph{Basidiobolus}, \emph{Beauveria}, \emph{Blastocystis}, \emph{Blastomyces}, \emph{Candida}, \emph{Capillaria}, \emph{Chaetomium}, \emph{Chrysonilia}, \emph{Cladophialophora}, \emph{Cladosporium}, \emph{Conidiobolus}, \emph{Contracaecum}, \emph{Cordylobia}, \emph{Cryptococcus}, \emph{Curvularia}, \emph{Demodex}, \emph{Dermatobia}, \emph{Dientamoeba}, \emph{Diphyllobothrium}, \emph{Dirofilaria}, \emph{Echinostoma}, \emph{Entamoeba}, \emph{Enterobius}, \emph{Exophiala}, \emph{Exserohilum}, \emph{Fasciola}, \emph{Fonsecaea}, \emph{Fusarium}, \emph{Giardia}, \emph{Haloarcula}, \emph{Halobacterium}, \emph{Halococcus}, \emph{Hendersonula}, \emph{Heterophyes}, \emph{Histomonas}, \emph{Histoplasma}, \emph{Hymenolepis}, \emph{Hypomyces}, \emph{Hysterothylacium}, \emph{Leishmania}, \emph{Malassezia}, \emph{Malbranchea}, \emph{Metagonimus}, \emph{Meyerozyma}, \emph{Microsporidium}, \emph{Microsporum}, \emph{Mortierella}, \emph{Mucor}, \emph{Mycocentrospora}, \emph{Necator}, \emph{Nectria}, \emph{Ochroconis}, \emph{Oesophagostomum}, \emph{Oidiodendron}, \emph{Opisthorchis}, \emph{Pediculus}, \emph{Phlebotomus}, \emph{Phoma}, \emph{Pichia}, \emph{Piedraia}, \emph{Pithomyces}, \emph{Pityrosporum}, \emph{Pneumocystis}, \emph{Pseudallescheria}, \emph{Pseudoterranova}, \emph{Pulex}, \emph{Rhizomucor}, \emph{Rhizopus}, \emph{Rhodotorula}, \emph{Saccharomyces}, \emph{Sarcoptes}, \emph{Scolecobasidium}, \emph{Scopulariopsis}, \emph{Scytalidium}, \emph{Spirometra}, \emph{Sporobolomyces}, \emph{Stachybotrys}, \emph{Strongyloides}, \emph{Syngamus}, \emph{Taenia}, \emph{Toxocara}, \emph{Trichinella}, \emph{Trichobilharzia}, \emph{Trichoderma}, \emph{Trichomonas}, \emph{Trichophyton}, \emph{Trichosporon}, \emph{Trichostrongylus}, \emph{Trichuris}, \emph{Tritirachium}, \emph{Trombicula}, \emph{Trypanosoma}, \emph{Tunga}, or \emph{Wuchereria};
|
\item Any \emph{non-bacterial} genus, species or subspecies of which the genus is present in the following list, has \code{prevalence = 1.5} in the \link{microorganisms} data set: \emph{Absidia}, \emph{Acanthamoeba}, \emph{Acremonium}, \emph{Aedes}, \emph{Alternaria}, \emph{Amoeba}, \emph{Ancylostoma}, \emph{Angiostrongylus}, \emph{Anisakis}, \emph{Anopheles}, \emph{Apophysomyces}, \emph{Aspergillus}, \emph{Aureobasidium}, \emph{Basidiobolus}, \emph{Beauveria}, \emph{Blastocystis}, \emph{Blastomyces}, \emph{Candida}, \emph{Capillaria}, \emph{Chaetomium}, \emph{Chrysonilia}, \emph{Cladophialophora}, \emph{Cladosporium}, \emph{Conidiobolus}, \emph{Contracaecum}, \emph{Cordylobia}, \emph{Cryptococcus}, \emph{Curvularia}, \emph{Demodex}, \emph{Dermatobia}, \emph{Dientamoeba}, \emph{Diphyllobothrium}, \emph{Dirofilaria}, \emph{Echinostoma}, \emph{Entamoeba}, \emph{Enterobius}, \emph{Exophiala}, \emph{Exserohilum}, \emph{Fasciola}, \emph{Fonsecaea}, \emph{Fusarium}, \emph{Giardia}, \emph{Haloarcula}, \emph{Halobacterium}, \emph{Halococcus}, \emph{Hendersonula}, \emph{Heterophyes}, \emph{Histomonas}, \emph{Histoplasma}, \emph{Hymenolepis}, \emph{Hypomyces}, \emph{Hysterothylacium}, \emph{Leishmania}, \emph{Malassezia}, \emph{Malbranchea}, \emph{Metagonimus}, \emph{Meyerozyma}, \emph{Microsporidium}, \emph{Microsporum}, \emph{Mortierella}, \emph{Mucor}, \emph{Mycocentrospora}, \emph{Necator}, \emph{Nectria}, \emph{Ochroconis}, \emph{Oesophagostomum}, \emph{Oidiodendron}, \emph{Opisthorchis}, \emph{Pediculus}, \emph{Penicillium}, \emph{Phlebotomus}, \emph{Phoma}, \emph{Pichia}, \emph{Piedraia}, \emph{Pithomyces}, \emph{Pityrosporum}, \emph{Pneumocystis}, \emph{Pseudallescheria}, \emph{Pseudoterranova}, \emph{Pulex}, \emph{Rhizomucor}, \emph{Rhizopus}, \emph{Rhodotorula}, \emph{Saccharomyces}, \emph{Sarcoptes}, \emph{Scolecobasidium}, \emph{Scopulariopsis}, \emph{Scytalidium}, \emph{Spirometra}, \emph{Sporobolomyces}, \emph{Stachybotrys}, \emph{Strongyloides}, \emph{Syngamus}, \emph{Taenia}, \emph{Talaromyces}, \emph{Toxocara}, \emph{Trichinella}, \emph{Trichobilharzia}, \emph{Trichoderma}, \emph{Trichomonas}, \emph{Trichophyton}, \emph{Trichosporon}, \emph{Trichostrongylus}, \emph{Trichuris}, \emph{Tritirachium}, \emph{Trombicula}, \emph{Trypanosoma}, \emph{Tunga}, or \emph{Wuchereria};
|
||||||
\item All other records have \code{prevalence = 2.0} in the \link{microorganisms} data set.
|
\item All other records have \code{prevalence = 2.0} in the \link{microorganisms} data set.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user