1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 18:41:58 +02:00

incorporate Bartlett et al (2022)

This commit is contained in:
2022-12-19 15:32:41 +01:00
parent 23fe427cbc
commit b1b7534c78
24 changed files with 36375 additions and 36304 deletions

View File

@ -168,29 +168,21 @@ MO_STREP_ABCG <- AMR_env$MO_lookup$mo[which(AMR_env$MO_lookup$genus == "Streptoc
))]
MO_FULLNAME_LOWER <- create_MO_fullname_lower()
MO_PREVALENT_GENERA <- c(
"Absidia", "Acanthamoeba", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
"Alternaria", "Amoeba", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
"Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
"Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
"Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
"Chlamydia", "Chlamydophila", "Christensenella", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
"Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
"Dermatobia", "Dientamoeba", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
"Empedobacter", "Entamoeba", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
"Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
"Heterophyes", "Histomonas", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Leishmania", "Lelliottia",
"Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus", "Meyerozyma",
"Microsporidium", "Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
"Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
"Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
"Phocanema", "Phoma", "Pichia", "Piedraia", "Pithomyces", "Pityrosporum", "Pneumocystis", "Porphyromonas", "Prevotella",
"Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
"Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
"Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
"Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
"Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
"Trichuris", "Tritirachium", "Trypanosoma", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
"Weeksella", "Wuchereria"
"Absidia", "Acanthamoeba", "Acremonium", "Aedes", "Alternaria", "Amoeba", "Ancylostoma", "Angiostrongylus",
"Anisakis", "Anopheles", "Apophysomyces", "Aspergillus", "Aureobasidium", "Basidiobolus", "Beauveria",
"Blastocystis", "Blastomyces", "Candida", "Capillaria", "Chaetomium", "Chrysonilia", "Cladophialophora",
"Cladosporium", "Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Demodex",
"Dermatobia", "Dientamoeba", "Diphyllobothrium", "Dirofilaria", "Echinostoma", "Entamoeba", "Enterobius",
"Exophiala", "Exserohilum", "Fasciola", "Fonsecaea", "Fusarium", "Giardia", "Haloarcula", "Halobacterium",
"Halococcus", "Hendersonula", "Heterophyes", "Histomonas", "Histoplasma", "Hymenolepis", "Hypomyces",
"Hysterothylacium", "Leishmania", "Malassezia", "Malbranchea", "Metagonimus", "Meyerozyma", "Microsporidium",
"Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Necator", "Nectria", "Ochroconis", "Oesophagostomum",
"Oidiodendron", "Opisthorchis", "Pediculus", "Phlebotomus", "Phoma", "Pichia", "Piedraia", "Pithomyces",
"Pityrosporum", "Pneumocystis", "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus",
"Rhodotorula", "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Spirometra",
"Sporobolomyces", "Stachybotrys", "Strongyloides", "Syngamus", "Taenia", "Toxocara", "Trichinella", "Trichobilharzia",
"Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus", "Trichuris", "Tritirachium",
"Trombicula", "Trypanosoma", "Tunga", "Wuchereria"
)
# antibiotic groups

View File

@ -69,9 +69,9 @@ genus_species is Moraxella catarrhalis ERY S AZM, CLR, RXT S Moraxella catarrhal
genus_species is Moraxella catarrhalis ERY I AZM, CLR, RXT I Moraxella catarrhalis Breakpoints 10
genus_species is Moraxella catarrhalis ERY R AZM, CLR, RXT R Moraxella catarrhalis Breakpoints 10
genus_species is Moraxella catarrhalis TCY S DOX, MNO S Moraxella catarrhalis Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN S AMP, AMX, PIP, TZP, TIC S Anaerobic Gram-positives Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN I AMP, AMX, PIP, TZP, TIC I Anaerobic Gram-positives Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN R AMP, AMX, PIP, TZP, TIC R Anaerobic Gram-positives Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN S AMP, AMX, PIP, TZP, TIC S Anaerobic Gram-positives Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN I AMP, AMX, PIP, TZP, TIC I Anaerobic Gram-positives Breakpoints 10
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN R AMP, AMX, PIP, TZP, TIC R Anaerobic Gram-positives Breakpoints 10
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN S AMP, AMX, PIP, TZP, TIC S Anaerobic Gram-negatives Breakpoints 10
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN I AMP, AMX, PIP, TZP, TIC I Anaerobic Gram-negatives Breakpoints 10
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN R AMP, AMX, PIP, TZP, TIC R Anaerobic Gram-negatives Breakpoints 10
@ -175,9 +175,9 @@ genus_species is Moraxella catarrhalis ERY S AZM, CLR, RXT S Moraxella catarrhal
genus_species is Moraxella catarrhalis ERY I AZM, CLR, RXT I Moraxella catarrhalis Breakpoints 11
genus_species is Moraxella catarrhalis ERY R AZM, CLR, RXT R Moraxella catarrhalis Breakpoints 11
genus_species is Moraxella catarrhalis TCY S DOX, MNO S Moraxella catarrhalis Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN S AMP, AMX, PIP, TIC S Anaerobic Gram-positives Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN I AMP, AMX, PIP, TIC I Anaerobic Gram-positives Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN R AMP, AMX, PIP, TIC R Anaerobic Gram-positives Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN S AMP, AMX, PIP, TIC S Anaerobic Gram-positives Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN I AMP, AMX, PIP, TIC I Anaerobic Gram-positives Breakpoints 11
genus one_of Actinomyces, Bifidobacterium, Clostridium, Clostridioides, Cutibacterium, Eggerthella, Eubacterium, Lactobacillus, Propionibacterium PEN R AMP, AMX, PIP, TIC R Anaerobic Gram-positives Breakpoints 11
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN S AMP, AMX, PIP, TIC S Anaerobic Gram-negatives Breakpoints 11
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN I AMP, AMX, PIP, TIC I Anaerobic Gram-negatives Breakpoints 11
genus one_of Bacteroides, Bilophila , Fusobacterium, Mobiluncus, Porphyromonas, Prevotella PEN R AMP, AMX, PIP, TIC R Anaerobic Gram-negatives Breakpoints 11

Can't render this file because it contains an unexpected character in line 5 and column 96.

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
6e6f44705995094be5eddc00e0878308
9e75112567e7786a6712024730056057

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -36,20 +36,29 @@
# 2. Go to https://lpsn.dsmz.de/downloads (register first) and download the latest
# CSV file (~12,5 MB) as "taxonomy.csv". Their API unfortunately does
# not include the full taxonomy and is currently (2022) pretty worthless.
# 3. Set this folder_location to the path where these two files are:
# 3. For data about human pathogens, we use Bartlett et al. (2022),
# https://doi.org/10.1099/mic.0.001269. Their latest supplementary material
# can be found here: https://github.com/padpadpadpad/bartlett_et_al_2022_human_pathogens.
#. Download their latest xlsx file in the `data` folder and save it to our
#. `data-raw` folder.
# 4. Set this folder_location to the path where these two files are:
folder_location <- "~/Downloads/backbone/"
file_gbif <- paste0(folder_location, "Taxon.tsv")
file_lpsn <- paste0(folder_location, "taxonomy.csv")
file_bartlett <- "data-raw/bartlett_et_al_2022_human_pathogens.xlsx"
# 4. Run the rest of this script line by line and check everything :)
if (!file.exists(file_gbif)) stop("GBIF file not found")
if (!file.exists(file_lpsn)) stop("LPSN file not found")
if (!file.exists(file_bartlett)) stop("Bartlett et al. Excel file not found")
library(dplyr)
library(vroom) # to import files
library(rvest) # to scape LPSN website
library(progress) # to show progress bars
library(readxl) # for reading the Bartlett Excel file
devtools::load_all(".") # load AMR package
# Helper functions --------------------------------------------------------
@ -776,32 +785,6 @@ taxonomy$gbif_parent[taxonomy$rank == "subspecies" & !is.na(taxonomy$gbif)] <- t
all(taxonomy$lpsn_parent %in% taxonomy$lpsn)
all(taxonomy$gbif_parent %in% taxonomy$gbif)
# Add prevalence ----------------------------------------------------------
# update prevalence based on taxonomy (our own JSS paper: Berends MS et al. (2022), DOI 10.18637/jss.v104.i03)
taxonomy <- taxonomy %>%
mutate(prevalence = case_when(
class == "Gammaproteobacteria" |
genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
~ 1,
kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") &
(phylum %in% c(
"Sarcomastigophora",
"Firmicutes", # old, now Bacillota
"Bacillota",
"Proteobacteria", # old, now Pseudomonadota
"Pseudomonadota",
"Actinobacteria", # old, now Actinomycetota
"Actinomycetota"
) |
genus %in% AMR:::MO_PREVALENT_GENERA)
~ 2,
TRUE ~ 3
))
table(taxonomy$prevalence, useNA = "always")
# (a lot will be removed further below)
# fix rank
taxonomy <- taxonomy %>%
mutate(rank = case_when(
@ -817,6 +800,71 @@ taxonomy <- taxonomy %>%
))
# Add prevalence ----------------------------------------------------------
pathogens <- read_excel(file_bartlett, sheet = "Tab 6 Full List")
# get all established, both old and current taxonomic names
established <- pathogens %>%
filter(status == "established") %>%
mutate(fullname = paste(genus, species)) %>%
pull(fullname) %>%
c(unlist(mo_current(.)),
unlist(mo_synonyms(., keep_synonyms = FALSE))) %>%
strsplit(" ", fixed = TRUE) %>%
sapply(function(x) ifelse(length(x) == 1, x, paste(x[1], x[2]))) %>%
sort() %>%
unique()
# get all putative, both old and current taxonomic names
putative <- pathogens %>%
filter(status == "putative") %>%
mutate(fullname = paste(genus, species)) %>%
pull(fullname) %>%
c(unlist(mo_current(.)),
unlist(mo_synonyms(., keep_synonyms = FALSE))) %>%
strsplit(" ", fixed = TRUE) %>%
sapply(function(x) ifelse(length(x) == 1, x, paste(x[1], x[2]))) %>%
sort() %>%
unique()
established <- established[established %unlike% "unknown"]
putative <- putative[putative %unlike% "unknown"]
other_bacterial_genera <- c(established, putative) %>%
strsplit(" ", fixed = TRUE) %>%
sapply(function(x) x[1]) %>%
sort() %>%
unique()
other_genera <- AMR:::MO_PREVALENT_GENERA %>%
c(unlist(mo_current(.)),
unlist(mo_synonyms(., keep_synonyms = FALSE))) %>%
strsplit(" ", fixed = TRUE) %>%
sapply(function(x) x[1]) %>%
sort() %>%
unique()
other_genera <- other_genera[other_genera %unlike% "unknown"]
# update prevalence based on taxonomy (following the recent and thorough work of Bartlett et al., 2022)
# see https://doi.org/10.1099/mic.0.001269
taxonomy <- taxonomy %>%
mutate(prevalence = case_when(
# 'established' gets a 1 and means 'have infected at least three persons in three or more references'
paste(genus, species) %in% established & rank %in% c("genus", "species", "subspecies") ~ 1.0,
# 'putative' gets a 2 and means 'fewer than three known cases'
paste(genus, species) %in% putative & rank %in% c("genus", "species", "subspecies") ~ 2.0,
# other species from a genus in either group get a 2.5
genus %in% other_bacterial_genera & rank %in% c("genus", "species", "subspecies") ~ 2.5,
# we keep track of prevalent genera too of non-bacterial species
genus %in% AMR:::MO_PREVALENT_GENERA & kingdom != "Bacteria" & rank %in% c("genus", "species", "subspecies") ~ 2.5,
# all others get a 3
TRUE ~ 3.0))
table(taxonomy$prevalence, useNA = "always")
# (a lot will be removed further below)
# Save intermediate results (2) -------------------------------------------
saveRDS(taxonomy, "data-raw/taxonomy2.rds")