1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-08 07:51:57 +02:00

(v0.7.1.9024) eucast_rules() fix, new MOs

This commit is contained in:
2019-08-06 14:39:22 +02:00
parent 85b62aaf8f
commit 3a1f960f89
23 changed files with 252 additions and 411 deletions

View File

@ -1,4 +1,14 @@
if_mo_property like_is_one_of this_value and_these_antibiotics have_these_values then_change_these_antibiotics to_value reference.rule reference.rule_group
# ---------------------------------------------------------------------------------------------------
# For editing this EUCAST reference file, these values can all be used for target antibiotics:
# all_betalactams, aminoglycosides, carbapenems, cephalosporins, cephalosporins_without_CAZ, fluoroquinolones,
# glycopeptides, macrolides, minopenicillins, polymyxins, streptogramins, tetracyclines, ureidopenicillins
# and all separate EARS-Net letter codes like AMC. They can be separated by comma: 'AMC, fluoroquinolones'.
# The if_mo_property column can be any column name from the AMR::microorganisms data set, or "genus_species" or "gramstain".
# The like.is.one_of column must contain one of: like, is, one_of ('like' will read the first column as regular expression)
# The EUCAST guideline contains references to the 'Burkholderia cepacia complex'. All species in this group can be found in: LiPuma J, Curr Opin Pulm Med. 2005 Nov;11(6):528-33. (PMID 16217180).
# >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<<
# ---------------------------------------------------------------------------------------------------
if_mo_property like.is.one_of this_value and_these_antibiotics have_these_values then_change_these_antibiotics to_value reference.rule reference.rule_group
order is Enterobacteriales AMP S AMX S Enterobacteriales (Order) Breakpoints
order is Enterobacteriales AMP I AMX I Enterobacteriales (Order) Breakpoints
order is Enterobacteriales AMP R AMX R Enterobacteriales (Order) Breakpoints

Can't render this file because it contains an unexpected character in line 6 and column 94.

View File

@ -1,14 +1,16 @@
# EUCAST rules ----
# For editing the reference file, these values can all be used for target antibiotics:
# "aminoglycosides", "tetracyclines", "polymyxins", "macrolides", "glycopeptides",
# "streptogramins", "cephalosporins", "cephalosporins_without_CAZ", "carbapenems",
# "minopenicillins", "ureidopenicillins", "fluoroquinolones", "all_betalactams",
# and all separate EARS-Net letter codes like "AMC". They can be separated by comma: "AMC, fluoroquinolones".
# The mo_property can be any column name from the AMR::microorganisms data set, or "genus_species" or "gramstain".
# The EUCAST guideline contains references to the 'Burkholderia cepacia complex'. The species in this group can be found in:
# LiPuma JJ, Curr Opin Pulm Med. 2005 Nov;11(6):528-33. (PMID 16217180).
# ---------------------------------------------------------------------------------------------------
# For editing this EUCAST reference file, these values can all be used for target antibiotics:
# all_betalactams, aminoglycosides, carbapenems, cephalosporins, cephalosporins_without_CAZ, fluoroquinolones,
# glycopeptides, macrolides, minopenicillins, polymyxins, streptogramins, tetracyclines, ureidopenicillins
# and all separate EARS-Net letter codes like AMC. They can be separated by comma: 'AMC, fluoroquinolones'.
# The if_mo_property column can be any column name from the AMR::microorganisms data set, or "genus_species" or "gramstain".
# The EUCAST guideline contains references to the 'Burkholderia cepacia complex'. All species in this group can be found in:
# LiPuma J, Curr Opin Pulm Med. 2005 Nov;11(6):528-33. (PMID 16217180).
# >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<<
# ---------------------------------------------------------------------------------------------------
eucast_rules_file <- dplyr::arrange(
.data = utils::read.delim(file = "data-raw/eucast_rules.tsv",
skip = 10,
sep = "\t",
stringsAsFactors = FALSE,
header = TRUE,

View File

@ -99,7 +99,7 @@ MOs <- data_total %>%
# and not all fungi: Aspergillus, Candida, Trichophyton and Pneumocystis are the most important,
# so only keep these orders from the fungi:
& !(kingdom == "Fungi"
& !order %in% c("Eurotiales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales"))
& !order %in% c("Eurotiales", "Mucorales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales", "Onygenales", "Pneumocystales"))
)
# or the genus has to be one of the genera we found in our hospitals in the last decades (Northern Netherlands, 2002-2018)
| genus %in% c("Absidia", "Acremonium", "Actinotignum", "Alternaria", "Anaerosalibacter", "Ancylostoma", "Anisakis", "Apophysomyces",
@ -123,7 +123,7 @@ MOs <- MOs %>%
MOs <- MOs %>%
# remove text if it contains 'Not assigned' like phylum in viruses
mutate_all(~gsub("Not assigned", "", .))
mutate_all(~gsub("(Not assigned|\\[homonym\\]|\\[mistake\\])", "", ., ignore.case = TRUE))
MOs <- MOs %>%
# Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
@ -166,8 +166,10 @@ MOs <- MOs %>%
# Remove non-ASCII characters (these are not allowed by CRAN)
MOs <- MOs %>%
lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
as_tibble(stringsAsFactors = FALSE)
lapply(iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
as_tibble(stringsAsFactors = FALSE) %>%
# remove invalid characters
mutate_all(~gsub("[\"'`]+", "", .))
# Split old taxonomic names - they refer in the original data to a new `taxonID` with `acceptedNameUsageID`
MOs.old <- MOs %>%
@ -219,6 +221,9 @@ MOs <- MOs %>%
!(source == "DSMZ" & fullname %in% (MOs %>% filter(source == "CoL") %>% pull(fullname)))) %>%
distinct(fullname, .keep_all = TRUE)
# what characters are in the fullnames?
paste(unique(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = "")))), collapse = "")
# Add abbreviations so we can easily tell which one is which.
# These will become valid and unique microbial IDs for the AMR package.
MOs <- MOs %>%
@ -295,7 +300,6 @@ MOs <- MOs %>%
# put `mo` in front, followed by the rest
select(mo, everything(), -abbr_other, -abbr_genus, -abbr_species, -abbr_subspecies)
# add non-taxonomic entries
MOs <- MOs %>%
bind_rows(
@ -348,6 +352,38 @@ MOs <- MOs %>%
species_id = "",
source = "manually added",
stringsAsFactors = FALSE),
data.frame(mo = "F_YEAST",
col_id = NA_integer_,
fullname = "(unknown yeast)",
kingdom = "Fungi",
phylum = "(unknown phylum)",
class = "(unknown class)",
order = "(unknown order)",
family = "(unknown family)",
genus = "(unknown genus)",
species = "(unknown species)",
subspecies = "(unknown subspecies)",
rank = "species",
ref = NA_character_,
species_id = "",
source = "manually added",
stringsAsFactors = FALSE),
data.frame(mo = "F_FUNGUS",
col_id = NA_integer_,
fullname = "(unknown fungus)",
kingdom = "Fungi",
phylum = "(unknown phylum)",
class = "(unknown class)",
order = "(unknown order)",
family = "(unknown family)",
genus = "(unknown genus)",
species = "(unknown species)",
subspecies = "(unknown subspecies)",
rank = "species",
ref = NA_character_,
species_id = "",
source = "manually added",
stringsAsFactors = FALSE),
# CoNS
MOs %>%
filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
@ -488,6 +524,11 @@ MOs <- MOs %>%
sum(duplicated(MOs$mo))
colnames(MOs)
# here we welcome the new ones:
MOs %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
# and the ones we lost:
AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View()
# set prevalence per species
MOs <- MOs %>%
mutate(prevalence = case_when(
@ -534,12 +575,16 @@ MOs.old$col_id <- as.integer(MOs.old$col_id)
MOs.old$col_id_new <- as.integer(MOs.old$col_id_new)
# save
### for other server
saveRDS(MOs, "microorganisms.rds")
saveRDS(MOs.old, "microorganisms.old.rds")
### for same server
microorganisms <- MOs
microorganisms.old <- MOs.old
# on the server, do:
usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
rm(microorganisms)
rm(microorganisms.old)
# and update the year in R/data.R
# and update the year and dimensions in R/data.R