2021-12-12 11:36:58 +01:00
|
|
|
|
# ==================================================================== #
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# TITLE: #
|
2022-10-05 09:12:22 +02:00
|
|
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# #
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# SOURCE CODE: #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# https://github.com/msberends/AMR #
|
|
|
|
|
# #
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# PLEASE CITE THIS SOFTWARE AS: #
|
2022-10-05 09:12:22 +02:00
|
|
|
|
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
|
|
|
|
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
|
|
|
|
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
2023-05-27 10:39:22 +02:00
|
|
|
|
# https://doi.org/10.18637/jss.v104.i03 #
|
2022-10-05 09:12:22 +02:00
|
|
|
|
# #
|
2022-12-27 15:16:15 +01:00
|
|
|
|
# Developed at the University of Groningen and the University Medical #
|
|
|
|
|
# Center Groningen in The Netherlands, in collaboration with many #
|
|
|
|
|
# colleagues from around the world, see our website. #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# #
|
|
|
|
|
# This R package is free software; you can freely use and distribute #
|
|
|
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
|
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
|
|
|
# the Free Software Foundation. #
|
|
|
|
|
# We created this package for both routine data analysis and academic #
|
|
|
|
|
# research and it was publicly released in the hope that it will be #
|
|
|
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
|
|
|
|
# #
|
|
|
|
|
# Visit our website for the full manual and a complete tutorial about #
|
|
|
|
|
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
|
|
|
|
# ==================================================================== #
|
|
|
|
|
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# This script runs in 20-30 minutes and renews all guidelines of CLSI and EUCAST!
|
2023-01-21 23:47:20 +01:00
|
|
|
|
# Run it with source("data-raw/reproduction_of_clinical_breakpoints.R")
|
2021-12-13 10:18:28 +01:00
|
|
|
|
|
2019-05-10 16:44:59 +02:00
|
|
|
|
library(dplyr)
|
2020-07-29 10:33:47 +02:00
|
|
|
|
library(readr)
|
|
|
|
|
library(tidyr)
|
2023-04-19 00:31:31 +02:00
|
|
|
|
devtools::load_all()
|
2021-12-13 10:18:28 +01:00
|
|
|
|
|
2023-07-08 17:30:05 +02:00
|
|
|
|
# Install the WHONET software on Windows (http://www.whonet.org/software.html),
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
|
2022-10-29 14:15:23 +02:00
|
|
|
|
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-07-08 17:30:05 +02:00
|
|
|
|
# BE SURE TO RUN data-raw/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!
|
|
|
|
|
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# READ DATA ----
|
2023-04-14 23:14:34 +02:00
|
|
|
|
|
2022-10-30 14:31:45 +01:00
|
|
|
|
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# remove old taxonomic names
|
2022-10-30 14:31:45 +01:00
|
|
|
|
filter(TAXONOMIC_STATUS == "C") %>%
|
2023-06-26 13:52:02 +02:00
|
|
|
|
mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
|
|
|
|
|
|
|
|
|
|
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"),
|
|
|
|
|
show_col_types = FALSE, guess_max = Inf) %>%
|
|
|
|
|
filter(GUIDELINES %in% c("CLSI", "EUCAST"))
|
|
|
|
|
|
|
|
|
|
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
|
|
|
|
|
arrange(WHONET_ABX_CODE) %>%
|
|
|
|
|
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
|
|
|
|
|
|
|
|
|
|
# MICROORGANISMS WHONET CODES ----
|
|
|
|
|
|
|
|
|
|
whonet_organisms <- whonet_organisms %>%
|
|
|
|
|
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
|
2022-10-30 14:31:45 +01:00
|
|
|
|
mutate(
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# this one was called Issatchenkia orientalis, but it should be:
|
2023-04-14 23:14:34 +02:00
|
|
|
|
ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
|
2023-06-26 13:52:02 +02:00
|
|
|
|
) %>%
|
|
|
|
|
# try to match on GBIF identifier
|
|
|
|
|
left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>%
|
|
|
|
|
# remove duplicates
|
|
|
|
|
arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
|
|
|
|
|
distinct(ORGANISM_CODE, .keep_all = TRUE) %>%
|
|
|
|
|
# add Enterobacterales, which is a subkingdom code in their data
|
|
|
|
|
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>%
|
|
|
|
|
arrange(ORGANISM)
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-06-26 13:52:02 +02:00
|
|
|
|
|
|
|
|
|
## Add new WHO codes to microorganisms.codes ----
|
|
|
|
|
|
|
|
|
|
matched <- whonet_organisms %>% filter(!is.na(mo))
|
|
|
|
|
unmatched <- whonet_organisms %>% filter(is.na(mo))
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# generate the mo codes and add their names
|
2023-06-26 13:52:02 +02:00
|
|
|
|
message("Getting MO codes for WHONET input...")
|
|
|
|
|
unmatched <- unmatched %>%
|
2023-07-08 17:30:05 +02:00
|
|
|
|
mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
|
|
|
|
|
minimum_matching_score = 0.55,
|
2023-04-14 11:12:26 +02:00
|
|
|
|
keep_synonyms = TRUE,
|
|
|
|
|
language = "en"),
|
2023-04-14 23:14:34 +02:00
|
|
|
|
mo = case_when(ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"),
|
|
|
|
|
ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"),
|
|
|
|
|
ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"),
|
|
|
|
|
TRUE ~ mo),
|
2023-04-14 11:12:26 +02:00
|
|
|
|
mo_name = mo_name(mo,
|
|
|
|
|
keep_synonyms = TRUE,
|
|
|
|
|
language = "en"))
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# check if coercion at least resembles the first part (genus)
|
2023-06-26 13:52:02 +02:00
|
|
|
|
unmatched <- unmatched %>%
|
2023-04-14 11:12:26 +02:00
|
|
|
|
mutate(
|
|
|
|
|
first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
|
2023-06-22 15:10:59 +02:00
|
|
|
|
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>%
|
2023-07-08 17:30:05 +02:00
|
|
|
|
arrange(keep)
|
|
|
|
|
unmatched %>%
|
|
|
|
|
View()
|
|
|
|
|
unmatched <- unmatched %>%
|
2023-06-26 13:52:02 +02:00
|
|
|
|
filter(keep == TRUE)
|
|
|
|
|
|
|
|
|
|
organisms <- matched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>%
|
|
|
|
|
bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>%
|
|
|
|
|
mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>%
|
|
|
|
|
arrange(code)
|
|
|
|
|
|
|
|
|
|
# some subspecies exist, while their upper species do not, add them as the species level:
|
|
|
|
|
subspp <- organisms %>%
|
|
|
|
|
filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
|
|
|
|
|
mo_species(mo, keep_synonyms = TRUE) != "" &
|
|
|
|
|
mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>%
|
|
|
|
|
mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE),
|
|
|
|
|
mo_species(mo, keep_synonyms = TRUE)),
|
|
|
|
|
keep_synonyms = TRUE),
|
|
|
|
|
name = mo_name(mo, keep_synonyms = TRUE))
|
|
|
|
|
organisms <- organisms %>%
|
|
|
|
|
filter(!code %in% subspp$code) %>%
|
|
|
|
|
bind_rows(subspp) %>%
|
|
|
|
|
arrange(code)
|
|
|
|
|
|
2023-07-08 17:30:05 +02:00
|
|
|
|
# add the groups
|
|
|
|
|
organisms <- organisms %>%
|
|
|
|
|
bind_rows(tibble(code = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique(),
|
|
|
|
|
group = NA,
|
|
|
|
|
mo = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique() %>% as.mo(keep_synonyms = TRUE),
|
|
|
|
|
name = mo_name(mo, keep_synonyms = TRUE))) %>%
|
|
|
|
|
arrange(code, group) %>%
|
|
|
|
|
select(-group) %>%
|
|
|
|
|
distinct()
|
|
|
|
|
|
2023-07-10 13:41:52 +02:00
|
|
|
|
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
|
|
|
|
|
organisms <- organisms %>%
|
|
|
|
|
filter(!(code == "SGM" & name %like% "Streptococcus"))
|
|
|
|
|
# this must be empty:
|
|
|
|
|
organisms$code[organisms$code %>% duplicated()]
|
2023-07-08 17:30:05 +02:00
|
|
|
|
|
|
|
|
|
saveRDS(organisms, "data-raw/organisms.rds", version = 2)
|
2023-06-26 13:52:02 +02:00
|
|
|
|
|
|
|
|
|
#---
|
|
|
|
|
# AT THIS POINT, `organisms` is clean and all entries have an mo code
|
|
|
|
|
#---
|
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# update microorganisms.codes with the latest WHONET codes
|
2023-06-22 15:10:59 +02:00
|
|
|
|
microorganisms.codes2 <- microorganisms.codes %>%
|
2023-04-14 11:12:26 +02:00
|
|
|
|
# remove all old WHONET codes, whether we (in the end) keep them or not
|
2023-07-08 17:30:05 +02:00
|
|
|
|
filter(!toupper(code) %in% toupper(organisms$code)) %>%
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# and add the new ones
|
2023-07-08 17:30:05 +02:00
|
|
|
|
bind_rows(organisms %>% select(code, mo)) %>%
|
|
|
|
|
arrange(code) %>%
|
|
|
|
|
distinct(code, .keep_all = TRUE)
|
2023-06-22 15:10:59 +02:00
|
|
|
|
# new codes:
|
|
|
|
|
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
|
|
|
|
|
mo_name(microorganisms.codes2$mo[which(!microorganisms.codes2$code %in% microorganisms.codes$code)], keep_synonyms = TRUE)
|
|
|
|
|
microorganisms.codes <- microorganisms.codes2
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# Run this part to update ASIARS-Net:
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# # start
|
|
|
|
|
# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
|
|
|
|
|
# asiarsnet <- asiarsnet %>%
|
|
|
|
|
# mutate(WHONET_Code = toupper(WHONET_Code)) %>%
|
|
|
|
|
# left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
|
|
|
|
|
# mutate(
|
|
|
|
|
# mo1 = as.mo(ORGANISM_CODE),
|
|
|
|
|
# mo2 = as.mo(ORGANISM)
|
|
|
|
|
# ) %>%
|
|
|
|
|
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
|
|
|
|
|
# filter(!is.na(mo))
|
|
|
|
|
# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
|
|
|
|
|
# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
|
|
|
|
|
# # these will be updated
|
|
|
|
|
# bind_rows(insert1, insert2) %>%
|
|
|
|
|
# rename(mo_new = mo) %>%
|
|
|
|
|
# left_join(microorganisms.codes) %>%
|
|
|
|
|
# filter(mo != mo_new)
|
|
|
|
|
# microorganisms.codes <- microorganisms.codes %>%
|
|
|
|
|
# filter(!code %in% c(insert1$code, insert2$code)) %>%
|
|
|
|
|
# bind_rows(insert1, insert2) %>%
|
|
|
|
|
# arrange(code)
|
|
|
|
|
# # end
|
|
|
|
|
|
|
|
|
|
## Save to package ----
|
2023-07-08 17:30:05 +02:00
|
|
|
|
class(microorganisms.codes$mo) <- c("mo", "character")
|
2023-04-14 23:14:34 +02:00
|
|
|
|
usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
|
|
|
|
|
rm(microorganisms.codes)
|
|
|
|
|
devtools::load_all()
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# BREAKPOINTS ----
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# now that we have the right MO codes, get the breakpoints and convert them
|
2023-06-26 13:52:02 +02:00
|
|
|
|
|
|
|
|
|
whonet_breakpoints %>%
|
|
|
|
|
count(GUIDELINES, BREAKPOINT_TYPE) %>%
|
|
|
|
|
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>%
|
|
|
|
|
janitor::adorn_totals(where = c("row", "col"))
|
2023-04-14 23:14:34 +02:00
|
|
|
|
|
|
|
|
|
breakpoints <- whonet_breakpoints %>%
|
|
|
|
|
mutate(code = toupper(ORGANISM_CODE)) %>%
|
2023-06-22 15:10:59 +02:00
|
|
|
|
left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
|
2023-04-19 00:31:31 +02:00
|
|
|
|
# GEN (Generic) and ALL (All) are PK/PD codes
|
|
|
|
|
data.frame(code = c("ALL", "GEN"),
|
|
|
|
|
mo = rep(as.mo("UNKNOWN"), 2))))
|
2023-06-22 15:10:59 +02:00
|
|
|
|
# these ones lack an MO name, they cannot be used:
|
2023-04-14 23:14:34 +02:00
|
|
|
|
unknown <- breakpoints %>%
|
2023-07-08 17:30:05 +02:00
|
|
|
|
filter(is.na(mo)) %>%
|
2023-04-14 23:14:34 +02:00
|
|
|
|
pull(code) %>%
|
|
|
|
|
unique()
|
2023-04-19 00:31:31 +02:00
|
|
|
|
breakpoints %>%
|
2023-06-26 13:52:02 +02:00
|
|
|
|
filter(code %in% unknown) %>%
|
|
|
|
|
count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
|
2023-07-08 17:30:05 +02:00
|
|
|
|
# these codes are currently (2023-07-08): clu, kma. No clue, so remove them:
|
2023-04-14 23:14:34 +02:00
|
|
|
|
breakpoints <- breakpoints %>%
|
2023-07-08 17:30:05 +02:00
|
|
|
|
filter(!is.na(mo))
|
2023-04-14 23:14:34 +02:00
|
|
|
|
|
|
|
|
|
# and these ones have unknown antibiotics according to WHONET itself:
|
|
|
|
|
breakpoints %>%
|
|
|
|
|
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
|
|
|
|
|
count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
|
|
|
|
|
arrange(desc(YEAR))
|
2023-06-22 15:10:59 +02:00
|
|
|
|
breakpoints %>%
|
|
|
|
|
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
|
|
|
|
|
pull(WHONET_ABX_CODE) %>%
|
|
|
|
|
unique()
|
2023-07-10 13:41:52 +02:00
|
|
|
|
# they are at the moment all old codes that have the right replacements in `antibiotics`, so we can use as.ab()
|
2023-06-26 13:52:02 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Build new breakpoints table ----
|
|
|
|
|
|
2022-10-30 14:31:45 +01:00
|
|
|
|
breakpoints_new <- breakpoints %>%
|
2023-06-26 13:52:02 +02:00
|
|
|
|
filter(!is.na(WHONET_ABX_CODE)) %>%
|
2022-10-30 14:31:45 +01:00
|
|
|
|
transmute(
|
|
|
|
|
guideline = paste(GUIDELINES, YEAR),
|
2023-06-26 13:52:02 +02:00
|
|
|
|
type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
|
2022-10-30 14:31:45 +01:00
|
|
|
|
method = TEST_METHOD,
|
2023-06-22 15:10:59 +02:00
|
|
|
|
site = SITE_OF_INFECTION,
|
2023-04-14 23:14:34 +02:00
|
|
|
|
mo,
|
2022-10-30 14:31:45 +01:00
|
|
|
|
rank_index = case_when(
|
2023-06-22 15:10:59 +02:00
|
|
|
|
is.na(mo_rank(mo, keep_synonyms = TRUE)) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
|
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
|
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
|
2023-07-08 17:30:05 +02:00
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) == "species group" ~ 2.5,
|
2023-06-22 15:10:59 +02:00
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) == "genus" ~ 3,
|
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) == "family" ~ 4,
|
|
|
|
|
mo_rank(mo, keep_synonyms = TRUE) == "order" ~ 5,
|
2022-10-30 14:31:45 +01:00
|
|
|
|
TRUE ~ 6
|
|
|
|
|
),
|
|
|
|
|
ab = as.ab(WHONET_ABX_CODE),
|
2023-06-26 13:52:02 +02:00
|
|
|
|
ref_tbl = ifelse(type == "ECOFF" & is.na(REFERENCE_TABLE), "ECOFF", REFERENCE_TABLE),
|
2022-10-30 14:31:45 +01:00
|
|
|
|
disk_dose = POTENCY,
|
2023-06-26 13:52:02 +02:00
|
|
|
|
breakpoint_S = ifelse(type == "ECOFF" & is.na(S) & !is.na(ECV_ECOFF), ECV_ECOFF, S),
|
|
|
|
|
breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
|
2023-06-22 15:10:59 +02:00
|
|
|
|
uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI")
|
2022-10-30 14:31:45 +01:00
|
|
|
|
) %>%
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
|
2022-10-30 14:31:45 +01:00
|
|
|
|
mutate(disk_dose = disk_dose %>%
|
2023-04-14 23:14:34 +02:00
|
|
|
|
gsub("μ", "u", ., fixed = TRUE) %>% # this is 'mu', \u03bc
|
|
|
|
|
gsub("µ", "u", ., fixed = TRUE) %>% # this is 'micro', u00b5 (yes, they look the same)
|
2022-10-30 14:31:45 +01:00
|
|
|
|
gsub("–", "-", ., fixed = TRUE)) %>%
|
2023-07-08 17:30:05 +02:00
|
|
|
|
arrange(desc(guideline), mo, ab, type, method) %>%
|
2022-10-30 14:31:45 +01:00
|
|
|
|
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
|
2023-07-10 13:41:52 +02:00
|
|
|
|
distinct(guideline, type, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# check the strange duplicates
|
|
|
|
|
breakpoints_new %>%
|
2023-06-26 13:52:02 +02:00
|
|
|
|
mutate(id = paste(guideline, type, ab, mo, method, site)) %>%
|
2023-04-14 23:14:34 +02:00
|
|
|
|
filter(id %in% .$id[which(duplicated(id))])
|
2023-06-22 15:10:59 +02:00
|
|
|
|
# remove duplicates
|
|
|
|
|
breakpoints_new <- breakpoints_new %>%
|
2023-07-10 13:41:52 +02:00
|
|
|
|
distinct(guideline, type, ab, mo, method, site, .keep_all = TRUE)
|
2023-04-14 23:14:34 +02:00
|
|
|
|
|
2023-06-22 15:10:59 +02:00
|
|
|
|
# fix reference table names
|
2023-07-08 17:30:05 +02:00
|
|
|
|
breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
|
2023-06-22 15:10:59 +02:00
|
|
|
|
breakpoints_new <- breakpoints_new %>%
|
|
|
|
|
mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
|
|
|
|
|
is.na(ref_tbl) ~ "Unknown",
|
|
|
|
|
TRUE ~ ref_tbl))
|
|
|
|
|
|
|
|
|
|
# clean disk zones
|
2022-10-22 22:00:15 +02:00
|
|
|
|
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE]))
|
|
|
|
|
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
|
2021-12-13 11:57:34 +01:00
|
|
|
|
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
|
2022-10-30 14:31:45 +01:00
|
|
|
|
# instead, clean based on MIC factor levels
|
2022-10-22 22:00:15 +02:00
|
|
|
|
m <- unique(as.double(as.mic(levels(as.mic(1)))))
|
|
|
|
|
breakpoints_new[which(breakpoints_new$method == "MIC" &
|
2022-10-30 14:31:45 +01:00
|
|
|
|
is.na(breakpoints_new$breakpoint_S)), "breakpoint_S"] <- min(m)
|
2022-10-22 22:00:15 +02:00
|
|
|
|
breakpoints_new[which(breakpoints_new$method == "MIC" &
|
2022-10-30 14:31:45 +01:00
|
|
|
|
is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# raise these one higher valid MIC factor level:
|
2023-07-10 13:41:52 +02:00
|
|
|
|
breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- 128
|
|
|
|
|
breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- 256
|
2023-07-10 19:04:12 +02:00
|
|
|
|
breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- 512
|
2023-07-10 13:41:52 +02:00
|
|
|
|
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- 1024
|
2021-12-13 11:57:34 +01:00
|
|
|
|
|
2023-07-12 12:41:25 +02:00
|
|
|
|
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G now includes all streptococci:
|
|
|
|
|
clinical_breakpoints$mo[clinical_breakpoints$mo == "B_STRPT" & clinical_breakpoints$ref_tbl %like% "strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
|
|
|
|
|
|
2022-05-10 17:01:37 +02:00
|
|
|
|
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
|
2023-04-14 23:14:34 +02:00
|
|
|
|
# EUCAST 2022 guideline: S <= 8 and R > 8
|
2022-05-10 17:01:37 +02:00
|
|
|
|
# WHONET file: S <= 8 and R >= 16
|
2023-06-22 15:10:59 +02:00
|
|
|
|
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
2022-05-10 17:01:37 +02:00
|
|
|
|
# this will make an MIC of 12 I, which should be R, so:
|
2022-10-22 22:00:15 +02:00
|
|
|
|
breakpoints_new <- breakpoints_new %>%
|
|
|
|
|
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
|
2022-10-30 14:31:45 +01:00
|
|
|
|
pmax(breakpoint_S, breakpoint_R / 2),
|
|
|
|
|
breakpoint_R
|
|
|
|
|
))
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# fix disks as well
|
2023-06-22 15:10:59 +02:00
|
|
|
|
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
|
2022-10-22 22:00:15 +02:00
|
|
|
|
breakpoints_new <- breakpoints_new %>%
|
|
|
|
|
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
|
2022-10-30 14:31:45 +01:00
|
|
|
|
breakpoint_R + 1,
|
|
|
|
|
breakpoint_R
|
|
|
|
|
))
|
2022-10-29 14:15:23 +02:00
|
|
|
|
# fix missing R breakpoint where there is an S breakpoint
|
|
|
|
|
breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
|
|
|
|
|
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# check again
|
2023-06-22 15:10:59 +02:00
|
|
|
|
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
2022-10-22 22:00:15 +02:00
|
|
|
|
# compare with current version
|
2023-01-21 23:47:20 +01:00
|
|
|
|
clinical_breakpoints %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-07-10 13:41:52 +02:00
|
|
|
|
# must have "human" and "ECOFF"
|
|
|
|
|
breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
|
|
|
|
|
|
2023-04-19 00:31:31 +02:00
|
|
|
|
# check dimensions
|
|
|
|
|
dim(breakpoints_new)
|
|
|
|
|
dim(clinical_breakpoints)
|
|
|
|
|
|
2023-06-26 13:52:02 +02:00
|
|
|
|
# SAVE TO PACKAGE ----
|
2022-10-22 22:00:15 +02:00
|
|
|
|
|
2023-01-21 23:47:20 +01:00
|
|
|
|
clinical_breakpoints <- breakpoints_new
|
|
|
|
|
usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
|
|
|
|
|
rm(clinical_breakpoints)
|
2019-09-20 12:33:05 +02:00
|
|
|
|
devtools::load_all(".")
|