1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-15 20:41:38 +01:00
AMR/data-raw/reproduction_of_clinical_breakpoints.R

348 lines
16 KiB
R
Raw Normal View History

2021-12-12 11:36:58 +01:00
# ==================================================================== #
# TITLE #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2021-12-12 11:36:58 +01:00
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
2022-10-05 09:12:22 +02:00
# CITE AS #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2021-12-12 11:36:58 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
2021-12-13 10:18:28 +01:00
# This script runs in under a minute and renews all guidelines of CLSI and EUCAST!
2023-01-21 23:47:20 +01:00
# Run it with source("data-raw/reproduction_of_clinical_breakpoints.R")
2021-12-13 10:18:28 +01:00
2019-05-10 16:44:59 +02:00
library(dplyr)
2020-07-29 10:33:47 +02:00
library(readr)
library(tidyr)
2023-04-19 00:31:31 +02:00
devtools::load_all()
2021-12-13 10:18:28 +01:00
2022-10-22 22:00:15 +02:00
# Install the WHONET 2022 software on Windows (http://www.whonet.org/software.html),
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
2022-10-29 14:15:23 +02:00
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# MICROORGANISMS WHONET CODES ----
2022-10-30 14:31:45 +01:00
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
2022-10-22 22:00:15 +02:00
# remove old taxonomic names
2022-10-30 14:31:45 +01:00
filter(TAXONOMIC_STATUS == "C") %>%
transmute(ORGANISM_CODE = tolower(WHONET_ORG_CODE), ORGANISM) %>%
mutate(
2023-04-14 11:12:26 +02:00
# what's wrong here? all these are only in the table on subspecies level (where species == subspecies), not on species level
2022-10-30 14:31:45 +01:00
ORGANISM = if_else(ORGANISM_CODE == "sau", "Staphylococcus aureus", ORGANISM),
2023-04-14 11:12:26 +02:00
ORGANISM = if_else(ORGANISM_CODE == "pam", "Pasteurella multocida", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "kpn", "Klebsiella pneumoniae", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "caj", "Campylobacter jejuni", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "mmo", "Morganella morganii", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "sap", "Staphylococcus saprophyticus", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "fne", "Fusobacterium necrophorum", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "fnu", "Fusobacterium nucleatum", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "sdy", "Streptococcus dysgalactiae", ORGANISM),
2023-04-14 23:14:34 +02:00
ORGANISM = if_else(ORGANISM_CODE == "axy", "Achromobacter xylosoxidans", ORGANISM),
# and this one was called Issatchenkia orientalis, but it should be:
ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
2022-10-30 14:31:45 +01:00
)
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# add some general codes
2022-10-30 14:31:45 +01:00
whonet_organisms <- whonet_organisms %>%
bind_rows(data.frame(
ORGANISM_CODE = c("ebc", "cof"),
ORGANISM = c("Enterobacterales", "Campylobacter")
))
2022-10-22 22:00:15 +02:00
2023-04-14 11:12:26 +02:00
whonet_organisms.bak <- whonet_organisms
2023-04-14 23:14:34 +02:00
# generate the mo codes and add their names
2023-04-14 11:12:26 +02:00
whonet_organisms <- whonet_organisms.bak %>%
mutate(mo = as.mo(gsub("(sero[a-z]*| complex| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
minimum_matching_score = 0.6,
2023-04-14 11:12:26 +02:00
keep_synonyms = TRUE,
language = "en"),
2023-04-14 23:14:34 +02:00
mo = case_when(ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"),
ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"),
ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"),
TRUE ~ mo),
2023-04-14 11:12:26 +02:00
mo_name = mo_name(mo,
keep_synonyms = TRUE,
language = "en"))
2023-04-14 23:14:34 +02:00
# check if coercion at least resembles the first part (genus)
2023-04-14 11:12:26 +02:00
new_mo_codes <- whonet_organisms %>%
mutate(
first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>%
filter(keep == TRUE) %>%
transmute(code = toupper(ORGANISM_CODE),
mo = mo)
2023-04-14 23:14:34 +02:00
# update microorganisms.codes with the latest WHONET codes
microorganisms.codes2 <- microorganisms.codes %>%
2023-04-14 11:12:26 +02:00
# remove all old WHONET codes, whether we (in the end) keep them or not
filter(!toupper(code) %in% toupper(new_mo_codes$code)) %>%
2023-04-14 23:14:34 +02:00
# and add the new ones
bind_rows(new_mo_codes) %>%
2023-04-14 11:12:26 +02:00
arrange(code)
# new codes:
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
mo_name(microorganisms.codes2$mo[which(!microorganisms.codes2$code %in% microorganisms.codes$code)], keep_synonyms = TRUE)
microorganisms.codes <- microorganisms.codes2
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# Run this part to update ASIARS-Net:
# start
2022-10-22 22:00:15 +02:00
asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
asiarsnet <- asiarsnet %>%
mutate(WHONET_Code = toupper(WHONET_Code)) %>%
2022-10-30 14:31:45 +01:00
left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
mutate(
mo1 = as.mo(ORGANISM_CODE),
mo2 = as.mo(ORGANISM)
) %>%
mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
2022-10-22 22:00:15 +02:00
filter(!is.na(mo))
insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
# these will be updated
2022-10-30 14:31:45 +01:00
bind_rows(insert1, insert2) %>%
rename(mo_new = mo) %>%
left_join(microorganisms.codes) %>%
filter(mo != mo_new)
microorganisms.codes <- microorganisms.codes %>%
filter(!code %in% c(insert1$code, insert2$code)) %>%
bind_rows(insert1, insert2) %>%
2022-10-22 22:00:15 +02:00
arrange(code)
2023-04-14 23:14:34 +02:00
# end
# save to package
usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
rm(microorganisms.codes)
devtools::load_all()
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# BREAKPOINTS ----
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# now that we have the right MO codes, get the breakpoints and convert them
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
filter(BREAKPOINT_TYPE == "Human", GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
arrange(WHONET_ABX_CODE) %>%
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
breakpoints <- whonet_breakpoints %>%
mutate(code = toupper(ORGANISM_CODE)) %>%
left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
2023-04-19 00:31:31 +02:00
# GEN (Generic) and ALL (All) are PK/PD codes
data.frame(code = c("ALL", "GEN"),
mo = rep(as.mo("UNKNOWN"), 2))))
# these ones lack an MO name, they cannot be used:
2023-04-14 23:14:34 +02:00
unknown <- breakpoints %>%
filter(is.na(mo)) %>%
pull(code) %>%
unique()
2023-04-19 00:31:31 +02:00
breakpoints %>%
filter(code %in% unknown)
2023-04-14 23:14:34 +02:00
breakpoints <- breakpoints %>%
filter(!is.na(mo))
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints %>%
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
arrange(desc(YEAR))
breakpoints %>%
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
pull(WHONET_ABX_CODE) %>%
unique()
2023-04-14 23:14:34 +02:00
# we cannot use them
# breakpoints <- breakpoints %>%
# filter(WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE)
2023-04-14 23:14:34 +02:00
# now check with our own antibiotics
breakpoints %>%
filter(!toupper(WHONET_ABX_CODE) %in% antibiotics$ab) %>%
pull(WHONET_ABX_CODE) %>%
unique()
# they are at the moment all old codes that have right replacements in `antibiotics`, so we can use as.ab()
2022-10-30 14:31:45 +01:00
breakpoints_new <- breakpoints %>%
2023-04-14 23:14:34 +02:00
# only last available 10 years
# filter(YEAR > max(YEAR) - 10) %>%
2022-10-30 14:31:45 +01:00
transmute(
guideline = paste(GUIDELINES, YEAR),
method = TEST_METHOD,
site = SITE_OF_INFECTION,
2023-04-14 23:14:34 +02:00
mo,
2022-10-30 14:31:45 +01:00
rank_index = case_when(
is.na(mo_rank(mo, keep_synonyms = TRUE)) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
mo_rank(mo, keep_synonyms = TRUE) == "genus" ~ 3,
mo_rank(mo, keep_synonyms = TRUE) == "family" ~ 4,
mo_rank(mo, keep_synonyms = TRUE) == "order" ~ 5,
2022-10-30 14:31:45 +01:00
TRUE ~ 6
),
ab = as.ab(WHONET_ABX_CODE),
ref_tbl = REFERENCE_TABLE,
disk_dose = POTENCY,
breakpoint_S = S,
breakpoint_R = R,
uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI")
2022-10-30 14:31:45 +01:00
) %>%
2022-10-22 22:00:15 +02:00
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
2022-10-30 14:31:45 +01:00
mutate(disk_dose = disk_dose %>%
2023-04-14 23:14:34 +02:00
gsub("μ", "u", ., fixed = TRUE) %>% # this is 'mu', \u03bc
gsub("µ", "u", ., fixed = TRUE) %>% # this is 'micro', u00b5 (yes, they look the same)
2022-10-30 14:31:45 +01:00
gsub("", "-", ., fixed = TRUE)) %>%
2022-08-28 10:31:50 +02:00
arrange(desc(guideline), ab, mo, method) %>%
2022-10-30 14:31:45 +01:00
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
2022-10-22 22:00:15 +02:00
distinct(guideline, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
2023-04-14 23:14:34 +02:00
# check the strange duplicates
breakpoints_new %>%
mutate(id = paste(guideline, ab, mo, method, site)) %>%
filter(id %in% .$id[which(duplicated(id))])
# remove duplicates
breakpoints_new <- breakpoints_new %>%
distinct(guideline, ab, mo, method, site, .keep_all = TRUE)
2023-04-14 23:14:34 +02:00
# fix reference table names
breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl))
breakpoints_new <- breakpoints_new %>%
mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
is.na(ref_tbl) ~ "Unknown",
TRUE ~ ref_tbl))
# clean disk zones
2022-10-22 22:00:15 +02:00
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE]))
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
2021-12-13 11:57:34 +01:00
2022-10-22 22:00:15 +02:00
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
2022-10-30 14:31:45 +01:00
# instead, clean based on MIC factor levels
2022-10-22 22:00:15 +02:00
m <- unique(as.double(as.mic(levels(as.mic(1)))))
breakpoints_new[which(breakpoints_new$method == "MIC" &
2022-10-30 14:31:45 +01:00
is.na(breakpoints_new$breakpoint_S)), "breakpoint_S"] <- min(m)
2022-10-22 22:00:15 +02:00
breakpoints_new[which(breakpoints_new$method == "MIC" &
2022-10-30 14:31:45 +01:00
is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
2022-10-22 22:00:15 +02:00
# raise these one higher valid MIC factor level:
breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]
2021-12-13 11:57:34 +01:00
2022-05-10 17:01:37 +02:00
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
2023-04-14 23:14:34 +02:00
# EUCAST 2022 guideline: S <= 8 and R > 8
2022-05-10 17:01:37 +02:00
# WHONET file: S <= 8 and R >= 16
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
2022-05-10 17:01:37 +02:00
# this will make an MIC of 12 I, which should be R, so:
2022-10-22 22:00:15 +02:00
breakpoints_new <- breakpoints_new %>%
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
2022-10-30 14:31:45 +01:00
pmax(breakpoint_S, breakpoint_R / 2),
breakpoint_R
))
2022-10-22 22:00:15 +02:00
# fix disks as well
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
2022-10-22 22:00:15 +02:00
breakpoints_new <- breakpoints_new %>%
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
2022-10-30 14:31:45 +01:00
breakpoint_R + 1,
breakpoint_R
))
2022-10-29 14:15:23 +02:00
# fix missing R breakpoint where there is an S breakpoint
breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
2022-10-22 22:00:15 +02:00
# check again
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
2022-10-22 22:00:15 +02:00
# compare with current version
2023-01-21 23:47:20 +01:00
clinical_breakpoints %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
2022-10-22 22:00:15 +02:00
2023-04-19 00:31:31 +02:00
# check dimensions
dim(breakpoints_new)
dim(clinical_breakpoints)
# ECOFFs ----
# ECOFF = Epidemiological Cut-Off
whonet_ecoff <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
filter(BREAKPOINT_TYPE == "ECOFF", GUIDELINES %in% c("CLSI", "EUCAST"))
ecoff <- whonet_ecoff %>%
filter(!ORGANISM_CODE %in% c("clu", "BFX", "PFX", "kma", "cdh")) %>%
transmute(guideline = paste(GUIDELINES, YEAR),
mo = as.mo(ORGANISM_CODE, keep_synonyms = TRUE),
ab = as.ab(WHONET_ABX_CODE),
method = TEST_METHOD,
ecoff = as.double(ECV_ECOFF)) %>%
filter(!is.na(ecoff)) %>%
distinct()
# join to breakpoints
breakpoints_new <- breakpoints_new %>%
bind_rows(breakpoints_new %>%
right_join(ecoff, by = c("guideline", "mo", "ab", "method"))) %>%
mutate(ref_tbl = ifelse(is.na(ref_tbl), "ECOFF", ref_tbl)) %>%
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
arrange(desc(guideline), ab, mo, method) %>%
mutate(rank_index = case_when(
is.na(mo_rank(mo, keep_synonyms = TRUE)) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
mo_rank(mo, keep_synonyms = TRUE) == "genus" ~ 3,
mo_rank(mo, keep_synonyms = TRUE) == "family" ~ 4,
mo_rank(mo, keep_synonyms = TRUE) == "order" ~ 5,
TRUE ~ 6
)) %>%
mutate(uti = ifelse(is.na(uti), FALSE, uti)) %>%
relocate(ecoff, .after = breakpoint_R)
breakpoints_new.bak <- mutate(uti = ifelse(is.na(uti), FALSE, uti), .after = ecoff)
# EXTEND CoNS/CoPS/GAS/GBS ----
# extend all coagulase-postive/-negative staphylococci
CoNS <- breakpoints_new %>% filter(mo == as.mo("CoNS"))
for (m in MO_CONS[mo_subspecies(MO_CONS, keep_synonyms = TRUE) == ""]) {
breakpoints_new <- breakpoints_new %>%
bind_rows(CoNS %>%
mutate(mo = m))
}
CoPS <- breakpoints_new %>% filter(mo == as.mo("CoPS"))
for (m in MO_COPS[mo_subspecies(MO_COPS, keep_synonyms = TRUE) == ""]) {
breakpoints_new <- breakpoints_new %>%
bind_rows(CoPS %>%
mutate(mo = m))
}
# do the same for group A and B streptococci
breakpoints_new <- breakpoints_new %>%
bind_rows(breakpoints_new %>%
filter(mo == as.mo("Streptococcus Group A")) %>%
mutate(mo = as.mo("Streptococcus pyogenes"))) %>%
bind_rows(breakpoints_new %>%
filter(mo == as.mo("Streptococcus Group B")) %>%
mutate(mo = as.mo("Streptococcus agalactiae")))
# remove duplicates again for CoNS/CoPS/GBS and arrange
breakpoints_new <- breakpoints_new %>%
mutate(mo = as.mo(mo, keep_synonyms = TRUE)) %>%
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
arrange(desc(guideline), ab, mo, method)
2023-04-19 00:31:31 +02:00
2022-10-22 22:00:15 +02:00
# Save to package ----
2023-01-21 23:47:20 +01:00
clinical_breakpoints <- breakpoints_new
usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
rm(clinical_breakpoints)
2019-09-20 12:33:05 +02:00
devtools::load_all(".")