2021-12-12 11:36:58 +01:00
|
|
|
|
# ==================================================================== #
|
|
|
|
|
# TITLE #
|
2022-10-02 15:31:16 +02:00
|
|
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# #
|
|
|
|
|
# SOURCE #
|
|
|
|
|
# https://github.com/msberends/AMR #
|
|
|
|
|
# #
|
2022-10-02 15:31:16 +02:00
|
|
|
|
# CITE AS #
|
|
|
|
|
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
|
|
|
|
|
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
|
|
|
|
|
# Data. Journal of Statistical Software, 104(3), 1-31. #
|
|
|
|
|
# doi:10.18637/jss.v104.i03 #
|
|
|
|
|
# #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# Developed at the University of Groningen, the Netherlands, in #
|
|
|
|
|
# collaboration with non-profit organisations Certe Medical #
|
2022-08-28 10:31:50 +02:00
|
|
|
|
# Diagnostics & Advice, and University Medical Center Groningen. #
|
2021-12-12 11:36:58 +01:00
|
|
|
|
# #
|
|
|
|
|
# This R package is free software; you can freely use and distribute #
|
|
|
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
|
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
|
|
|
# the Free Software Foundation. #
|
|
|
|
|
# We created this package for both routine data analysis and academic #
|
|
|
|
|
# research and it was publicly released in the hope that it will be #
|
|
|
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
|
|
|
|
# #
|
|
|
|
|
# Visit our website for the full manual and a complete tutorial about #
|
|
|
|
|
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
|
|
|
|
|
# ==================================================================== #
|
|
|
|
|
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# This script runs in under a minute and renews all guidelines of CLSI and EUCAST!
|
2022-05-10 21:34:30 +02:00
|
|
|
|
# Run it with source("data-raw/reproduction_of_rsi_translation.R")
|
2021-12-13 10:18:28 +01:00
|
|
|
|
|
2019-05-10 16:44:59 +02:00
|
|
|
|
library(dplyr)
|
2020-07-29 10:33:47 +02:00
|
|
|
|
library(readr)
|
|
|
|
|
library(tidyr)
|
2021-12-13 10:18:28 +01:00
|
|
|
|
library(AMR)
|
|
|
|
|
|
|
|
|
|
# Install the WHONET software on Windows (http://www.whonet.org/software.html),
|
|
|
|
|
# and copy the folder C:\WHONET\Codes to data-raw/WHONET/Codes
|
2022-05-10 21:34:30 +02:00
|
|
|
|
DRGLST <- read_tsv("data-raw/WHONET/Codes/DRGLST.txt", na = c("", "NA", "-"), show_col_types = FALSE)
|
|
|
|
|
DRGLST1 <- read_tsv("data-raw/WHONET/Codes/DRGLST1.txt", na = c("", "NA", "-"), show_col_types = FALSE)
|
|
|
|
|
ORGLIST <- read_tsv("data-raw/WHONET/Codes/ORGLIST.txt", na = c("", "NA", "-"), show_col_types = FALSE)
|
2021-12-13 10:18:28 +01:00
|
|
|
|
|
|
|
|
|
# create data set for generic rules (i.e., AB-specific but not MO-specific)
|
|
|
|
|
rsi_generic <- DRGLST %>%
|
|
|
|
|
filter(CLSI == "X" | EUCST == "X") %>%
|
2022-08-28 10:31:50 +02:00
|
|
|
|
select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
|
|
|
|
|
mutate(
|
|
|
|
|
ab = as.ab(ab),
|
|
|
|
|
across(matches("(CLSI|EUCST)"), as.double)
|
|
|
|
|
) %>%
|
|
|
|
|
pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
|
|
|
|
|
separate(method, into = c("guideline", "method"), sep = "_") %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
mutate(method = ifelse(method %like% "D",
|
2022-08-28 10:31:50 +02:00
|
|
|
|
gsub("D", "DISK_", method, fixed = TRUE),
|
|
|
|
|
gsub("M", "MIC_", method, fixed = TRUE)
|
|
|
|
|
)) %>%
|
|
|
|
|
separate(method, into = c("method", "rsi"), sep = "_") %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# I is in the middle, so we only need R and S (saves data)
|
2022-08-28 10:31:50 +02:00
|
|
|
|
filter(rsi %in% c("R", "S")) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
pivot_wider(names_from = rsi, values_from = value) %>%
|
2022-08-28 10:31:50 +02:00
|
|
|
|
transmute(
|
|
|
|
|
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
|
|
|
|
|
method,
|
|
|
|
|
site = NA_character_,
|
|
|
|
|
mo = as.mo("UNKNOWN"),
|
|
|
|
|
ab,
|
|
|
|
|
ref_tbl = "Generic rules",
|
|
|
|
|
disk_dose,
|
|
|
|
|
breakpoint_S = S,
|
|
|
|
|
breakpoint_R = R,
|
|
|
|
|
uti = FALSE
|
|
|
|
|
) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
|
|
|
|
|
rsi_generic
|
2019-05-10 16:44:59 +02:00
|
|
|
|
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# create data set for AB-specific and MO-specific rules
|
2022-08-28 10:31:50 +02:00
|
|
|
|
rsi_specific <- DRGLST1 %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# only support guidelines for humans (for now)
|
2022-08-28 10:31:50 +02:00
|
|
|
|
filter(
|
|
|
|
|
HOST == "Human" & SITE_INF %unlike% "canine|feline",
|
|
|
|
|
# only CLSI and EUCAST
|
|
|
|
|
GUIDELINES %like% "(CLSI|EUCST)"
|
|
|
|
|
) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# get microorganism names from another WHONET table
|
2022-08-28 10:31:50 +02:00
|
|
|
|
mutate(ORG_CODE = tolower(ORG_CODE)) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
left_join(ORGLIST %>%
|
2022-08-28 10:31:50 +02:00
|
|
|
|
transmute(
|
|
|
|
|
ORG_CODE = tolower(ORG),
|
|
|
|
|
SCT_TEXT = case_when(
|
|
|
|
|
is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
|
|
|
|
|
is.na(SCT_TEXT) ~ ORGANISM,
|
|
|
|
|
TRUE ~ SCT_TEXT
|
|
|
|
|
)
|
|
|
|
|
) %>%
|
|
|
|
|
# WHO for 'Generic'
|
|
|
|
|
bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
|
|
|
|
|
# WHO for 'Enterobacterales'
|
|
|
|
|
bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# still some manual cleaning required
|
2022-08-28 10:31:50 +02:00
|
|
|
|
filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
|
|
|
|
|
transmute(
|
|
|
|
|
guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
|
|
|
|
|
method = toupper(TESTMETHOD),
|
|
|
|
|
site = SITE_INF,
|
|
|
|
|
mo = as.mo(SCT_TEXT),
|
|
|
|
|
ab = as.ab(WHON5_CODE),
|
|
|
|
|
ref_tbl = REF_TABLE,
|
|
|
|
|
disk_dose = POTENCY,
|
|
|
|
|
breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
|
|
|
|
|
breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
|
|
|
|
|
uti = site %like% "(UTI|urinary|urine)"
|
|
|
|
|
) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
|
|
|
|
|
rsi_specific
|
2019-05-10 16:44:59 +02:00
|
|
|
|
|
2022-08-28 10:31:50 +02:00
|
|
|
|
rsi_translation <- rsi_generic %>%
|
|
|
|
|
bind_rows(rsi_specific) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
# add the taxonomic rank index, used for sorting (so subspecies match first, order matches last)
|
2022-08-28 10:31:50 +02:00
|
|
|
|
mutate(
|
|
|
|
|
rank_index = case_when(
|
|
|
|
|
mo_rank(mo) %like% "(infra|sub)" ~ 1,
|
|
|
|
|
mo_rank(mo) == "species" ~ 2,
|
|
|
|
|
mo_rank(mo) == "genus" ~ 3,
|
|
|
|
|
mo_rank(mo) == "family" ~ 4,
|
|
|
|
|
mo_rank(mo) == "order" ~ 5,
|
|
|
|
|
TRUE ~ 6
|
|
|
|
|
),
|
|
|
|
|
.after = mo
|
|
|
|
|
) %>%
|
|
|
|
|
arrange(desc(guideline), ab, mo, method) %>%
|
|
|
|
|
distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
|
2021-12-13 10:18:28 +01:00
|
|
|
|
as.data.frame(stringsAsFactors = FALSE)
|
2021-01-12 22:08:04 +01:00
|
|
|
|
|
2021-12-13 11:57:34 +01:00
|
|
|
|
# disks MUST be 6-50 mm, so correct where that is wrong:
|
|
|
|
|
rsi_translation[which(rsi_translation$method == "DISK" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
(is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
|
2021-12-13 11:57:34 +01:00
|
|
|
|
rsi_translation[which(rsi_translation$method == "DISK" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
(is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
|
2021-12-13 11:57:34 +01:00
|
|
|
|
m <- unique(as.double(as.mic(levels(as.mic(1)))))
|
|
|
|
|
rsi_translation[which(rsi_translation$method == "MIC" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
|
2021-12-13 11:57:34 +01:00
|
|
|
|
rsi_translation[which(rsi_translation$method == "MIC" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
|
2021-12-13 11:57:34 +01:00
|
|
|
|
|
|
|
|
|
# WHONET has no >1024 but instead uses 1025, 513, etc, so raise these one higher valid MIC factor level:
|
|
|
|
|
rsi_translation[which(rsi_translation$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
|
|
|
|
|
rsi_translation[which(rsi_translation$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1]
|
|
|
|
|
rsi_translation[which(rsi_translation$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1]
|
|
|
|
|
rsi_translation[which(rsi_translation$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]
|
|
|
|
|
|
2022-05-10 17:01:37 +02:00
|
|
|
|
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
|
|
|
|
|
# EUCAST 2021 guideline: S <= 8 and R > 8
|
|
|
|
|
# WHONET file: S <= 8 and R >= 16
|
|
|
|
|
# this will make an MIC of 12 I, which should be R, so:
|
|
|
|
|
eucast_mics <- which(rsi_translation$guideline %like% "EUCAST" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
rsi_translation$method == "MIC" &
|
|
|
|
|
log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
|
|
|
|
|
!is.na(rsi_translation$breakpoint_R))
|
2022-05-10 21:34:30 +02:00
|
|
|
|
old_R <- rsi_translation[eucast_mics, "breakpoint_R", drop = TRUE]
|
|
|
|
|
old_S <- rsi_translation[eucast_mics, "breakpoint_S", drop = TRUE]
|
2022-08-28 10:31:50 +02:00
|
|
|
|
new_R <- 2^(log2(old_R) - 1)
|
2022-05-10 21:34:30 +02:00
|
|
|
|
new_R[new_R < old_S | is.na(as.mic(new_R))] <- old_S[new_R < old_S | is.na(as.mic(new_R))]
|
|
|
|
|
rsi_translation[eucast_mics, "breakpoint_R"] <- new_R
|
2022-05-10 17:01:37 +02:00
|
|
|
|
eucast_disks <- which(rsi_translation$guideline %like% "EUCAST" &
|
2022-08-28 10:31:50 +02:00
|
|
|
|
rsi_translation$method == "DISK" &
|
|
|
|
|
rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
|
|
|
|
|
!is.na(rsi_translation$breakpoint_R))
|
2022-05-10 17:01:37 +02:00
|
|
|
|
rsi_translation[eucast_disks, "breakpoint_R"] <- rsi_translation[eucast_disks, "breakpoint_R", drop = TRUE] + 1
|
|
|
|
|
|
2021-12-14 22:39:23 +01:00
|
|
|
|
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
|
|
|
|
|
rsi_translation$disk_dose <- gsub("μ", "u", rsi_translation$disk_dose, fixed = TRUE)
|
|
|
|
|
rsi_translation$disk_dose <- gsub("–", "-", rsi_translation$disk_dose, fixed = TRUE)
|
|
|
|
|
|
2019-05-10 16:44:59 +02:00
|
|
|
|
# save to package
|
2022-05-10 17:01:37 +02:00
|
|
|
|
usethis::use_data(rsi_translation, overwrite = TRUE, compress = "xz")
|
2019-05-10 16:44:59 +02:00
|
|
|
|
rm(rsi_translation)
|
2019-09-20 12:33:05 +02:00
|
|
|
|
devtools::load_all(".")
|