1
0
mirror of https://github.com/msberends/AMR.git synced 2026-03-30 22:15:53 +02:00

(v3.0.1.9041) add breakpoints 2026

This commit is contained in:
2026-03-30 10:01:49 +02:00
parent 9c95aa455c
commit 3a736bc484
37 changed files with 5975 additions and 345 deletions

View File

@@ -37,6 +37,11 @@ devtools::load_all()
# BE SURE TO RUN data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!
# For non-interactive use:
# View() is called further down in this script to inspect intermediate results.
# When the session is not interactive, rebind View to glimpse so those calls
# go through glimpse() instead.
# NOTE(review): presumably this is dplyr's glimpse() and it is already attached - confirm
if (!interactive()) {
View <- glimpse
}
# READ DATA ----
# files are retrieved from https://github.com/AClark-WHONET/AMRIE
@@ -46,21 +51,21 @@ file_organisms <- file.path(github_repo, "Organisms.txt")
file_breakpoints <- file.path(github_repo, "Breakpoints.txt")
file_antibiotics <- file.path(github_repo, "Antibiotics.txt")
whonet_organisms <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_organisms_raw <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
# remove old taxonomic names
filter(TAXONOMIC_STATUS == "C") |>
mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
whonet_breakpoints <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_breakpoints_raw <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
filter(GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_antibiotics_raw <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
arrange(WHONET_ABX_CODE) |>
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
# MICROORGANISMS WHONET CODES ----
whonet_organisms <- whonet_organisms |>
whonet_organisms <- whonet_organisms_raw |>
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
mutate(
# this one was called Issatchenkia orientalis, but it should be:
@@ -110,6 +115,13 @@ organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES
mutate(name = mo_name(mo, keep_synonyms = TRUE)) |>
arrange(code)
# self-defined codes in the MO table must be retained:
# select every full name in `microorganisms` that contains " (" ...
existing_codes <- microorganisms$fullname[microorganisms$fullname %like% ".* \\("]
# ... reduce each of them to the text between the parentheses (the code itself);
# any text after the closing parenthesis would be left in place
existing_codes <- gsub(".*\\((.*)\\)", "\\1", existing_codes)
# ... and drop the rows of `organisms` whose code is one of these existing codes
organisms <- organisms |>
filter(!code %in% existing_codes)
# some subspecies exist, while their upper species do not, add them as the species level:
subspp <- organisms |>
filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
@@ -139,9 +151,10 @@ organisms <- organisms |> filter(code != "XXX")
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
# 2024-06-14 still the case
# 2025-04-20 still the case
# 2026-03-27 still the case, but fixed using `existing_codes` above
organisms |> filter(code == "SGM")
organisms <- organisms |>
filter(!(code == "SGM" & name %like% "Streptococcus"))
# organisms <- organisms |>
# filter(!(code == "SGM" & name %like% "Streptococcus"))
# this must be empty:
organisms$code[organisms$code |> duplicated()]
@@ -162,7 +175,7 @@ microorganisms.codes2 <- microorganisms.codes |>
# new codes:
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
mo_name(microorganisms.codes2$mo[which(!microorganisms.codes2$code %in% microorganisms.codes$code)], keep_synonyms = TRUE)
microorganisms.codes <- microorganisms.codes2
microorganisms.codes <- microorganisms.codes2 |> distinct()
# Run this part to update ASIARS-Net:
# 2024-06-14: file not available anymore
@@ -201,10 +214,15 @@ devtools::load_all()
# now that we have the correct MO codes, get the breakpoints and convert them
whonet_breakpoints |>
whonet_breakpoints_raw |>
count(GUIDELINES, BREAKPOINT_TYPE) |>
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
janitor::adorn_totals(where = c("row", "col"))
# overview of the number of breakpoints per guideline and breakpoint type,
# restricted to rows whose YEAR equals the current calendar year
# NOTE(review): format() returns a character string; if YEAR was read as a number this
# comparison relies on R's implicit coercion - confirm this is intended
whonet_breakpoints_raw |>
filter(YEAR == format(Sys.Date(), "%Y")) |>
count(GUIDELINES, YEAR, BREAKPOINT_TYPE) |>
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
janitor::adorn_totals(where = c("row", "col"))
# compared to current
AMR::clinical_breakpoints |>
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
@@ -213,7 +231,7 @@ AMR::clinical_breakpoints |>
as.data.frame() |>
janitor::adorn_totals(where = c("row", "col"))
breakpoints <- whonet_breakpoints |>
breakpoints <- whonet_breakpoints_raw |>
mutate(code = toupper(ORGANISM_CODE)) |>
left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
# GEN (Generic) and ALL (All) are PK/PD codes
@@ -233,7 +251,7 @@ breakpoints <- breakpoints |>
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints |>
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |>
filter(!WHONET_ABX_CODE %in% whonet_antibiotics_raw$WHONET_ABX_CODE) |>
count(GUIDELINES, WHONET_ABX_CODE) |>
mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
ab_name = ab_name(ab))
@@ -296,7 +314,7 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.d
# regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
breakpoints_new |>
filter(host %like% "foal") |>
count(guideline, host)
count(guideline, host, ab)
breakpoints_new <- breakpoints_new |>
filter(host %unlike% "foal") |>
mutate(host = ifelse(host %like% "horse", "horse", host))
@@ -304,7 +322,7 @@ breakpoints_new <- breakpoints_new |>
# FIXES FOR WHONET ERRORS ----
m <- unique(as.double(as.mic(levels(as.mic(1)))))
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
# WHONET has no >1024 but instead uses 1025, 513, and 129, so as.mic() cannot be used to clean.
# instead, raise these to the next higher valid MIC factor level:
breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
@@ -318,6 +336,7 @@ anyNA(breakpoints_new$breakpoint_S)
# a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
# 2025-04-20/ For EUCAST, this should not be the case; it only seems to happen in old guidelines now
# 2026-03-27/ Now 2026 is affected as well, but setting R to the same value as S is fine
breakpoints_new |>
filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
count(guideline)
@@ -325,10 +344,15 @@ breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guidelin
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
# 2026-03-27/ Only erroneous in EUCAST until 2024, it's fixed for 2025 and 2026, but we need to fix this historically too
breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
# Haemophilus same error (must only be H. influenzae)
breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
# 2026-03-27/ Only erroneous in EUCAST until 2024, it's fixed for 2025 and 2026, but we need to fix this historically too
breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
# EUCAST says that for H. parainfluenzae the H. influenzae rules can be used, so add them
# inspection only (result is not assigned): number of EUCAST MIC rows per guideline
# and microorganism, for rows whose `mo` matches as.mo("B_HMPHL")
breakpoints_new |>
filter(method == "MIC" & guideline %like% "EUCAST" & mo %like% as.mo("B_HMPHL")) |>
count(guideline, mo)
breakpoints_new <- breakpoints_new |>
bind_rows(
breakpoints_new |>
@@ -345,6 +369,17 @@ breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
# WHONET contains breakpoints for EUCAST that are not actually in EUCAST:
# IPM in M. morganii is not in it since v10
# NOTE(review): the condition below is not limited to particular guideline years, so the
# non-ECOFF rows are removed for every EUCAST version present, not only v10 and later -
# confirm this is intended
wrong <- with(breakpoints_new, guideline %like% "EUCAST" & ab == "IPM" & mo == as.mo("M. morganii") & ref_tbl != "ECOFF")
# show the rows that are about to be removed
breakpoints_new |> filter(wrong)
# NOTE(review): if filter() is dplyr's, rows for which `wrong` is NA (e.g. a missing
# ref_tbl) are dropped here as well - TODO confirm
breakpoints_new <- breakpoints_new |> filter(!wrong)
# Breakpoints for COPS were part of EUCAST until v11
# NOTE(review): the condition below is not limited to particular guideline years, so the
# non-ECOFF CoPS rows are removed for every EUCAST version present, including the versions
# up to v11 in which (per the line above) they did exist - confirm this is intended
wrong <- with(breakpoints_new, guideline %like% "EUCAST" & mo == as.mo("CoPS") & ref_tbl != "ECOFF")
# show the rows that are about to be removed
breakpoints_new |> filter(wrong)
breakpoints_new <- breakpoints_new |> filter(!wrong)
# WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
# 2024-02-22/ fixed now
@@ -389,7 +424,7 @@ breakpoints_new |>
filter(id %in% .$id[which(duplicated(id))]) |>
arrange(desc(guideline)) |>
View()
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints_raw df, we have to remove duplicates
# 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
breakpoints_new <- breakpoints_new |>
distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)
@@ -398,9 +433,9 @@ breakpoints_new <- breakpoints_new |>
# CHECKS AND SAVE TO PACKAGE ----
# check again
breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
breakpoints_new |> filter(guideline == "EUCAST 2026", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# compare with current version
clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
clinical_breakpoints |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# must have "human" and "ECOFF"
breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")