1
0
mirror of https://github.com/msberends/AMR.git synced 2026-03-30 20:55:53 +02:00

(v3.0.1.9041) add breakpoints 2026

This commit is contained in:
2026-03-30 10:01:49 +02:00
parent 9c95aa455c
commit 3a736bc484
37 changed files with 5975 additions and 345 deletions

View File

@@ -37,6 +37,11 @@ devtools::load_all()
# BE SURE TO RUN data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!
# For non-interactive use: rebind `View` to `glimpse`, so that the `View()` calls
# in this script (e.g. at the end of inspection pipelines) call `glimpse()` on
# their input instead.
# NOTE(review): `glimpse` is presumably dplyr's glimpse() - confirm it is attached here.
if (!interactive()) {
View <- glimpse
}
# READ DATA ----
# files are retrieved from https://github.com/AClark-WHONET/AMRIE
@@ -46,21 +51,21 @@ file_organisms <- file.path(github_repo, "Organisms.txt")
file_breakpoints <- file.path(github_repo, "Breakpoints.txt")
file_antibiotics <- file.path(github_repo, "Antibiotics.txt")
whonet_organisms <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_organisms_raw <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
# remove old taxonomic names
filter(TAXONOMIC_STATUS == "C") |>
mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
whonet_breakpoints <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_breakpoints_raw <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
filter(GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
whonet_antibiotics_raw <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
arrange(WHONET_ABX_CODE) |>
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
# MICROORGANISMS WHONET CODES ----
whonet_organisms <- whonet_organisms |>
whonet_organisms <- whonet_organisms_raw |>
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
mutate(
# this one was called Issatchenkia orientalis, but it should be:
@@ -110,6 +115,13 @@ organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES
mutate(name = mo_name(mo, keep_synonyms = TRUE)) |>
arrange(code)
# self-defined codes in the MO table must be retained
# 1. take the full names in `microorganisms` that match " (" (a space followed by an opening parenthesis)
existing_codes <- microorganisms$fullname[microorganisms$fullname %like% ".* \\("]
# 2. keep only the text between the parentheses; this is treated as the code
existing_codes <- gsub(".*\\((.*)\\)", "\\1", existing_codes)
# 3. drop the rows of `organisms` whose code equals one of these self-defined codes
organisms <- organisms |>
filter(!code %in% existing_codes)
# some subspecies exist, while their upper species do not, add them as the species level:
subspp <- organisms |>
filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
@@ -139,9 +151,10 @@ organisms <- organisms |> filter(code != "XXX")
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
# 2024-06-14 still the case
# 2025-04-20 still the case
# 2026-03-27 still the case, but fixed using `existing_codes` above
organisms |> filter(code == "SGM")
organisms <- organisms |>
filter(!(code == "SGM" & name %like% "Streptococcus"))
# organisms <- organisms |>
# filter(!(code == "SGM" & name %like% "Streptococcus"))
# this must be empty:
organisms$code[organisms$code |> duplicated()]
@@ -162,7 +175,7 @@ microorganisms.codes2 <- microorganisms.codes |>
# new codes:
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
mo_name(microorganisms.codes2$mo[which(!microorganisms.codes2$code %in% microorganisms.codes$code)], keep_synonyms = TRUE)
microorganisms.codes <- microorganisms.codes2
microorganisms.codes <- microorganisms.codes2 |> distinct()
# Run this part to update ASIARS-Net:
# 2024-06-14: file not available anymore
@@ -201,10 +214,15 @@ devtools::load_all()
# now that we have the correct MO codes, get the breakpoints and convert them
whonet_breakpoints |>
whonet_breakpoints_raw |>
count(GUIDELINES, BREAKPOINT_TYPE) |>
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
janitor::adorn_totals(where = c("row", "col"))
# inspection only (result is not assigned): the same kind of overview, restricted to
# rows whose YEAR equals the current calendar year
# (`format(Sys.Date(), "%Y")` yields the year as a four-digit character string)
whonet_breakpoints_raw |>
filter(YEAR == format(Sys.Date(), "%Y")) |>
count(GUIDELINES, YEAR, BREAKPOINT_TYPE) |>
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
janitor::adorn_totals(where = c("row", "col"))
# compared to current
AMR::clinical_breakpoints |>
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
@@ -213,7 +231,7 @@ AMR::clinical_breakpoints |>
as.data.frame() |>
janitor::adorn_totals(where = c("row", "col"))
breakpoints <- whonet_breakpoints |>
breakpoints <- whonet_breakpoints_raw |>
mutate(code = toupper(ORGANISM_CODE)) |>
left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
# GEN (Generic) and ALL (All) are PK/PD codes
@@ -233,7 +251,7 @@ breakpoints <- breakpoints |>
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints |>
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |>
filter(!WHONET_ABX_CODE %in% whonet_antibiotics_raw$WHONET_ABX_CODE) |>
count(GUIDELINES, WHONET_ABX_CODE) |>
mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
ab_name = ab_name(ab))
@@ -296,7 +314,7 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.d
# regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
breakpoints_new |>
filter(host %like% "foal") |>
count(guideline, host)
count(guideline, host, ab)
breakpoints_new <- breakpoints_new |>
filter(host %unlike% "foal") |>
mutate(host = ifelse(host %like% "horse", "horse", host))
@@ -304,7 +322,7 @@ breakpoints_new <- breakpoints_new |>
# FIXES FOR WHONET ERRORS ----
m <- unique(as.double(as.mic(levels(as.mic(1)))))
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
# WHONET has no >1024 but instead uses 1025, 513, and 129, so as.mic() cannot be used to clean.
# instead, raise these one higher valid MIC factor level:
breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
@@ -318,6 +336,7 @@ anyNA(breakpoints_new$breakpoint_S)
# a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
# 2025-04-20/ For EUCAST, this should not be the case; it now only seems to happen in old guidelines
# 2026-03-27/ Now 2026 is in it as well, but setting R equal to S is fine
breakpoints_new |>
filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
count(guideline)
@@ -325,10 +344,15 @@ breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guidelin
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
# 2026-03-27/ Only erroneous in EUCAST until 2024, it's fixed for 2025 and 2026, but we need to fix this historically too
breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
# Haemophilus same error (must only be H. influenzae)
breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
# 2026-03-27/ Only erroneous in EUCAST until 2024, it's fixed for 2025 and 2026, but we need to fix this historically too
breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
# EUCAST says that for H. parainfluenzae the H. influenzae rules can be used, so add them
# inspection only (result is not assigned): number of EUCAST MIC rows per guideline and `mo`,
# for the rows whose `mo` matches the code returned by as.mo("B_HMPHL")
breakpoints_new |>
filter(method == "MIC" & guideline %like% "EUCAST" & mo %like% as.mo("B_HMPHL")) |>
count(guideline, mo)
breakpoints_new <- breakpoints_new |>
bind_rows(
breakpoints_new |>
@@ -345,6 +369,17 @@ breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
# WHONET contains breakpoints for EUCAST that are not actually in EUCAST.
# For each case below: build a logical mask `wrong` over the rows of `breakpoints_new`,
# print the matching rows for inspection, then remove them. Rows with `ref_tbl` equal to
# "ECOFF" are excluded from the mask and are therefore kept.
# NOTE(review): dplyr::filter() only keeps rows where the condition is TRUE, so rows for
# which `wrong` evaluates to NA (e.g. a missing `ref_tbl`) are dropped as well - confirm this is intended.
# IPM in M. morganii is not in it since v10
wrong <- with(breakpoints_new, guideline %like% "EUCAST" & ab == "IPM" & mo == as.mo("M. morganii") & ref_tbl != "ECOFF")
breakpoints_new |> filter(wrong)
breakpoints_new <- breakpoints_new |> filter(!wrong)
# Breakpoints for COPS were part of EUCAST until v11
# (the mask below matches EUCAST rows of every version for as.mo("CoPS"), not only those after v11)
wrong <- with(breakpoints_new, guideline %like% "EUCAST" & mo == as.mo("CoPS") & ref_tbl != "ECOFF")
breakpoints_new |> filter(wrong)
breakpoints_new <- breakpoints_new |> filter(!wrong)
# WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
# 2024-02-22/ fixed now
@@ -389,7 +424,7 @@ breakpoints_new |>
filter(id %in% .$id[which(duplicated(id))]) |>
arrange(desc(guideline)) |>
View()
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints_raw df, we have to remove duplicates
# 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
breakpoints_new <- breakpoints_new |>
distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)
@@ -398,9 +433,9 @@ breakpoints_new <- breakpoints_new |>
# CHECKS AND SAVE TO PACKAGE ----
# check again
breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
breakpoints_new |> filter(guideline == "EUCAST 2026", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# compare with current version
clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
clinical_breakpoints |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# must have "human" and "ECOFF"
breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")

View File

@@ -1 +1 @@
c7062e60fa4fbc2eee233044d15903ce
c43a990cf91f959913d207e5a85e2bd5

File diff suppressed because it is too large Load Diff

View File

@@ -2832,6 +2832,7 @@
"FU-" "B_FSBCTR"
"FUA.SP" "F_FUSRM"
"FUL" "B_FSBCTR_ULCR"
"FUO" "F_FUSRM_OXYS"
"FUR" "F_FUSRM"
"FUROXY" "F_FUSRM_OXYS"
"FURPET" "F_FUSRM_PTRL"
@@ -2936,6 +2937,7 @@
"GLO.SP" "B_GLBCT"
"GLOSAN" "B_GLBCT_SNGN"
"GLOSPP" "B_GLBCT"
"GLS" "B_GLSSR"
"GM+" "B_GRAMP"
"GM-" "B_GRAMN"
"GMO" "B_GEMLL_MRBL"
@@ -3026,7 +3028,6 @@
"HABSPP" "B_HMTBC"
"HAC" "B_AGGRG_ACTN"
"HACEK" "B_HACEK"
"HACEK" "B_HACEK"
"HAE" "B_HMPHL"
"HAE.SP" "B_HMPHL"
"HAEAEG" "B_HMPHL_AEGY"
@@ -3122,7 +3123,7 @@
"HPL" "B_HMPHL_PRPH"
"HPO" "F_OGATA"
"HPOSPP" "F_HNDRS_ASTR"
"HPR" "B_HMPHL_PRSS"
"HPR" "B_GLSSR_PRSS"
"HPU" "B_HLCBCT_PLLR"
"HPY" "B_HLCBCT_PYLR"
"HRB" "B_HRBSP"
@@ -3471,6 +3472,7 @@
"LQU" "B_LGNLL_QTRN"
"LRC" "B_LPTSP_INTR"
"LRE" "B_LCTBC_RETR"
"LRF" "B_LCTCC_RFFN"
"LRI" "B_LMNRL_RCHR"
"LRU" "B_LGNLL_RBRL"
"LSA" "B_LCTBC_SLVR"
@@ -3760,6 +3762,7 @@
"MNE" "B_MYCBC_NERM"
"MNL" "B_MRXLL_NNLQ"
"MNO" "B_MYCBC_NNCH"
"MNT" "B_MYCBC"
"MNV" "B_MNNHM_VRGN"
"MO-" "B_MRXLL"
"MO.BOV" "B_MRXLL_BOVS"
@@ -4295,6 +4298,7 @@
"PAT.SP" "B_PANTO"
"PAU" "B_SLMNL_ENTR_ENTR"
"PAV" "B_AVBCT_AVIM"
"PBA" "B_PSDCL_ALBA"
"PBC" "B_PRVTL_BCCL"
"PBE" "B_PSTRL_BTTY"
"PBI" "B_PRBCT"
@@ -4591,6 +4595,7 @@
"PSA" "F_PSDLL"
"PSA.SP" "F_PSDLL"
"PSASPP" "F_PSDLL"
"PSB" "B_PSDCL"
"PSC" "F_PSDCH"
"PSCSPP" "B_PSDCL"
"PSD" "B_STPHY_PSDN"
@@ -4706,6 +4711,7 @@
"RAH.SP" "B_RHNLL"
"RAHAQU" "B_RHNLL_AQTL"
"RAHSPP" "B_RHNLL"
"RAI" "B_RLSTN_INSD"
"RAK" "B_RTTSA_AKAR"
"RAL" "B_RLSTN"
"RAL.SP" "B_RLSTN"
@@ -4800,6 +4806,7 @@
"ROD" "B_RDNTB"
"RODPNE" "B_RDNTB_PNMT"
"RODSPP" "B_RDNTB"
"ROK" "B_ROTHI_KRST"
"ROL" "F_RHZPS_MCRS"
"ROM" "B_RSMNS"
"ROMMUC" "B_RSMNS"
@@ -5042,8 +5049,10 @@
"SAV" "B_SLMNL_ARCH"
"SB2" "B_STRPT_BOVS"
"SBA" "B_SLMNL_BRLL"
"SBC" "B_SLBCL"
"SBE" "B_SHWNL_BNTH"
"SBG" "B_SLMNL_BNGR"
"SBI" "B_SLBCL_SLVS"
"SBL" "B_SLMNL_BLCK"
"SBM" "B_SLMNL_BVSM"
"SBN" "B_SLMNL_BBRG"
@@ -5078,6 +5087,7 @@
"SCS" "F_SCLCB_CNST"
"SCT" "B_STRPT_CNST"
"SCU" "B_STPHY_CRNS"
"SCV" "F_SCPLR_VCLS"
"SCY" "F_SCYTL"
"SCYSPP" "F_SCYTL"
"SD1" "B_SHGLL_DYSN"
@@ -5656,6 +5666,7 @@
"TAYSPP" "B_TYLRL"
"TBE" "F_GTRCH_RDLL"
"TBESPP" "F_TRCHS"
"TBH" "F_TRCHP_BNHM"
"TBN" "B_TRPRL_BRNR"
"TCA" "F_DBRYM_CHVL"
"TCASPP" "F_CANDD"
@@ -5841,6 +5852,8 @@
"TYASPP" "F_TRCHP"
"TYE" "P_TRYPN_JNSN"
"TYI" "F_TRCHP_INDT"
"TYM" "B_TRPHR"
"TYW" "B_TRPHR_WHPP"
"ULO" "F_ULCLD"
"UNK" "UNKNOWN"
"UPEC" "B_ESCHR_COLI"
@@ -5850,6 +5863,7 @@
"UREPAR" "B_URPLS_PRVM"
"URESPP" "B_URPLS"
"UREURE" "B_URPLS_URLY"
"URP" "B_URPLS_PRVM"
"UUR" "B_URPLS_URLY"
"V.ALG" "B_VIBRI_ALGN"
"V.CHO" "B_VIBRI_CHLR"

View File

@@ -1 +1 @@
986d5110a46bbf297ebaeb4dd5179fff
6ef98bb1bcd27052fde453bb12c0b285

Binary file not shown.

Binary file not shown.