(v3.0.1.9041) add breakpoints 2026

2026-05-28 02:21:39 +02:00 · 2026-03-30 10:01:49 +02:00
parent 9c95aa455c
commit 3a736bc484
37 changed files with 5975 additions and 345 deletions
--- a/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R
+++ b/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R
@@ -37,6 +37,11 @@ devtools::load_all()

 # BE SURE TO RUN data-raw/_reproduction_scripts/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!

+# For non-interactive use
+if (!interactive()) {
+  View <- glimpse
+}
+
 # READ DATA ----

 # files are retrieved from https://github.com/AClark-WHONET/AMRIE
@@ -46,21 +51,21 @@ file_organisms <- file.path(github_repo, "Organisms.txt")
 file_breakpoints <- file.path(github_repo, "Breakpoints.txt")
 file_antibiotics <- file.path(github_repo, "Antibiotics.txt")

-whonet_organisms <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
+whonet_organisms_raw <- read_tsv(file_organisms, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
  # remove old taxonomic names
  filter(TAXONOMIC_STATUS == "C") |>
  mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))

-whonet_breakpoints <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
+whonet_breakpoints_raw <- read_tsv(file_breakpoints, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
  filter(GUIDELINES %in% c("CLSI", "EUCAST"))

-whonet_antibiotics <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
+whonet_antibiotics_raw <- read_tsv(file_antibiotics, na = c("", "NA", "-"), show_col_types = FALSE, guess_max = Inf) |>
  arrange(WHONET_ABX_CODE) |>
  distinct(WHONET_ABX_CODE, .keep_all = TRUE)

 # MICROORGANISMS WHONET CODES ----

-whonet_organisms <- whonet_organisms |>
+whonet_organisms <- whonet_organisms_raw |>
  select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
  mutate(
    # this one was called Issatchenkia orientalis, but it should be:
@@ -110,6 +115,13 @@ organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES
  mutate(name = mo_name(mo, keep_synonyms = TRUE)) |> 
  arrange(code)

+# self-defined codes in the MO table must be retained
+existing_codes <- microorganisms$fullname[microorganisms$fullname %like% ".* \\("]
+existing_codes <- gsub(".*\\((.*)\\)", "\\1", existing_codes)
+
+organisms <- organisms |>
+  filter(!code %in% existing_codes)
+
 # some subspecies exist, while their upper species do not, add them as the species level:
 subspp <- organisms |>
  filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
@@ -139,9 +151,10 @@ organisms <- organisms |> filter(code != "XXX")
 # 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
 # 2024-06-14 still the case
 # 2025-04-20 still the case
+# 2026-03-27 still the case, but fixed using `existing_codes` above
 organisms |> filter(code == "SGM")
-organisms <- organisms |> 
-  filter(!(code == "SGM" & name %like% "Streptococcus"))
+# organisms <- organisms |> 
+#   filter(!(code == "SGM" & name %like% "Streptococcus"))
 # this must be empty:
 organisms$code[organisms$code |> duplicated()]

@@ -162,7 +175,7 @@ microorganisms.codes2 <- microorganisms.codes |>
 # new codes:
 microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
 mo_name(microorganisms.codes2$mo[which(!microorganisms.codes2$code %in% microorganisms.codes$code)], keep_synonyms = TRUE)
-microorganisms.codes <- microorganisms.codes2
+microorganisms.codes <- microorganisms.codes2 |> distinct()

 # Run this part to update ASIARS-Net:
 # 2024-06-14: file not available anymore
@@ -201,10 +214,15 @@ devtools::load_all()

 # now that we have the correct MO codes, get the breakpoints and convert them

-whonet_breakpoints |> 
+whonet_breakpoints_raw |> 
  count(GUIDELINES, BREAKPOINT_TYPE) |> 
  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> 
  janitor::adorn_totals(where = c("row", "col"))
+whonet_breakpoints_raw |> 
+  filter(YEAR == format(Sys.Date(), "%Y")) |>
+  count(GUIDELINES, YEAR, BREAKPOINT_TYPE) |> 
+  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> 
+  janitor::adorn_totals(where = c("row", "col"))
 # compared to current
 AMR::clinical_breakpoints |>
  count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
@@ -213,7 +231,7 @@ AMR::clinical_breakpoints |>
  as.data.frame() |>
  janitor::adorn_totals(where = c("row", "col"))

-breakpoints <- whonet_breakpoints |>
+breakpoints <- whonet_breakpoints_raw |>
  mutate(code = toupper(ORGANISM_CODE)) |>
  left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
                      # GEN (Generic) and ALL (All) are PK/PD codes
@@ -233,7 +251,7 @@ breakpoints <- breakpoints |>

 # and these ones have unknown antibiotics according to WHONET itself:
 breakpoints |> 
-  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |> 
+  filter(!WHONET_ABX_CODE %in% whonet_antibiotics_raw$WHONET_ABX_CODE) |> 
  count(GUIDELINES, WHONET_ABX_CODE) |>
  mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
         ab_name = ab_name(ab))
@@ -296,7 +314,7 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.d
 # regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
 breakpoints_new |> 
  filter(host %like% "foal") |>
-  count(guideline, host)
+  count(guideline, host, ab)
 breakpoints_new <- breakpoints_new |> 
  filter(host %unlike% "foal") |> 
  mutate(host = ifelse(host %like% "horse", "horse", host))
@@ -304,7 +322,7 @@ breakpoints_new <- breakpoints_new |>
 # FIXES FOR WHONET ERRORS ----
 m <- unique(as.double(as.mic(levels(as.mic(1)))))

-# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
+# WHONET has no >1024 but instead uses 1025, 513, and 129, so as.mic() cannot be used to clean.
 # instead, raise these one higher valid MIC factor level:
 breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
 breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
@@ -318,6 +336,7 @@ anyNA(breakpoints_new$breakpoint_S)

 # a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
 # 2025-04-20/ For EUCAST, this should not be the case, only happens to old guideline now it seems
+# 2026-03-27/ Now 2026 is affected as well, but setting R equal to S is fine
 breakpoints_new |>
  filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
  count(guideline)
@@ -325,10 +344,15 @@ breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guidelin


 # fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
-breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
+# 2026-03-27/ Only erroneous in EUCAST up to 2024; it is fixed for 2025 and 2026, but the older guidelines still need this fix
+breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
 # Haemophilus same error (must only be H. influenzae)
-breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
+# 2026-03-27/ Only erroneous in EUCAST up to 2024; it is fixed for 2025 and 2026, but the older guidelines still need this fix
+breakpoints_new$mo[breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
 # EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
+breakpoints_new |>
+  filter(method == "MIC" & guideline %like% "EUCAST" & mo %like% as.mo("B_HMPHL")) |>
+  count(guideline, mo)
 breakpoints_new <- breakpoints_new |> 
  bind_rows(
    breakpoints_new |>
@@ -345,6 +369,17 @@ breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
 breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
 # Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
 breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
+
+# WHONET contains breakpoints for EUCAST that are not actually in EUCAST:
+# IPM in M. morganii has not been part of EUCAST since v10
+wrong <- with(breakpoints_new, guideline %like% "EUCAST" & ab == "IPM" & mo == as.mo("M. morganii") & ref_tbl != "ECOFF")
+breakpoints_new |> filter(wrong)
+breakpoints_new <- breakpoints_new |> filter(!wrong)
+# Breakpoints for COPS were part of EUCAST until v11
+wrong <- with(breakpoints_new, guideline %like% "EUCAST" & mo == as.mo("CoPS") & ref_tbl != "ECOFF")
+breakpoints_new |> filter(wrong)
+breakpoints_new <- breakpoints_new |> filter(!wrong)
+
 # WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
 # 2024-02-22/ fixed now

@@ -389,7 +424,7 @@ breakpoints_new |>
  filter(id %in% .$id[which(duplicated(id))]) |> 
  arrange(desc(guideline)) |>
  View()
-# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
+# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints_raw df, we have to remove duplicates
 # 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
 breakpoints_new <- breakpoints_new |> 
  distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)
@@ -398,9 +433,9 @@ breakpoints_new <- breakpoints_new |>
 # CHECKS AND SAVE TO PACKAGE ----

 # check again
-breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
+breakpoints_new |> filter(guideline == "EUCAST 2026", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
 # compare with current version
-clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
+clinical_breakpoints |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")

 # must have "human" and "ECOFF"
 breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
--- a/data-raw/clin_break.md5
+++ b/data-raw/clin_break.md5
@@ -1 +1 @@
-c7062e60fa4fbc2eee233044d15903ce
+c43a990cf91f959913d207e5a85e2bd5
--- a/data-raw/datasets/clinical_breakpoints.dta
+++ b/data-raw/datasets/clinical_breakpoints.dta
--- a/data-raw/datasets/clinical_breakpoints.feather
+++ b/data-raw/datasets/clinical_breakpoints.feather
--- a/data-raw/datasets/clinical_breakpoints.parquet
+++ b/data-raw/datasets/clinical_breakpoints.parquet
--- a/data-raw/datasets/clinical_breakpoints.rds
+++ b/data-raw/datasets/clinical_breakpoints.rds
--- a/data-raw/datasets/clinical_breakpoints.sav
+++ b/data-raw/datasets/clinical_breakpoints.sav
--- a/data-raw/datasets/clinical_breakpoints.txt
+++ b/data-raw/datasets/clinical_breakpoints.txt
--- a/data-raw/datasets/clinical_breakpoints.xlsx
+++ b/data-raw/datasets/clinical_breakpoints.xlsx
--- a/data-raw/datasets/microorganisms.codes.dta
+++ b/data-raw/datasets/microorganisms.codes.dta
--- a/data-raw/datasets/microorganisms.codes.feather
+++ b/data-raw/datasets/microorganisms.codes.feather
--- a/data-raw/datasets/microorganisms.codes.parquet
+++ b/data-raw/datasets/microorganisms.codes.parquet
--- a/data-raw/datasets/microorganisms.codes.rds
+++ b/data-raw/datasets/microorganisms.codes.rds
--- a/data-raw/datasets/microorganisms.codes.sav
+++ b/data-raw/datasets/microorganisms.codes.sav
--- a/data-raw/datasets/microorganisms.codes.txt
+++ b/data-raw/datasets/microorganisms.codes.txt
@@ -2832,6 +2832,7 @@
 "FU-"	"B_FSBCTR"
 "FUA.SP"	"F_FUSRM"
 "FUL"	"B_FSBCTR_ULCR"
+"FUO"	"F_FUSRM_OXYS"
 "FUR"	"F_FUSRM"
 "FUROXY"	"F_FUSRM_OXYS"
 "FURPET"	"F_FUSRM_PTRL"
@@ -2936,6 +2937,7 @@
 "GLO.SP"	"B_GLBCT"
 "GLOSAN"	"B_GLBCT_SNGN"
 "GLOSPP"	"B_GLBCT"
+"GLS"	"B_GLSSR"
 "GM+"	"B_GRAMP"
 "GM-"	"B_GRAMN"
 "GMO"	"B_GEMLL_MRBL"
@@ -3026,7 +3028,6 @@
 "HABSPP"	"B_HMTBC"
 "HAC"	"B_AGGRG_ACTN"
 "HACEK"	"B_HACEK"
-"HACEK"	"B_HACEK"
 "HAE"	"B_HMPHL"
 "HAE.SP"	"B_HMPHL"
 "HAEAEG"	"B_HMPHL_AEGY"
@@ -3122,7 +3123,7 @@
 "HPL"	"B_HMPHL_PRPH"
 "HPO"	"F_OGATA"
 "HPOSPP"	"F_HNDRS_ASTR"
-"HPR"	"B_HMPHL_PRSS"
+"HPR"	"B_GLSSR_PRSS"
 "HPU"	"B_HLCBCT_PLLR"
 "HPY"	"B_HLCBCT_PYLR"
 "HRB"	"B_HRBSP"
@@ -3471,6 +3472,7 @@
 "LQU"	"B_LGNLL_QTRN"
 "LRC"	"B_LPTSP_INTR"
 "LRE"	"B_LCTBC_RETR"
+"LRF"	"B_LCTCC_RFFN"
 "LRI"	"B_LMNRL_RCHR"
 "LRU"	"B_LGNLL_RBRL"
 "LSA"	"B_LCTBC_SLVR"
@@ -3760,6 +3762,7 @@
 "MNE"	"B_MYCBC_NERM"
 "MNL"	"B_MRXLL_NNLQ"
 "MNO"	"B_MYCBC_NNCH"
+"MNT"	"B_MYCBC"
 "MNV"	"B_MNNHM_VRGN"
 "MO-"	"B_MRXLL"
 "MO.BOV"	"B_MRXLL_BOVS"
@@ -4295,6 +4298,7 @@
 "PAT.SP"	"B_PANTO"
 "PAU"	"B_SLMNL_ENTR_ENTR"
 "PAV"	"B_AVBCT_AVIM"
+"PBA"	"B_PSDCL_ALBA"
 "PBC"	"B_PRVTL_BCCL"
 "PBE"	"B_PSTRL_BTTY"
 "PBI"	"B_PRBCT"
@@ -4591,6 +4595,7 @@
 "PSA"	"F_PSDLL"
 "PSA.SP"	"F_PSDLL"
 "PSASPP"	"F_PSDLL"
+"PSB"	"B_PSDCL"
 "PSC"	"F_PSDCH"
 "PSCSPP"	"B_PSDCL"
 "PSD"	"B_STPHY_PSDN"
@@ -4706,6 +4711,7 @@
 "RAH.SP"	"B_RHNLL"
 "RAHAQU"	"B_RHNLL_AQTL"
 "RAHSPP"	"B_RHNLL"
+"RAI"	"B_RLSTN_INSD"
 "RAK"	"B_RTTSA_AKAR"
 "RAL"	"B_RLSTN"
 "RAL.SP"	"B_RLSTN"
@@ -4800,6 +4806,7 @@
 "ROD"	"B_RDNTB"
 "RODPNE"	"B_RDNTB_PNMT"
 "RODSPP"	"B_RDNTB"
+"ROK"	"B_ROTHI_KRST"
 "ROL"	"F_RHZPS_MCRS"
 "ROM"	"B_RSMNS"
 "ROMMUC"	"B_RSMNS"
@@ -5042,8 +5049,10 @@
 "SAV"	"B_SLMNL_ARCH"
 "SB2"	"B_STRPT_BOVS"
 "SBA"	"B_SLMNL_BRLL"
+"SBC"	"B_SLBCL"
 "SBE"	"B_SHWNL_BNTH"
 "SBG"	"B_SLMNL_BNGR"
+"SBI"	"B_SLBCL_SLVS"
 "SBL"	"B_SLMNL_BLCK"
 "SBM"	"B_SLMNL_BVSM"
 "SBN"	"B_SLMNL_BBRG"
@@ -5078,6 +5087,7 @@
 "SCS"	"F_SCLCB_CNST"
 "SCT"	"B_STRPT_CNST"
 "SCU"	"B_STPHY_CRNS"
+"SCV"	"F_SCPLR_VCLS"
 "SCY"	"F_SCYTL"
 "SCYSPP"	"F_SCYTL"
 "SD1"	"B_SHGLL_DYSN"
@@ -5656,6 +5666,7 @@
 "TAYSPP"	"B_TYLRL"
 "TBE"	"F_GTRCH_RDLL"
 "TBESPP"	"F_TRCHS"
+"TBH"	"F_TRCHP_BNHM"
 "TBN"	"B_TRPRL_BRNR"
 "TCA"	"F_DBRYM_CHVL"
 "TCASPP"	"F_CANDD"
@@ -5841,6 +5852,8 @@
 "TYASPP"	"F_TRCHP"
 "TYE"	"P_TRYPN_JNSN"
 "TYI"	"F_TRCHP_INDT"
+"TYM"	"B_TRPHR"
+"TYW"	"B_TRPHR_WHPP"
 "ULO"	"F_ULCLD"
 "UNK"	"UNKNOWN"
 "UPEC"	"B_ESCHR_COLI"
@@ -5850,6 +5863,7 @@
 "UREPAR"	"B_URPLS_PRVM"
 "URESPP"	"B_URPLS"
 "UREURE"	"B_URPLS_URLY"
+"URP"	"B_URPLS_PRVM"
 "UUR"	"B_URPLS_URLY"
 "V.ALG"	"B_VIBRI_ALGN"
 "V.CHO"	"B_VIBRI_CHLR"
--- a/data-raw/datasets/microorganisms.codes.xlsx
+++ b/data-raw/datasets/microorganisms.codes.xlsx
--- a/data-raw/microorganisms.codes.md5
+++ b/data-raw/microorganisms.codes.md5
@@ -1 +1 @@
-986d5110a46bbf297ebaeb4dd5179fff
+6ef98bb1bcd27052fde453bb12c0b285
--- a/data-raw/organisms.rds
+++ b/data-raw/organisms.rds
--- a/data-raw/v_16.0__BreakpointTables.xlsx
+++ b/data-raw/v_16.0__BreakpointTables.xlsx