(v2.1.1.9247) CLSI/EUCAST 2025!

2025-12-10 20:30:23 +01:00 · 2025-04-20 12:55:31 +02:00
parent 0bab49ff86
commit ea443f7483
65 changed files with 8297 additions and 1901 deletions
--- a/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R
+++ b/data-raw/_reproduction_scripts/reproduction_of_clinical_breakpoints.R
@@ -43,45 +43,45 @@ devtools::load_all()

 # READ DATA ----

-whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
+whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
  # remove old taxonomic names
-  filter(TAXONOMIC_STATUS == "C") %>%
+  filter(TAXONOMIC_STATUS == "C") |>
  mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))

 whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"),
-                               show_col_types = FALSE, guess_max = Inf) %>%
+                               show_col_types = FALSE, guess_max = Inf) |>
  filter(GUIDELINES %in% c("CLSI", "EUCAST"))

-whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
-  arrange(WHONET_ABX_CODE) %>%
+whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
+  arrange(WHONET_ABX_CODE) |>
  distinct(WHONET_ABX_CODE, .keep_all = TRUE)

 # MICROORGANISMS WHONET CODES ----

-whonet_organisms <- whonet_organisms %>%
-  select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
+whonet_organisms <- whonet_organisms |>
+  select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
  mutate(
    # this one was called Issatchenkia orientalis, but it should be:
    ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
-  ) %>% 
+  ) |>
  # try to match on GBIF identifier
-  left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>% 
+  left_join(microorganisms |> distinct(mo, gbif, status) |> filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) |> 
  # remove duplicates
-  arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
-  distinct(ORGANISM_CODE, .keep_all = TRUE) %>% 
+  arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) |>
+  distinct(ORGANISM_CODE, .keep_all = TRUE) |> 
  # add Enterobacterales, which is a subkingdom code in their data
-  bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>% 
+  bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) |> 
  arrange(ORGANISM)


 ## Add new WHO codes to microorganisms.codes ----

-matched <- whonet_organisms %>% filter(!is.na(mo))
-unmatched <- whonet_organisms %>% filter(is.na(mo))
+matched <- whonet_organisms |> filter(!is.na(mo))
+unmatched <- whonet_organisms |> filter(is.na(mo))

 # generate the mo codes and add their names
 message("Getting MO codes for WHONET input...")
-unmatched <- unmatched %>% 
+unmatched <- unmatched |> 
  mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
                    minimum_matching_score = 0.55,
                    keep_synonyms = TRUE,
@@ -94,51 +94,54 @@ unmatched <- unmatched %>%
                           keep_synonyms = TRUE,
                           language = "en"))
 # check if coercion at least resembles the first part (genus)
-unmatched <- unmatched %>% 
+unmatched <- unmatched |> 
  mutate(
    first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
-    keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>% 
+    keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") |> 
  arrange(keep)
-unmatched %>% 
-  View()
-unmatched <- unmatched %>% 
+unmatched |> View()
+unmatched <- unmatched |>
  filter(keep == TRUE)

-organisms <- matched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>% 
-  bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>% 
-  mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>% 
+organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) |> 
+  bind_rows(unmatched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) |> 
+  mutate(name = mo_name(mo, keep_synonyms = TRUE)) |> 
  arrange(code)

 # some subspecies exist, while their upper species do not, add them as the species level:
-subspp <- organisms %>%
+subspp <- organisms |>
  filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
           mo_species(mo, keep_synonyms = TRUE) != "" &
-           mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>% 
+           mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") |> 
  mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE),
                          mo_species(mo, keep_synonyms = TRUE)),
                    keep_synonyms = TRUE),
         name = mo_name(mo, keep_synonyms = TRUE))
-organisms <- organisms %>%
-  filter(!code %in% subspp$code) %>%
-  bind_rows(subspp) %>%
+organisms <- organisms |>
+  filter(!code %in% subspp$code) |>
+  bind_rows(subspp) |>
  arrange(code)

 # add the groups
-organisms <- organisms %>% 
-  bind_rows(tibble(code = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique(),
+organisms <- organisms |> 
+  bind_rows(tibble(code = organisms |> filter(!is.na(group)) |> pull(group) |> unique(),
                   group = NA,
-                   mo = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique() %>% as.mo(keep_synonyms = TRUE),
-                   name = mo_name(mo, keep_synonyms = TRUE))) %>% 
-  arrange(code, group) %>% 
-  select(-group) %>% 
+                   mo = organisms |> filter(!is.na(group)) |> pull(group) |> unique() |> as.mo(keep_synonyms = TRUE),
+                   name = mo_name(mo, keep_synonyms = TRUE))) |> 
+  arrange(code, group) |> 
+  select(-group) |> 
  distinct()
+# no XXX
+organisms <- organisms |> filter(code != "XXX")

 # 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
 # 2024-06-14 still the case
-organisms <- organisms %>% 
+# 2025-04-20 still the case
+organisms |> filter(code == "SGM")
+organisms <- organisms |> 
  filter(!(code == "SGM" & name %like% "Streptococcus"))
 # this must be empty:
-organisms$code[organisms$code %>% duplicated()]
+organisms$code[organisms$code |> duplicated()]

 saveRDS(organisms, "data-raw/organisms.rds", version = 2)

@@ -147,12 +150,12 @@ saveRDS(organisms, "data-raw/organisms.rds", version = 2)
 #---

 # update microorganisms.codes with the latest WHONET codes
-microorganisms.codes2 <- microorganisms.codes %>% 
+microorganisms.codes2 <- microorganisms.codes |> 
  # remove all old WHONET codes, whether we (in the end) keep them or not
-  filter(!toupper(code) %in% toupper(organisms$code)) %>% 
+  filter(!toupper(code) %in% toupper(organisms$code)) |> 
  # and add the new ones
-  bind_rows(organisms %>% select(code, mo)) %>% 
-  arrange(code) %>% 
+  bind_rows(organisms |> select(code, mo)) |> 
+  arrange(code) |> 
  distinct(code, .keep_all = TRUE)
 # new codes:
 microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
@@ -163,25 +166,25 @@ microorganisms.codes <- microorganisms.codes2
 # 2024-06-14: file not available anymore
 # # start
 # asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
-# asiarsnet <- asiarsnet %>%
-#   mutate(WHONET_Code = toupper(WHONET_Code)) %>%
-#   left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
+# asiarsnet <- asiarsnet |>
+#   mutate(WHONET_Code = toupper(WHONET_Code)) |>
+#   left_join(whonet_organisms |> mutate(WHONET_Code = toupper(ORGANISM_CODE))) |>
 #   mutate(
 #     mo1 = as.mo(ORGANISM_CODE),
 #     mo2 = as.mo(ORGANISM)
-#   ) %>%
-#   mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
+#   ) |>
+#   mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) |>
 #   filter(!is.na(mo))
-# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
-# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
+# insert1 <- asiarsnet |> transmute(code = WHONET_Code, mo)
+# insert2 <- asiarsnet |> transmute(code = as.character(ASIARS_Net_Code), mo)
 # # these will be updated
-# bind_rows(insert1, insert2) %>%
-#   rename(mo_new = mo) %>%
-#   left_join(microorganisms.codes) %>%
+# bind_rows(insert1, insert2) |>
+#   rename(mo_new = mo) |>
+#   left_join(microorganisms.codes) |>
 #   filter(mo != mo_new)
-# microorganisms.codes <- microorganisms.codes %>%
-#   filter(!code %in% c(insert1$code, insert2$code)) %>%
-#   bind_rows(insert1, insert2) %>%
+# microorganisms.codes <- microorganisms.codes |>
+#   filter(!code %in% c(insert1$code, insert2$code)) |>
+#   bind_rows(insert1, insert2) |>
 #   arrange(code)
 # # end

@@ -196,52 +199,52 @@ devtools::load_all()

 # now that we have the correct MO codes, get the breakpoints and convert them

-whonet_breakpoints %>% 
-  count(GUIDELINES, BREAKPOINT_TYPE) %>% 
-  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>% 
+whonet_breakpoints |> 
+  count(GUIDELINES, BREAKPOINT_TYPE) |> 
+  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |> 
  janitor::adorn_totals(where = c("row", "col"))
 # compared to current
-AMR::clinical_breakpoints %>%
-  count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) %>%
-  arrange(tolower(type)) %>%
-  pivot_wider(names_from = type, values_from = n) %>% 
-  as.data.frame() %>%
+AMR::clinical_breakpoints |>
+  count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
+  arrange(tolower(type)) |>
+  pivot_wider(names_from = type, values_from = n) |> 
+  as.data.frame() |>
  janitor::adorn_totals(where = c("row", "col"))

-breakpoints <- whonet_breakpoints %>%
-  mutate(code = toupper(ORGANISM_CODE)) %>%
-  left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
+breakpoints <- whonet_breakpoints |>
+  mutate(code = toupper(ORGANISM_CODE)) |>
+  left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
                      # GEN (Generic) and ALL (All) are PK/PD codes
                      data.frame(code = c("ALL", "GEN"),
                                 mo = rep(as.mo("UNKNOWN"), 2))))
 # these ones lack an MO name, they cannot be used:
-unknown <- breakpoints %>%
-  filter(is.na(mo)) %>%
-  pull(code) %>%
+unknown <- breakpoints |>
+  filter(is.na(mo)) |>
+  pull(code) |>
  unique()
-breakpoints %>% 
-  filter(code %in% unknown) %>% 
+breakpoints |> 
+  filter(code %in% unknown) |> 
  count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
-# 2025-03-11: these codes are currently: clu, kma, fso, tyi. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
-breakpoints <- breakpoints %>% 
+# 2025-04-20: these codes are currently: cps, fso. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
+breakpoints <- breakpoints |> 
  filter(!is.na(mo))

 # and these ones have unknown antibiotics according to WHONET itself:
-breakpoints %>% 
-  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>% 
-  count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>% 
-  arrange(desc(YEAR))
-breakpoints %>% 
-  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
-  pull(WHONET_ABX_CODE) %>%
-  unique()
-# they are at the moment all old codes ("CFC", "ROX", "FIX") that have the right replacements in `antimicrobials`, so we can use as.ab()
+breakpoints |> 
+  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |> 
+  count(GUIDELINES, WHONET_ABX_CODE) |>
+  mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
+         ab_name = ab_name(ab))
+# 2025-04-20: these codes are currently: CFC, ROX, FIX, and N/A. All have the right replacements in `antimicrobials`, so we can safely use as.ab() later on
+# the NAs are for M. tuberculosis, they are empty breakpoints
+breakpoints <- breakpoints |>
+  filter(!is.na(WHONET_ABX_CODE))


 ## Build new breakpoints table ----

-breakpoints_new <- breakpoints %>%
-  filter(!is.na(WHONET_ABX_CODE)) %>% 
+breakpoints_new <- breakpoints |>
+  filter(!is.na(WHONET_ABX_CODE)) |> 
  transmute(
    guideline = paste(GUIDELINES, YEAR),
    type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
@@ -266,20 +269,20 @@ breakpoints_new <- breakpoints %>%
    breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
    uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI"),
    is_SDD = !is.na(SDD)
-  ) %>%
+  ) |>
  # Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
  mutate(disk_dose = disk_dose %>%
    gsub("μ", "mc", ., fixed = TRUE) %>% # this is 'mu', \u03bc
    gsub("µ", "mc", ., fixed = TRUE) %>% # this is 'micro', \u00b5 (yes, they look the same)
    gsub("–", "-", ., fixed = TRUE) %>%
-    gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) %>% # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
-  arrange(desc(guideline), mo, ab, type, method) %>%
-  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
+    gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) |> # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
+  arrange(desc(guideline), mo, ab, type, method) |>
+  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) |>
  distinct(guideline, type, host, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)

 # fix reference table names
-breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
-breakpoints_new <- breakpoints_new %>% 
+breakpoints_new |> filter(guideline %like% "EUCAST", is.na(ref_tbl)) |> View()
+breakpoints_new <- breakpoints_new |> 
  mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
                             is.na(ref_tbl) ~ "Unknown",
                             TRUE ~ ref_tbl))
@@ -289,11 +292,11 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.d
 breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))

 # regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
-breakpoints_new %>% 
-  filter(host %like% "foal") %>%
+breakpoints_new |> 
+  filter(host %like% "foal") |>
  count(guideline, host)
-breakpoints_new <- breakpoints_new %>% 
-  filter(host %unlike% "foal") %>% 
+breakpoints_new <- breakpoints_new |> 
+  filter(host %unlike% "foal") |> 
  mutate(host = ifelse(host %like% "horse", "horse", host))

 # FIXES FOR WHONET ERRORS ----
@@ -301,25 +304,22 @@ m <- unique(as.double(as.mic(levels(as.mic(1)))))

 # WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
 # instead, raise these one higher valid MIC factor level:
+breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
+breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
 breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
 breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1]
 breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1]
 breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]

-# a lot of R breakpoints are missing, though none of the S breakpoints are missing:
+# S breakpoints must never be missing:
 anyNA(breakpoints_new$breakpoint_S)

-breakpoints_new %>% 
-  filter(is.na(breakpoint_R)) %>% 
-  count(guideline, host) |>
-  pivot_wider(names_from = host,
-              values_from = n,
-              values_fill = list(n = 0)) |>
-  View()
-
-# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
-# breakpoints_new[which(breakpoints_new$method == "MIC" &
-#                         is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
+# a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
+# 2025-04-20/ For EUCAST, this should not be the case, only happens to old guideline now it seems
+breakpoints_new |>
+  filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
+  count(guideline)
+breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]


 # fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
@@ -327,32 +327,30 @@ breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %li
 # Haemophilus same error (must only be H. influenzae)
 breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
 # EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
-breakpoints_new <- breakpoints_new %>% 
+breakpoints_new <- breakpoints_new |> 
  bind_rows(
-    breakpoints_new %>%
-      filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") %>% 
+    breakpoints_new |>
+      filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") |> 
      mutate(mo = as.mo("B_HMPHL_PRNF"))
-  ) %>% 
-  arrange(desc(guideline), mo, ab, type, host, method)
+  ) |> 
+  arrange(desc(guideline), mo, ab, type, host, method) |>
+  distinct()
 # Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
-breakpoints_new <- breakpoints_new %>% filter(mo != as.mo("Achromobacter denitrificans"))
+breakpoints_new <- breakpoints_new |> filter(mo != as.mo("Achromobacter denitrificans"))
 # WHONET contains gentamicin breakpoints for viridans streptocci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
-breakpoints_new <- breakpoints_new %>% filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
+breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEN")
+breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
+breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
 # Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
 breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
 # WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
-# UPDATE 2024-02-22: fixed now
+# 2024-02-22/ fixed now

 # There's a problem with C. diff in EUCAST where breakpoint_R is missing - they are listed as normal human breakpoints but are ECOFF
-rows <- which(breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_CRDDS_DFFC" & is.na(breakpoints_new$breakpoint_R) & !is.na(breakpoints_new$breakpoint_S))
-breakpoints_new$type[rows] <- "ECOFF"
-breakpoints_new$host[rows] <- "ECOFF"
-breakpoints_new$ref_tbl[rows] <- "ECOFF"
-breakpoints_new$breakpoint_R[rows] <- breakpoints_new$breakpoint_S[rows]
-breakpoints_new <- distinct(breakpoints_new, .keep_all = TRUE)
+# 2025-04-20/ fixed now

 # determine rank again now that some changes were made on taxonomic level (genus -> species)
-breakpoints_new <- breakpoints_new %>% 
+breakpoints_new <- breakpoints_new |> 
  mutate(rank_index = case_when(
    mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
    mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
@@ -367,52 +365,50 @@ breakpoints_new <- breakpoints_new %>%
 # WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
 # EUCAST 2023 guideline: S <= 8 and R > 8
 #           WHONET file: S <= 8 and R >= 16
-breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
-# but this will make an MIC of 12 I, which should be R according to EUCAST, so:
-breakpoints_new <- breakpoints_new %>%
+breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
+# but this logic only works for CLSI - for an MIC outside the log2 range (e.g., 12) you must round up (i.e., 16)
+# but for EUCAST, an MIC of 12 would now be considered I, which should be R, so correct those values:
+breakpoints_new <- breakpoints_new |>
  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
    pmax(breakpoint_S, breakpoint_R / 2),
    breakpoint_R
  ))
 # fix disks as well
-breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
-breakpoints_new <- breakpoints_new %>%
+breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
+breakpoints_new <- breakpoints_new |>
  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
    breakpoint_R + 1,
    breakpoint_R
  ))
-# fill missing R breakpoint where there is an S breakpoint
-# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
-# breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
-

 # check the strange duplicates
-breakpoints_new %>% 
-  mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>% 
-  filter(id %in% .$id[which(duplicated(id))]) %>% 
-  arrange(desc(guideline))
-# 2024-06-19 mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
-# remove duplicates
-breakpoints_new <- breakpoints_new %>% 
+breakpoints_new |> 
+  mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>%
+  filter(id %in% .$id[which(duplicated(id))]) |> 
+  arrange(desc(guideline)) |>
+  View()
+# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
+# 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
+breakpoints_new <- breakpoints_new |> 
  distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)


 # CHECKS AND SAVE TO PACKAGE ----

 # check again
-breakpoints_new %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
+breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
 # compare with current version
-clinical_breakpoints %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
+clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")

 # must have "human" and "ECOFF"
-breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
+breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")

 # check dimensions
 dim(breakpoints_new)
 dim(clinical_breakpoints)

 clinical_breakpoints <- breakpoints_new
-clinical_breakpoints <- clinical_breakpoints %>% dataset_UTF8_to_ASCII()
+clinical_breakpoints <- clinical_breakpoints |> dataset_UTF8_to_ASCII()
 usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
 rm(clinical_breakpoints)
 devtools::load_all(".")
--- a/data-raw/_reproduction_scripts/reproduction_of_dosage.R
+++ b/data-raw/_reproduction_scripts/reproduction_of_dosage.R
@@ -31,12 +31,13 @@ library(dplyr)
 library(readxl)
 library(cleaner)

-# URL:
-# https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/Dosages_v_13.0_Breakpoint_Tables.pdf
-# download the PDF file, open in Adobe Acrobat and export as Excel workbook
-breakpoints_version <- 13
+breakpoint_file <- "data-raw/v_15.0_Breakpoint_Tables.xlsx"
+if (!file.exists(breakpoint_file)) {
+  stop("Breakpoint file not found")
+}
+breakpoints_version <- as.double(gsub("[^0-9.]", "", gsub(".xlsx", "", breakpoint_file, fixed = TRUE)))

-dosage_source <- read_excel("data-raw/Dosages_v_12.0_Breakpoint_Tables.xlsx", skip = 4, na = "None") %>%
+dosage_source <- read_excel(breakpoint_file, skip = 6, sheet = "Dosages", na = "None") %>%
  format_names(snake_case = TRUE, penicillins = "drug") %>%
  filter(!tolower(standard_dosage) %in% c("standard dosage", "standard dosage_source", "under review")) %>%
  filter(!is.na(standard_dosage)) %>%
@@ -173,6 +174,12 @@ dosage_new <- bind_rows(
  # this makes it a tibble as well:
  dataset_UTF8_to_ASCII()

-dosage <- bind_rows(dosage_new, AMR::dosage)
+dosage <- AMR::dosage |>
+  bind_rows(dosage_new) |>
+  arrange(desc(eucast_version), name) |>
+  distinct()

+dosage <- dosage |> dataset_UTF8_to_ASCII()
 usethis::use_data(dosage, internal = FALSE, overwrite = TRUE, version = 2, compress = "xz")
+rm(dosage)
+devtools::load_all(".")