1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-21 12:13:20 +02:00

(v2.1.1.9247) CLSI/EUCAST 2025!

This commit is contained in:
2025-04-20 12:55:31 +02:00
parent 0bab49ff86
commit ea443f7483
65 changed files with 8297 additions and 1901 deletions

View File

@ -43,45 +43,45 @@ devtools::load_all()
# READ DATA ----
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
# remove old taxonomic names
filter(TAXONOMIC_STATUS == "C") %>%
filter(TAXONOMIC_STATUS == "C") |>
mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"),
show_col_types = FALSE, guess_max = Inf) %>%
show_col_types = FALSE, guess_max = Inf) |>
filter(GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
arrange(WHONET_ABX_CODE) %>%
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
arrange(WHONET_ABX_CODE) |>
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
# MICROORGANISMS WHONET CODES ----
whonet_organisms <- whonet_organisms %>%
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
whonet_organisms <- whonet_organisms |>
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
mutate(
# this one was called Issatchenkia orientalis, but it should be:
ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
) %>%
) |>
# try to match on GBIF identifier
left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>%
left_join(microorganisms |> distinct(mo, gbif, status) |> filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) |>
# remove duplicates
arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
distinct(ORGANISM_CODE, .keep_all = TRUE) %>%
arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) |>
distinct(ORGANISM_CODE, .keep_all = TRUE) |>
# add Enterobacterales, which is a subkingdom code in their data
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>%
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) |>
arrange(ORGANISM)
## Add new WHO codes to microorganisms.codes ----
matched <- whonet_organisms %>% filter(!is.na(mo))
unmatched <- whonet_organisms %>% filter(is.na(mo))
matched <- whonet_organisms |> filter(!is.na(mo))
unmatched <- whonet_organisms |> filter(is.na(mo))
# generate the mo codes and add their names
message("Getting MO codes for WHONET input...")
unmatched <- unmatched %>%
unmatched <- unmatched |>
mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
minimum_matching_score = 0.55,
keep_synonyms = TRUE,
@ -94,51 +94,54 @@ unmatched <- unmatched %>%
keep_synonyms = TRUE,
language = "en"))
# check if coercion at least resembles the first part (genus)
unmatched <- unmatched %>%
unmatched <- unmatched |>
mutate(
first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>%
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") |>
arrange(keep)
unmatched %>%
View()
unmatched <- unmatched %>%
unmatched |> View()
unmatched <- unmatched |>
filter(keep == TRUE)
organisms <- matched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>%
bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>%
mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>%
organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) |>
bind_rows(unmatched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) |>
mutate(name = mo_name(mo, keep_synonyms = TRUE)) |>
arrange(code)
# some subspecies exist, while their upper species do not, add them as the species level:
subspp <- organisms %>%
subspp <- organisms |>
filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
mo_species(mo, keep_synonyms = TRUE) != "" &
mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>%
mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") |>
mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE),
mo_species(mo, keep_synonyms = TRUE)),
keep_synonyms = TRUE),
name = mo_name(mo, keep_synonyms = TRUE))
organisms <- organisms %>%
filter(!code %in% subspp$code) %>%
bind_rows(subspp) %>%
organisms <- organisms |>
filter(!code %in% subspp$code) |>
bind_rows(subspp) |>
arrange(code)
# add the groups
organisms <- organisms %>%
bind_rows(tibble(code = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique(),
organisms <- organisms |>
bind_rows(tibble(code = organisms |> filter(!is.na(group)) |> pull(group) |> unique(),
group = NA,
mo = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique() %>% as.mo(keep_synonyms = TRUE),
name = mo_name(mo, keep_synonyms = TRUE))) %>%
arrange(code, group) %>%
select(-group) %>%
mo = organisms |> filter(!is.na(group)) |> pull(group) |> unique() |> as.mo(keep_synonyms = TRUE),
name = mo_name(mo, keep_synonyms = TRUE))) |>
arrange(code, group) |>
select(-group) |>
distinct()
# no XXX
organisms <- organisms |> filter(code != "XXX")
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
# 2024-06-14 still the case
organisms <- organisms %>%
# 2025-04-20 still the case
organisms |> filter(code == "SGM")
organisms <- organisms |>
filter(!(code == "SGM" & name %like% "Streptococcus"))
# this must be empty:
organisms$code[organisms$code %>% duplicated()]
organisms$code[organisms$code |> duplicated()]
saveRDS(organisms, "data-raw/organisms.rds", version = 2)
@ -147,12 +150,12 @@ saveRDS(organisms, "data-raw/organisms.rds", version = 2)
#---
# update microorganisms.codes with the latest WHONET codes
microorganisms.codes2 <- microorganisms.codes %>%
microorganisms.codes2 <- microorganisms.codes |>
# remove all old WHONET codes, whether we (in the end) keep them or not
filter(!toupper(code) %in% toupper(organisms$code)) %>%
filter(!toupper(code) %in% toupper(organisms$code)) |>
# and add the new ones
bind_rows(organisms %>% select(code, mo)) %>%
arrange(code) %>%
bind_rows(organisms |> select(code, mo)) |>
arrange(code) |>
distinct(code, .keep_all = TRUE)
# new codes:
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
@ -163,25 +166,25 @@ microorganisms.codes <- microorganisms.codes2
# 2024-06-14: file not available anymore
# # start
# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
# asiarsnet <- asiarsnet %>%
# mutate(WHONET_Code = toupper(WHONET_Code)) %>%
# left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
# asiarsnet <- asiarsnet |>
# mutate(WHONET_Code = toupper(WHONET_Code)) |>
# left_join(whonet_organisms |> mutate(WHONET_Code = toupper(ORGANISM_CODE))) |>
# mutate(
# mo1 = as.mo(ORGANISM_CODE),
# mo2 = as.mo(ORGANISM)
# ) %>%
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
# ) |>
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) |>
# filter(!is.na(mo))
# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
# insert1 <- asiarsnet |> transmute(code = WHONET_Code, mo)
# insert2 <- asiarsnet |> transmute(code = as.character(ASIARS_Net_Code), mo)
# # these will be updated
# bind_rows(insert1, insert2) %>%
# rename(mo_new = mo) %>%
# left_join(microorganisms.codes) %>%
# bind_rows(insert1, insert2) |>
# rename(mo_new = mo) |>
# left_join(microorganisms.codes) |>
# filter(mo != mo_new)
# microorganisms.codes <- microorganisms.codes %>%
# filter(!code %in% c(insert1$code, insert2$code)) %>%
# bind_rows(insert1, insert2) %>%
# microorganisms.codes <- microorganisms.codes |>
# filter(!code %in% c(insert1$code, insert2$code)) |>
# bind_rows(insert1, insert2) |>
# arrange(code)
# # end
@ -196,52 +199,52 @@ devtools::load_all()
# now that we have the correct MO codes, get the breakpoints and convert them
whonet_breakpoints %>%
count(GUIDELINES, BREAKPOINT_TYPE) %>%
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>%
whonet_breakpoints |>
count(GUIDELINES, BREAKPOINT_TYPE) |>
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
janitor::adorn_totals(where = c("row", "col"))
# compared to current
AMR::clinical_breakpoints %>%
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) %>%
arrange(tolower(type)) %>%
pivot_wider(names_from = type, values_from = n) %>%
as.data.frame() %>%
AMR::clinical_breakpoints |>
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
arrange(tolower(type)) |>
pivot_wider(names_from = type, values_from = n) |>
as.data.frame() |>
janitor::adorn_totals(where = c("row", "col"))
breakpoints <- whonet_breakpoints %>%
mutate(code = toupper(ORGANISM_CODE)) %>%
left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
breakpoints <- whonet_breakpoints |>
mutate(code = toupper(ORGANISM_CODE)) |>
left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
# GEN (Generic) and ALL (All) are PK/PD codes
data.frame(code = c("ALL", "GEN"),
mo = rep(as.mo("UNKNOWN"), 2))))
# these ones lack an MO name, they cannot be used:
unknown <- breakpoints %>%
filter(is.na(mo)) %>%
pull(code) %>%
unknown <- breakpoints |>
filter(is.na(mo)) |>
pull(code) |>
unique()
breakpoints %>%
filter(code %in% unknown) %>%
breakpoints |>
filter(code %in% unknown) |>
count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
# 2025-03-11: these codes are currently: clu, kma, fso, tyi. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
breakpoints <- breakpoints %>%
# 2025-04-20: these codes are currently: cps, fso. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
breakpoints <- breakpoints |>
filter(!is.na(mo))
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints %>%
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
arrange(desc(YEAR))
breakpoints %>%
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
pull(WHONET_ABX_CODE) %>%
unique()
# they are at the moment all old codes ("CFC", "ROX", "FIX") that have the right replacements in `antimicrobials`, so we can use as.ab()
breakpoints |>
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |>
count(GUIDELINES, WHONET_ABX_CODE) |>
mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
ab_name = ab_name(ab))
# 2025-04-20: these codes are currently: CFC, ROX, FIX, and N/A. All have the right replacements in `antimicrobials`, so we can safely use as.ab() later on
# the NAs are for M. tuberculosis, they are empty breakpoints
breakpoints <- breakpoints |>
filter(!is.na(WHONET_ABX_CODE))
## Build new breakpoints table ----
breakpoints_new <- breakpoints %>%
filter(!is.na(WHONET_ABX_CODE)) %>%
breakpoints_new <- breakpoints |>
filter(!is.na(WHONET_ABX_CODE)) |>
transmute(
guideline = paste(GUIDELINES, YEAR),
type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
@ -266,20 +269,20 @@ breakpoints_new <- breakpoints %>%
breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI"),
is_SDD = !is.na(SDD)
) %>%
) |>
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
mutate(disk_dose = disk_dose %>%
gsub("μ", "mc", ., fixed = TRUE) %>% # this is 'mu', \u03bc
gsub("µ", "mc", ., fixed = TRUE) %>% # this is 'micro', \u00b5 (yes, they look the same)
gsub("", "-", ., fixed = TRUE) %>%
gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) %>% # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
arrange(desc(guideline), mo, ab, type, method) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) |> # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
arrange(desc(guideline), mo, ab, type, method) |>
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) |>
distinct(guideline, type, host, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
# fix reference table names
breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
breakpoints_new <- breakpoints_new %>%
breakpoints_new |> filter(guideline %like% "EUCAST", is.na(ref_tbl)) |> View()
breakpoints_new <- breakpoints_new |>
mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
is.na(ref_tbl) ~ "Unknown",
TRUE ~ ref_tbl))
@ -289,11 +292,11 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.d
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
# regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
breakpoints_new %>%
filter(host %like% "foal") %>%
breakpoints_new |>
filter(host %like% "foal") |>
count(guideline, host)
breakpoints_new <- breakpoints_new %>%
filter(host %unlike% "foal") %>%
breakpoints_new <- breakpoints_new |>
filter(host %unlike% "foal") |>
mutate(host = ifelse(host %like% "horse", "horse", host))
# FIXES FOR WHONET ERRORS ----
@ -301,25 +304,22 @@ m <- unique(as.double(as.mic(levels(as.mic(1)))))
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
# instead, raise these one higher valid MIC factor level:
breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1]
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]
# a lot of R breakpoints are missing, though none of the S breakpoints are missing:
# S breakpoints must never be missing:
anyNA(breakpoints_new$breakpoint_S)
breakpoints_new %>%
filter(is.na(breakpoint_R)) %>%
count(guideline, host) |>
pivot_wider(names_from = host,
values_from = n,
values_fill = list(n = 0)) |>
View()
# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
# breakpoints_new[which(breakpoints_new$method == "MIC" &
# is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
# a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
# 2025-04-20/ For EUCAST, this should not be the case, only happens to old guideline now it seems
breakpoints_new |>
filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
count(guideline)
breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
@ -327,32 +327,30 @@ breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %li
# Haemophilus same error (must only be H. influenzae)
breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
# EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
breakpoints_new <- breakpoints_new %>%
breakpoints_new <- breakpoints_new |>
bind_rows(
breakpoints_new %>%
filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") %>%
breakpoints_new |>
filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") |>
mutate(mo = as.mo("B_HMPHL_PRNF"))
) %>%
arrange(desc(guideline), mo, ab, type, host, method)
) |>
arrange(desc(guideline), mo, ab, type, host, method) |>
distinct()
# Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
breakpoints_new <- breakpoints_new %>% filter(mo != as.mo("Achromobacter denitrificans"))
breakpoints_new <- breakpoints_new |> filter(mo != as.mo("Achromobacter denitrificans"))
# WHONET contains gentamicin breakpoints for viridans streptocci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
breakpoints_new <- breakpoints_new %>% filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEN")
breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
# WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
# UPDATE 2024-02-22: fixed now
# 2024-02-22/ fixed now
# There's a problem with C. diff in EUCAST where breakpoint_R is missing - they are listed as normal human breakpoints but are ECOFF
rows <- which(breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_CRDDS_DFFC" & is.na(breakpoints_new$breakpoint_R) & !is.na(breakpoints_new$breakpoint_S))
breakpoints_new$type[rows] <- "ECOFF"
breakpoints_new$host[rows] <- "ECOFF"
breakpoints_new$ref_tbl[rows] <- "ECOFF"
breakpoints_new$breakpoint_R[rows] <- breakpoints_new$breakpoint_S[rows]
breakpoints_new <- distinct(breakpoints_new, .keep_all = TRUE)
# 2025-04-20/ fixed now
# determine rank again now that some changes were made on taxonomic level (genus -> species)
breakpoints_new <- breakpoints_new %>%
breakpoints_new <- breakpoints_new |>
mutate(rank_index = case_when(
mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
@ -367,52 +365,50 @@ breakpoints_new <- breakpoints_new %>%
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
# EUCAST 2023 guideline: S <= 8 and R > 8
# WHONET file: S <= 8 and R >= 16
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# but this will make an MIC of 12 I, which should be R according to EUCAST, so:
breakpoints_new <- breakpoints_new %>%
breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# but this logic only works for CLSI - for an MIC outside the log2 range (e.g., 12) you must round up (i.e., 16)
# but for EUCAST, an MIC of 12 would now be considered I, which should be R, so correct those values:
breakpoints_new <- breakpoints_new |>
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
pmax(breakpoint_S, breakpoint_R / 2),
breakpoint_R
))
# fix disks as well
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
breakpoints_new <- breakpoints_new %>%
breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
breakpoints_new <- breakpoints_new |>
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
breakpoint_R + 1,
breakpoint_R
))
# fill missing R breakpoint where there is an S breakpoint
# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
# breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
# check the strange duplicates
breakpoints_new %>%
mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>%
filter(id %in% .$id[which(duplicated(id))]) %>%
arrange(desc(guideline))
# 2024-06-19 mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
# remove duplicates
breakpoints_new <- breakpoints_new %>%
breakpoints_new |>
mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>%
filter(id %in% .$id[which(duplicated(id))]) |>
arrange(desc(guideline)) |>
View()
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
# 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
breakpoints_new <- breakpoints_new |>
distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)
# CHECKS AND SAVE TO PACKAGE ----
# check again
breakpoints_new %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# compare with current version
clinical_breakpoints %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# must have "human" and "ECOFF"
breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
# check dimensions
dim(breakpoints_new)
dim(clinical_breakpoints)
clinical_breakpoints <- breakpoints_new
clinical_breakpoints <- clinical_breakpoints %>% dataset_UTF8_to_ASCII()
clinical_breakpoints <- clinical_breakpoints |> dataset_UTF8_to_ASCII()
usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
rm(clinical_breakpoints)
devtools::load_all(".")

View File

@ -31,12 +31,13 @@ library(dplyr)
library(readxl)
library(cleaner)
# URL:
# https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/Dosages_v_13.0_Breakpoint_Tables.pdf
# download the PDF file, open in Adobe Acrobat and export as Excel workbook
breakpoints_version <- 13
breakpoint_file <- "data-raw/v_15.0_Breakpoint_Tables.xlsx"
if (!file.exists(breakpoint_file)) {
stop("Breakpoint file not found")
}
breakpoints_version <- as.double(gsub("[^0-9.]", "", gsub(".xlsx", "", breakpoint_file, fixed = TRUE)))
dosage_source <- read_excel("data-raw/Dosages_v_12.0_Breakpoint_Tables.xlsx", skip = 4, na = "None") %>%
dosage_source <- read_excel(breakpoint_file, skip = 6, sheet = "Dosages", na = "None") %>%
format_names(snake_case = TRUE, penicillins = "drug") %>%
filter(!tolower(standard_dosage) %in% c("standard dosage", "standard dosage_source", "under review")) %>%
filter(!is.na(standard_dosage)) %>%
@ -173,6 +174,12 @@ dosage_new <- bind_rows(
# this makes it a tibble as well:
dataset_UTF8_to_ASCII()
dosage <- bind_rows(dosage_new, AMR::dosage)
dosage <- AMR::dosage |>
bind_rows(dosage_new) |>
arrange(desc(eucast_version), name) |>
distinct()
dosage <- dosage |> dataset_UTF8_to_ASCII()
usethis::use_data(dosage, internal = FALSE, overwrite = TRUE, version = 2, compress = "xz")
rm(dosage)
devtools::load_all(".")