mirror of
https://github.com/msberends/AMR.git
synced 2025-07-21 12:13:20 +02:00
(v2.1.1.9247) CLSI/EUCAST 2025!
This commit is contained in:
@ -43,45 +43,45 @@ devtools::load_all()
|
||||
|
||||
# READ DATA ----
|
||||
|
||||
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
|
||||
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
|
||||
# remove old taxonomic names
|
||||
filter(TAXONOMIC_STATUS == "C") %>%
|
||||
filter(TAXONOMIC_STATUS == "C") |>
|
||||
mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
|
||||
|
||||
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"),
|
||||
show_col_types = FALSE, guess_max = Inf) %>%
|
||||
show_col_types = FALSE, guess_max = Inf) |>
|
||||
filter(GUIDELINES %in% c("CLSI", "EUCAST"))
|
||||
|
||||
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
|
||||
arrange(WHONET_ABX_CODE) %>%
|
||||
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) |>
|
||||
arrange(WHONET_ABX_CODE) |>
|
||||
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
|
||||
|
||||
# MICROORGANISMS WHONET CODES ----
|
||||
|
||||
whonet_organisms <- whonet_organisms %>%
|
||||
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
|
||||
whonet_organisms <- whonet_organisms |>
|
||||
select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) |>
|
||||
mutate(
|
||||
# this one was called Issatchenkia orientalis, but it should be:
|
||||
ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
|
||||
) %>%
|
||||
) |>
|
||||
# try to match on GBIF identifier
|
||||
left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>%
|
||||
left_join(microorganisms |> distinct(mo, gbif, status) |> filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) |>
|
||||
# remove duplicates
|
||||
arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
|
||||
distinct(ORGANISM_CODE, .keep_all = TRUE) %>%
|
||||
arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) |>
|
||||
distinct(ORGANISM_CODE, .keep_all = TRUE) |>
|
||||
# add Enterobacterales, which is a subkingdom code in their data
|
||||
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>%
|
||||
bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) |>
|
||||
arrange(ORGANISM)
|
||||
|
||||
|
||||
## Add new WHO codes to microorganisms.codes ----
|
||||
|
||||
matched <- whonet_organisms %>% filter(!is.na(mo))
|
||||
unmatched <- whonet_organisms %>% filter(is.na(mo))
|
||||
matched <- whonet_organisms |> filter(!is.na(mo))
|
||||
unmatched <- whonet_organisms |> filter(is.na(mo))
|
||||
|
||||
# generate the mo codes and add their names
|
||||
message("Getting MO codes for WHONET input...")
|
||||
unmatched <- unmatched %>%
|
||||
unmatched <- unmatched |>
|
||||
mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
|
||||
minimum_matching_score = 0.55,
|
||||
keep_synonyms = TRUE,
|
||||
@ -94,51 +94,54 @@ unmatched <- unmatched %>%
|
||||
keep_synonyms = TRUE,
|
||||
language = "en"))
|
||||
# check if coercion at least resembles the first part (genus)
|
||||
unmatched <- unmatched %>%
|
||||
unmatched <- unmatched |>
|
||||
mutate(
|
||||
first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
|
||||
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>%
|
||||
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") |>
|
||||
arrange(keep)
|
||||
unmatched %>%
|
||||
View()
|
||||
unmatched <- unmatched %>%
|
||||
unmatched |> View()
|
||||
unmatched <- unmatched |>
|
||||
filter(keep == TRUE)
|
||||
|
||||
organisms <- matched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>%
|
||||
bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>%
|
||||
mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>%
|
||||
organisms <- matched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) |>
|
||||
bind_rows(unmatched |> transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) |>
|
||||
mutate(name = mo_name(mo, keep_synonyms = TRUE)) |>
|
||||
arrange(code)
|
||||
|
||||
# some subspecies exist, while their upper species do not, add them as the species level:
|
||||
subspp <- organisms %>%
|
||||
subspp <- organisms |>
|
||||
filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
|
||||
mo_species(mo, keep_synonyms = TRUE) != "" &
|
||||
mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>%
|
||||
mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") |>
|
||||
mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE),
|
||||
mo_species(mo, keep_synonyms = TRUE)),
|
||||
keep_synonyms = TRUE),
|
||||
name = mo_name(mo, keep_synonyms = TRUE))
|
||||
organisms <- organisms %>%
|
||||
filter(!code %in% subspp$code) %>%
|
||||
bind_rows(subspp) %>%
|
||||
organisms <- organisms |>
|
||||
filter(!code %in% subspp$code) |>
|
||||
bind_rows(subspp) |>
|
||||
arrange(code)
|
||||
|
||||
# add the groups
|
||||
organisms <- organisms %>%
|
||||
bind_rows(tibble(code = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique(),
|
||||
organisms <- organisms |>
|
||||
bind_rows(tibble(code = organisms |> filter(!is.na(group)) |> pull(group) |> unique(),
|
||||
group = NA,
|
||||
mo = organisms %>% filter(!is.na(group)) %>% pull(group) %>% unique() %>% as.mo(keep_synonyms = TRUE),
|
||||
name = mo_name(mo, keep_synonyms = TRUE))) %>%
|
||||
arrange(code, group) %>%
|
||||
select(-group) %>%
|
||||
mo = organisms |> filter(!is.na(group)) |> pull(group) |> unique() |> as.mo(keep_synonyms = TRUE),
|
||||
name = mo_name(mo, keep_synonyms = TRUE))) |>
|
||||
arrange(code, group) |>
|
||||
select(-group) |>
|
||||
distinct()
|
||||
# no XXX
|
||||
organisms <- organisms |> filter(code != "XXX")
|
||||
|
||||
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
|
||||
# 2024-06-14 still the case
|
||||
organisms <- organisms %>%
|
||||
# 2025-04-20 still the case
|
||||
organisms |> filter(code == "SGM")
|
||||
organisms <- organisms |>
|
||||
filter(!(code == "SGM" & name %like% "Streptococcus"))
|
||||
# this must be empty:
|
||||
organisms$code[organisms$code %>% duplicated()]
|
||||
organisms$code[organisms$code |> duplicated()]
|
||||
|
||||
saveRDS(organisms, "data-raw/organisms.rds", version = 2)
|
||||
|
||||
@ -147,12 +150,12 @@ saveRDS(organisms, "data-raw/organisms.rds", version = 2)
|
||||
#---
|
||||
|
||||
# update microorganisms.codes with the latest WHONET codes
|
||||
microorganisms.codes2 <- microorganisms.codes %>%
|
||||
microorganisms.codes2 <- microorganisms.codes |>
|
||||
# remove all old WHONET codes, whether we (in the end) keep them or not
|
||||
filter(!toupper(code) %in% toupper(organisms$code)) %>%
|
||||
filter(!toupper(code) %in% toupper(organisms$code)) |>
|
||||
# and add the new ones
|
||||
bind_rows(organisms %>% select(code, mo)) %>%
|
||||
arrange(code) %>%
|
||||
bind_rows(organisms |> select(code, mo)) |>
|
||||
arrange(code) |>
|
||||
distinct(code, .keep_all = TRUE)
|
||||
# new codes:
|
||||
microorganisms.codes2$code[which(!microorganisms.codes2$code %in% microorganisms.codes$code)]
|
||||
@ -163,25 +166,25 @@ microorganisms.codes <- microorganisms.codes2
|
||||
# 2024-06-14: file not available anymore
|
||||
# # start
|
||||
# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
|
||||
# asiarsnet <- asiarsnet %>%
|
||||
# mutate(WHONET_Code = toupper(WHONET_Code)) %>%
|
||||
# left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
|
||||
# asiarsnet <- asiarsnet |>
|
||||
# mutate(WHONET_Code = toupper(WHONET_Code)) |>
|
||||
# left_join(whonet_organisms |> mutate(WHONET_Code = toupper(ORGANISM_CODE))) |>
|
||||
# mutate(
|
||||
# mo1 = as.mo(ORGANISM_CODE),
|
||||
# mo2 = as.mo(ORGANISM)
|
||||
# ) %>%
|
||||
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
|
||||
# ) |>
|
||||
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) |>
|
||||
# filter(!is.na(mo))
|
||||
# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
|
||||
# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
|
||||
# insert1 <- asiarsnet |> transmute(code = WHONET_Code, mo)
|
||||
# insert2 <- asiarsnet |> transmute(code = as.character(ASIARS_Net_Code), mo)
|
||||
# # these will be updated
|
||||
# bind_rows(insert1, insert2) %>%
|
||||
# rename(mo_new = mo) %>%
|
||||
# left_join(microorganisms.codes) %>%
|
||||
# bind_rows(insert1, insert2) |>
|
||||
# rename(mo_new = mo) |>
|
||||
# left_join(microorganisms.codes) |>
|
||||
# filter(mo != mo_new)
|
||||
# microorganisms.codes <- microorganisms.codes %>%
|
||||
# filter(!code %in% c(insert1$code, insert2$code)) %>%
|
||||
# bind_rows(insert1, insert2) %>%
|
||||
# microorganisms.codes <- microorganisms.codes |>
|
||||
# filter(!code %in% c(insert1$code, insert2$code)) |>
|
||||
# bind_rows(insert1, insert2) |>
|
||||
# arrange(code)
|
||||
# # end
|
||||
|
||||
@ -196,52 +199,52 @@ devtools::load_all()
|
||||
|
||||
# now that we have the correct MO codes, get the breakpoints and convert them
|
||||
|
||||
whonet_breakpoints %>%
|
||||
count(GUIDELINES, BREAKPOINT_TYPE) %>%
|
||||
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>%
|
||||
whonet_breakpoints |>
|
||||
count(GUIDELINES, BREAKPOINT_TYPE) |>
|
||||
pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) |>
|
||||
janitor::adorn_totals(where = c("row", "col"))
|
||||
# compared to current
|
||||
AMR::clinical_breakpoints %>%
|
||||
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) %>%
|
||||
arrange(tolower(type)) %>%
|
||||
pivot_wider(names_from = type, values_from = n) %>%
|
||||
as.data.frame() %>%
|
||||
AMR::clinical_breakpoints |>
|
||||
count(GUIDELINES = gsub("[^a-zA-Z]", "", guideline), type) |>
|
||||
arrange(tolower(type)) |>
|
||||
pivot_wider(names_from = type, values_from = n) |>
|
||||
as.data.frame() |>
|
||||
janitor::adorn_totals(where = c("row", "col"))
|
||||
|
||||
breakpoints <- whonet_breakpoints %>%
|
||||
mutate(code = toupper(ORGANISM_CODE)) %>%
|
||||
left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
|
||||
breakpoints <- whonet_breakpoints |>
|
||||
mutate(code = toupper(ORGANISM_CODE)) |>
|
||||
left_join(bind_rows(microorganisms.codes |> filter(!code %in% c("ALL", "GEN")),
|
||||
# GEN (Generic) and ALL (All) are PK/PD codes
|
||||
data.frame(code = c("ALL", "GEN"),
|
||||
mo = rep(as.mo("UNKNOWN"), 2))))
|
||||
# these ones lack an MO name, they cannot be used:
|
||||
unknown <- breakpoints %>%
|
||||
filter(is.na(mo)) %>%
|
||||
pull(code) %>%
|
||||
unknown <- breakpoints |>
|
||||
filter(is.na(mo)) |>
|
||||
pull(code) |>
|
||||
unique()
|
||||
breakpoints %>%
|
||||
filter(code %in% unknown) %>%
|
||||
breakpoints |>
|
||||
filter(code %in% unknown) |>
|
||||
count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
|
||||
# 2025-03-11: these codes are currently: clu, kma, fso, tyi. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
|
||||
breakpoints <- breakpoints %>%
|
||||
# 2025-04-20: these codes are currently: cps, fso. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
|
||||
breakpoints <- breakpoints |>
|
||||
filter(!is.na(mo))
|
||||
|
||||
# and these ones have unknown antibiotics according to WHONET itself:
|
||||
breakpoints %>%
|
||||
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
|
||||
count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
|
||||
arrange(desc(YEAR))
|
||||
breakpoints %>%
|
||||
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
|
||||
pull(WHONET_ABX_CODE) %>%
|
||||
unique()
|
||||
# they are at the moment all old codes ("CFC", "ROX", "FIX") that have the right replacements in `antimicrobials`, so we can use as.ab()
|
||||
breakpoints |>
|
||||
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) |>
|
||||
count(GUIDELINES, WHONET_ABX_CODE) |>
|
||||
mutate(ab = as.ab(WHONET_ABX_CODE, fast_mode = TRUE),
|
||||
ab_name = ab_name(ab))
|
||||
# 2025-04-20: these codes are currently: CFC, ROX, FIX, and N/A. All have the right replacements in `antimicrobials`, so we can safely use as.ab() later on
|
||||
# the NAs are for M. tuberculosis, they are empty breakpoints
|
||||
breakpoints <- breakpoints |>
|
||||
filter(!is.na(WHONET_ABX_CODE))
|
||||
|
||||
|
||||
## Build new breakpoints table ----
|
||||
|
||||
breakpoints_new <- breakpoints %>%
|
||||
filter(!is.na(WHONET_ABX_CODE)) %>%
|
||||
breakpoints_new <- breakpoints |>
|
||||
filter(!is.na(WHONET_ABX_CODE)) |>
|
||||
transmute(
|
||||
guideline = paste(GUIDELINES, YEAR),
|
||||
type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
|
||||
@ -266,20 +269,20 @@ breakpoints_new <- breakpoints %>%
|
||||
breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
|
||||
uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI"),
|
||||
is_SDD = !is.na(SDD)
|
||||
) %>%
|
||||
) |>
|
||||
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
|
||||
mutate(disk_dose = disk_dose %>%
|
||||
gsub("μ", "mc", ., fixed = TRUE) %>% # this is 'mu', \u03bc
|
||||
gsub("µ", "mc", ., fixed = TRUE) %>% # this is 'micro', \u00b5 (yes, they look the same)
|
||||
gsub("–", "-", ., fixed = TRUE) %>%
|
||||
gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) %>% # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
|
||||
arrange(desc(guideline), mo, ab, type, method) %>%
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
|
||||
gsub("(?<=\\d)(?=[a-zA-Z])", " ", ., perl = TRUE)) |> # make sure we keep a space after a number, e.g. "1mcg" to "1 mcg"
|
||||
arrange(desc(guideline), mo, ab, type, method) |>
|
||||
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) |>
|
||||
distinct(guideline, type, host, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
|
||||
|
||||
# fix reference table names
|
||||
breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new |> filter(guideline %like% "EUCAST", is.na(ref_tbl)) |> View()
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
|
||||
is.na(ref_tbl) ~ "Unknown",
|
||||
TRUE ~ ref_tbl))
|
||||
@ -289,11 +292,11 @@ breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.d
|
||||
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
|
||||
|
||||
# regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - only keep adult horses
|
||||
breakpoints_new %>%
|
||||
filter(host %like% "foal") %>%
|
||||
breakpoints_new |>
|
||||
filter(host %like% "foal") |>
|
||||
count(guideline, host)
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
filter(host %unlike% "foal") %>%
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
filter(host %unlike% "foal") |>
|
||||
mutate(host = ifelse(host %like% "horse", "horse", host))
|
||||
|
||||
# FIXES FOR WHONET ERRORS ----
|
||||
@ -301,25 +304,22 @@ m <- unique(as.double(as.mic(levels(as.mic(1)))))
|
||||
|
||||
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
|
||||
# instead, raise these one higher valid MIC factor level:
|
||||
breakpoints_new |> filter(method == "MIC" & (!breakpoint_S %in% c(m, NA))) |> distinct(breakpoint_S)
|
||||
breakpoints_new |> filter(method == "MIC" & (!breakpoint_R %in% c(m, NA))) |> distinct(breakpoint_R)
|
||||
breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
|
||||
breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- m[which(m == 256) + 1]
|
||||
breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m[which(m == 512) + 1]
|
||||
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]
|
||||
|
||||
# a lot of R breakpoints are missing, though none of the S breakpoints are missing:
|
||||
# S breakpoints must never be missing:
|
||||
anyNA(breakpoints_new$breakpoint_S)
|
||||
|
||||
breakpoints_new %>%
|
||||
filter(is.na(breakpoint_R)) %>%
|
||||
count(guideline, host) |>
|
||||
pivot_wider(names_from = host,
|
||||
values_from = n,
|
||||
values_fill = list(n = 0)) |>
|
||||
View()
|
||||
|
||||
# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
|
||||
# breakpoints_new[which(breakpoints_new$method == "MIC" &
|
||||
# is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
|
||||
# a lot of R breakpoints are missing, but for CLSI this is required and can be set using as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
|
||||
# 2025-04-20/ For EUCAST, this should not be the case, only happens to old guideline now it seems
|
||||
breakpoints_new |>
|
||||
filter(method == "MIC" & guideline %like% "EUCAST" & is.na(breakpoint_R)) |>
|
||||
count(guideline)
|
||||
breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(breakpoints_new$method == "MIC" & breakpoints_new$guideline %like% "EUCAST" & is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
|
||||
|
||||
|
||||
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
|
||||
@ -327,32 +327,30 @@ breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %li
|
||||
# Haemophilus same error (must only be H. influenzae)
|
||||
breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
|
||||
# EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
bind_rows(
|
||||
breakpoints_new %>%
|
||||
filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") %>%
|
||||
breakpoints_new |>
|
||||
filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") |>
|
||||
mutate(mo = as.mo("B_HMPHL_PRNF"))
|
||||
) %>%
|
||||
arrange(desc(guideline), mo, ab, type, host, method)
|
||||
) |>
|
||||
arrange(desc(guideline), mo, ab, type, host, method) |>
|
||||
distinct()
|
||||
# Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
|
||||
breakpoints_new <- breakpoints_new %>% filter(mo != as.mo("Achromobacter denitrificans"))
|
||||
breakpoints_new <- breakpoints_new |> filter(mo != as.mo("Achromobacter denitrificans"))
|
||||
# WHONET contains gentamicin breakpoints for viridans streptocci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
|
||||
breakpoints_new <- breakpoints_new %>% filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
|
||||
breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEN")
|
||||
breakpoints_new |> filter(mo == as.mo("Streptococcus viridans") & ab == "GEH")
|
||||
breakpoints_new <- breakpoints_new |> filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
|
||||
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
|
||||
breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
|
||||
# WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
|
||||
# UPDATE 2024-02-22: fixed now
|
||||
# 2024-02-22/ fixed now
|
||||
|
||||
# There's a problem with C. diff in EUCAST where breakpoint_R is missing - they are listed as normal human breakpoints but are ECOFF
|
||||
rows <- which(breakpoints_new$guideline %like% "EUCAST" & breakpoints_new$mo == "B_CRDDS_DFFC" & is.na(breakpoints_new$breakpoint_R) & !is.na(breakpoints_new$breakpoint_S))
|
||||
breakpoints_new$type[rows] <- "ECOFF"
|
||||
breakpoints_new$host[rows] <- "ECOFF"
|
||||
breakpoints_new$ref_tbl[rows] <- "ECOFF"
|
||||
breakpoints_new$breakpoint_R[rows] <- breakpoints_new$breakpoint_S[rows]
|
||||
breakpoints_new <- distinct(breakpoints_new, .keep_all = TRUE)
|
||||
# 2025-04-20/ fixed now
|
||||
|
||||
# determine rank again now that some changes were made on taxonomic level (genus -> species)
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
mutate(rank_index = case_when(
|
||||
mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
|
||||
mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
|
||||
@ -367,52 +365,50 @@ breakpoints_new <- breakpoints_new %>%
|
||||
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
|
||||
# EUCAST 2023 guideline: S <= 8 and R > 8
|
||||
# WHONET file: S <= 8 and R >= 16
|
||||
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
# but this will make an MIC of 12 I, which should be R according to EUCAST, so:
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
# but this logic only works for CLSI - for an MIC outside the log2 range (e.g., 12) you must round up (i.e., 16)
|
||||
# but for EUCAST, an MIC of 12 would now be considered I, which should be R, so correct those values:
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
|
||||
pmax(breakpoint_S, breakpoint_R / 2),
|
||||
breakpoint_R
|
||||
))
|
||||
# fix disks as well
|
||||
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new |> filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
|
||||
breakpoint_R + 1,
|
||||
breakpoint_R
|
||||
))
|
||||
# fill missing R breakpoint where there is an S breakpoint
|
||||
# 2025-03-12 don't do this anymore - we now use as.sir(..., substitute_missing_r_breakpoint = TRUE/FALSE, ...)
|
||||
# breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
|
||||
|
||||
|
||||
# check the strange duplicates
|
||||
breakpoints_new %>%
|
||||
mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>%
|
||||
filter(id %in% .$id[which(duplicated(id))]) %>%
|
||||
arrange(desc(guideline))
|
||||
# 2024-06-19 mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
|
||||
# remove duplicates
|
||||
breakpoints_new <- breakpoints_new %>%
|
||||
breakpoints_new |>
|
||||
mutate(id = paste(guideline, type, host, method, site, mo, ab, uti)) %>%
|
||||
filter(id %in% .$id[which(duplicated(id))]) |>
|
||||
arrange(desc(guideline)) |>
|
||||
View()
|
||||
# 2024-06-19/ mostly ECOFFs, but there's no explanation in the whonet_breakpoints file, we have to remove duplicates
|
||||
# 2025-04-20/ same, most important one seems M. tuberculosis in CLSI (also in 2025)
|
||||
breakpoints_new <- breakpoints_new |>
|
||||
distinct(guideline, type, host, method, site, mo, ab, uti, .keep_all = TRUE)
|
||||
|
||||
|
||||
# CHECKS AND SAVE TO PACKAGE ----
|
||||
|
||||
# check again
|
||||
breakpoints_new %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
breakpoints_new |> filter(guideline == "EUCAST 2025", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
# compare with current version
|
||||
clinical_breakpoints %>% filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
clinical_breakpoints |> filter(guideline == "EUCAST 2024", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
|
||||
|
||||
# must have "human" and "ECOFF"
|
||||
breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
|
||||
breakpoints_new |> filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
|
||||
|
||||
# check dimensions
|
||||
dim(breakpoints_new)
|
||||
dim(clinical_breakpoints)
|
||||
|
||||
clinical_breakpoints <- breakpoints_new
|
||||
clinical_breakpoints <- clinical_breakpoints %>% dataset_UTF8_to_ASCII()
|
||||
clinical_breakpoints <- clinical_breakpoints |> dataset_UTF8_to_ASCII()
|
||||
usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
|
||||
rm(clinical_breakpoints)
|
||||
devtools::load_all(".")
|
||||
|
@ -31,12 +31,13 @@ library(dplyr)
|
||||
library(readxl)
|
||||
library(cleaner)
|
||||
|
||||
# URL:
|
||||
# https://www.eucast.org/fileadmin/src/media/PDFs/EUCAST_files/Breakpoint_tables/Dosages_v_13.0_Breakpoint_Tables.pdf
|
||||
# download the PDF file, open in Adobe Acrobat and export as Excel workbook
|
||||
breakpoints_version <- 13
|
||||
breakpoint_file <- "data-raw/v_15.0_Breakpoint_Tables.xlsx"
|
||||
if (!file.exists(breakpoint_file)) {
|
||||
stop("Breakpoint file not found")
|
||||
}
|
||||
breakpoints_version <- as.double(gsub("[^0-9.]", "", gsub(".xlsx", "", breakpoint_file, fixed = TRUE)))
|
||||
|
||||
dosage_source <- read_excel("data-raw/Dosages_v_12.0_Breakpoint_Tables.xlsx", skip = 4, na = "None") %>%
|
||||
dosage_source <- read_excel(breakpoint_file, skip = 6, sheet = "Dosages", na = "None") %>%
|
||||
format_names(snake_case = TRUE, penicillins = "drug") %>%
|
||||
filter(!tolower(standard_dosage) %in% c("standard dosage", "standard dosage_source", "under review")) %>%
|
||||
filter(!is.na(standard_dosage)) %>%
|
||||
@ -173,6 +174,12 @@ dosage_new <- bind_rows(
|
||||
# this makes it a tibble as well:
|
||||
dataset_UTF8_to_ASCII()
|
||||
|
||||
dosage <- bind_rows(dosage_new, AMR::dosage)
|
||||
dosage <- AMR::dosage |>
|
||||
bind_rows(dosage_new) |>
|
||||
arrange(desc(eucast_version), name) |>
|
||||
distinct()
|
||||
|
||||
dosage <- dosage |> dataset_UTF8_to_ASCII()
|
||||
usethis::use_data(dosage, internal = FALSE, overwrite = TRUE, version = 2, compress = "xz")
|
||||
rm(dosage)
|
||||
devtools::load_all(".")
|
||||
|
Reference in New Issue
Block a user