# AMR/data-raw/reproduction_of_clinical_br...

# ==================================================================== #
# TITLE:                                                               #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE CODE:                                                         #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# PLEASE CITE THIS SOFTWARE AS:                                        #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
# Data. Journal of Statistical Software, 104(3), 1-31.                 #
# https://doi.org/10.18637/jss.v104.i03                                #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

# This script runs in 20-30 minutes and renews all guidelines of CLSI and EUCAST!
# Run it with source("data-raw/reproduction_of_clinical_breakpoints.R")

library(dplyr)
library(readr)
library(tidyr)
devtools::load_all()

# Install the WHONET software on Windows (http://www.whonet.org/software.html),
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)

# BE SURE TO RUN data-raw/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!

# READ DATA ----

# Organisms: read the WHONET organism list, remove old taxonomic names (only rows
# where TAXONOMIC_STATUS is "C" are kept) and store an upper-case copy of the
# WHONET organism code in ORGANISM_CODE.
whonet_organisms <- read_tsv(
  "data-raw/WHONET/Resources/Organisms.txt",
  na = c("", "NA", "-"),
  show_col_types = FALSE
)
whonet_organisms <- whonet_organisms %>%
  filter(TAXONOMIC_STATUS == "C") %>%
  mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))

# Breakpoints: all rows are used for guessing the column types (guess_max = Inf),
# after which only the CLSI and EUCAST guidelines are retained.
whonet_breakpoints <- read_tsv(
  "data-raw/WHONET/Resources/Breakpoints.txt",
  na = c("", "NA", "-"),
  show_col_types = FALSE,
  guess_max = Inf
)
whonet_breakpoints <- whonet_breakpoints %>%
  filter(GUIDELINES %in% c("CLSI", "EUCAST"))

# Antibiotics: one row per WHONET antibiotic code (the first row after sorting
# on the code is the one that is kept).
whonet_antibiotics <- read_tsv(
  "data-raw/WHONET/Resources/Antibiotics.txt",
  na = c("", "NA", "-"),
  show_col_types = FALSE
)
whonet_antibiotics <- whonet_antibiotics %>%
  arrange(WHONET_ABX_CODE) %>%
  distinct(WHONET_ABX_CODE, .keep_all = TRUE)

# MICROORGANISMS WHONET CODES ----

# Reduce the WHONET organism list to the columns that are needed and try to find
# an `mo` code for every WHONET code through the GBIF identifier. Rows that could
# not be matched keep `mo = NA` and are handled in the next section.
whonet_organisms <- whonet_organisms %>%
  select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
  mutate(
    # this one was called Issatchenkia orientalis, but it should be Candida krusei.
    # ORGANISM_CODE was upper-cased with toupper() when the file was read, so a
    # comparison with lower-case "ckr" would never match; compare case-insensitively.
    # %in% (instead of ==) never returns NA, so rows without a code keep their name.
    ORGANISM = if_else(toupper(ORGANISM_CODE) %in% "CKR", "Candida krusei", ORGANISM)
  ) %>%
  # try to match on GBIF identifier
  left_join(
    microorganisms %>%
      distinct(mo, gbif, status) %>%
      filter(!is.na(gbif)),
    by = c("GBIF_TAXON_ID" = "gbif")
  ) %>%
  # remove duplicates: the join can return several rows per WHONET code, keep the
  # first one after sorting (NOTE(review): sorting on `status` presumably puts
  # accepted names before synonyms - confirm)
  arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
  distinct(ORGANISM_CODE, .keep_all = TRUE) %>%
  # add Enterobacterales, which is a subkingdom code in their data
  # (the lower-case code is harmless: codes are upper-cased again when `organisms` is built)
  bind_rows(data.frame(
    ORGANISM_CODE = "ebc",
    ORGANISM = "Enterobacterales",
    mo = as.mo("Enterobacterales")
  )) %>%
  arrange(ORGANISM)


## Add new WHO codes to microorganisms.codes ----

# split the WHONET organisms on whether the GBIF join already yielded an mo code
matched <- whonet_organisms %>% filter(!is.na(mo))
unmatched <- whonet_organisms %>% filter(is.na(mo))

# generate the mo codes and add their names
message("Getting MO codes for WHONET input...")
unmatched <- unmatched %>%
  mutate(
    # strip serotype/variant/toxin and similar additions from the WHONET name
    # before handing it to as.mo()
    mo = as.mo(
      gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
      minimum_matching_score = 0.55,
      keep_synonyms = TRUE,
      language = "en"
    ),
    # anaerobes get their own dedicated codes, regardless of what as.mo() returned
    mo = case_when(
      ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"),
      ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"),
      ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"),
      TRUE ~ mo
    ),
    mo_name = mo_name(mo, keep_synonyms = TRUE, language = "en")
  )

# check if coercion at least resembles the first part (genus)
unmatched <- unmatched %>%
  mutate(
    # first word of the WHONET name after removing everything but letters, spaces,
    # underscores and hyphens. vapply() guarantees a character vector, also when
    # `unmatched` has zero rows (sapply() would return an empty list in that case).
    first_part = vapply(
      ORGANISM,
      function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1],
      FUN.VALUE = character(1),
      USE.NAMES = FALSE
    ),
    # keep a row if the coerced name contains that first word, or if it is one of
    # the generic WHONET entries (Gram stain groups, "Other", anaerobes)
    keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic"
  ) %>%
  arrange(keep)
# manual inspection: rows that will be dropped (keep = FALSE) are sorted on top
unmatched %>%
  View()
unmatched <- unmatched %>%
  filter(keep == TRUE)

# Combine the GBIF-matched and the as.mo()-coerced organisms into one table with
# an upper-case WHONET code, the WHONET species group, the mo code and its name.
organisms <- matched %>%
  transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>%
  bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>%
  mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>%
  arrange(code)

# some subspecies exist, while their upper species do not, add them as the species level:
# (rows where the species and subspecies epithet are equal, Salmonella excluded)
subspp <- organisms %>%
  filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
    mo_species(mo, keep_synonyms = TRUE) != "" &
    mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>%
  mutate(
    mo = as.mo(
      paste(
        mo_genus(mo, keep_synonyms = TRUE),
        mo_species(mo, keep_synonyms = TRUE)
      ),
      keep_synonyms = TRUE
    ),
    name = mo_name(mo, keep_synonyms = TRUE)
  )
# replace the subspecies rows with their species-level counterparts
organisms <- organisms %>%
  filter(!code %in% subspp$code) %>%
  bind_rows(subspp) %>%
  arrange(code)

# add the groups: every distinct WHONET species group also becomes a code of its
# own, with the mo code that as.mo() returns for the group code
group_codes <- unique(organisms$group[!is.na(organisms$group)])
organisms <- organisms %>%
  bind_rows(tibble(
    code = group_codes,
    group = NA,
    mo = as.mo(group_codes, keep_synonyms = TRUE),
    name = mo_name(mo, keep_synonyms = TRUE)
  )) %>%
  arrange(code, group) %>%
  select(-group) %>%
  distinct()

# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
organisms <- organisms %>%
  filter(!(code == "SGM" & name %like% "Streptococcus"))

# this must be empty. A bare expression is not printed when this script is run
# with source(), so duplicated codes are also reported with a warning:
duplicated_codes <- organisms$code[duplicated(organisms$code)]
if (length(duplicated_codes) > 0) {
  warning(
    "Duplicated WHONET codes in `organisms`: ",
    paste(unique(duplicated_codes), collapse = ", "),
    call. = FALSE
  )
}
duplicated_codes

saveRDS(organisms, "data-raw/organisms.rds", version = 2)

#---
# AT THIS POINT, `organisms` is clean and all entries have an mo code
#---

# refresh microorganisms.codes with the latest WHONET codes
microorganisms.codes2 <- bind_rows(
  # drop every existing code that WHONET also uses (compared case-insensitively),
  # whether we (in the end) keep it or not ...
  microorganisms.codes %>%
    filter(!toupper(code) %in% toupper(organisms$code)),
  # ... and put the freshly determined WHONET codes in their place
  organisms %>%
    select(code, mo)
) %>%
  arrange(code) %>%
  distinct(code, .keep_all = TRUE)

# codes that are new compared to the current microorganisms.codes:
microorganisms.codes2 %>%
  filter(!code %in% microorganisms.codes$code) %>%
  pull(code)
# and the names of the microorganisms they point to:
microorganisms.codes2 %>%
  filter(!code %in% microorganisms.codes$code) %>%
  pull(mo) %>%
  mo_name(keep_synonyms = TRUE)

# from here on, work with the refreshed table
microorganisms.codes <- microorganisms.codes2

# Run this part to update ASIARS-Net:
# # start
# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
# asiarsnet <- asiarsnet %>%
#   mutate(WHONET_Code = toupper(WHONET_Code)) %>%
#   left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
#   mutate(
#     mo1 = as.mo(ORGANISM_CODE),
#     mo2 = as.mo(ORGANISM)
#   ) %>%
#   mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
#   filter(!is.na(mo))
# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
# # these will be updated
# bind_rows(insert1, insert2) %>%
#   rename(mo_new = mo) %>%
#   left_join(microorganisms.codes) %>%
#   filter(mo != mo_new)
# microorganisms.codes <- microorganisms.codes %>%
#   filter(!code %in% c(insert1$code, insert2$code)) %>%
#   bind_rows(insert1, insert2) %>%
#   arrange(code)
# # end

## Save to package ----

# the mo column must carry exactly the classes "mo" and "character"
class(microorganisms.codes[["mo"]]) <- c("mo", "character")
usethis::use_data(
  microorganisms.codes,
  overwrite = TRUE,
  compress = "xz",
  version = 2
)
# remove the object from the workspace and reload the package, so that the code
# below works with the data set as it was just saved
rm(list = "microorganisms.codes")
devtools::load_all()


# BREAKPOINTS ----

# now that we have the right MO codes, get the breakpoints and convert them

# overview: number of WHONET breakpoints per guideline and breakpoint type, with totals
whonet_breakpoints %>%
  count(GUIDELINES, BREAKPOINT_TYPE) %>%
  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>%
  janitor::adorn_totals(where = c("row", "col"))

# add the mo code to every breakpoint by joining on the upper-case WHONET code
breakpoints <- whonet_breakpoints %>%
  mutate(code = toupper(ORGANISM_CODE)) %>%
  left_join(
    bind_rows(
      microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
      # GEN (Generic) and ALL (All) are PK/PD codes
      data.frame(
        code = c("ALL", "GEN"),
        mo = rep(as.mo("UNKNOWN"), 2)
      )
    ),
    # explicit key: do not rely on whichever column names happen to be shared
    by = "code"
  )

# these ones lack an MO name, they cannot be used:
unknown <- breakpoints %>%
  filter(is.na(mo)) %>%
  pull(code) %>%
  unique()
breakpoints %>%
  filter(code %in% unknown) %>%
  count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
# these codes are currently (2023-07-08): clu, kma. No clue, so remove them:
breakpoints <- breakpoints %>%
  filter(!is.na(mo))

# and these ones have unknown antibiotics according to WHONET itself:
breakpoints %>%
  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
  count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
  arrange(desc(YEAR))
breakpoints %>%
  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
  pull(WHONET_ABX_CODE) %>%
  unique()
# they are at the moment all old codes that have the right replacements in `antibiotics`, so we can use as.ab()


## Build new breakpoints table ----

# Translate the WHONET columns into the column layout of `clinical_breakpoints`.
breakpoints_new <- breakpoints %>%
  filter(!is.na(WHONET_ABX_CODE)) %>%
  # determine the taxonomic rank only once; it is needed for every branch of the
  # rank_index case_when() below and is dropped again by transmute()
  mutate(mo_rank_tmp = mo_rank(mo, keep_synonyms = TRUE)) %>%
  transmute(
    guideline = paste(GUIDELINES, YEAR),
    # breakpoint type in lower case, except for "ECOFF"
    type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
    method = TEST_METHOD,
    site = SITE_OF_INFECTION,
    mo,
    # the lower the index, the more specific the taxonomic level
    rank_index = case_when(
      is.na(mo_rank_tmp) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
      mo_rank_tmp %like% "(infra|sub)" ~ 1,
      mo_rank_tmp == "species" ~ 2,
      mo_rank_tmp == "species group" ~ 2.5,
      mo_rank_tmp == "genus" ~ 3,
      mo_rank_tmp == "family" ~ 4,
      mo_rank_tmp == "order" ~ 5,
      TRUE ~ 6
    ),
    ab = as.ab(WHONET_ABX_CODE),
    ref_tbl = ifelse(type == "ECOFF" & is.na(REFERENCE_TABLE), "ECOFF", REFERENCE_TABLE),
    disk_dose = POTENCY,
    # for ECOFFs without S/R values, the ECV_ECOFF value is used for both
    breakpoint_S = ifelse(type == "ECOFF" & is.na(S) & !is.na(ECV_ECOFF), ECV_ECOFF, S),
    breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
    # TRUE if the site of infection mentions UTI, urinary or urine (case-sensitive)
    uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI")
  ) %>%
  # Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
  mutate(disk_dose = disk_dose %>%
    gsub("μ", "u", ., fixed = TRUE) %>% # this is 'mu', \u03bc
    gsub("µ", "u", ., fixed = TRUE) %>% # this is 'micro', u00b5 (yes, they look the same)
    gsub("–", "-", ., fixed = TRUE)) %>%
  arrange(desc(guideline), mo, ab, type, method) %>%
  # a row needs at least one breakpoint, a microorganism and an antibiotic
  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
  distinct(guideline, type, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)

# check the strange duplicates
# (rows that share guideline, type, ab, mo, method and site; since the previous
# distinct() also included breakpoint_S, these rows differ in at least that column)
breakpoints_new %>% 
  mutate(id = paste(guideline, type, ab, mo, method, site)) %>% 
  filter(id %in% .$id[which(duplicated(id))])
# remove duplicates: only the first row of each such combination is kept
breakpoints_new <- breakpoints_new %>% 
  distinct(guideline, type, ab, mo, method, site, .keep_all = TRUE)

# fix reference table names
# manual inspection of the EUCAST rows that have no reference table:
breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
# A missing reference table in a guideline matching "EUCAST 202" is filled with the
# value of the next row (lead()), all other missing values become "Unknown".
# NOTE(review): lead() depends on the current row order and returns NA when the
# next row is missing as well - confirm that this is acceptable.
breakpoints_new <- breakpoints_new %>% 
  mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
                             is.na(ref_tbl) ~ "Unknown",
                             TRUE ~ ref_tbl))

# clean disk zones
# (for rows with method "DISK", pass both breakpoints through as.disk() and store them as doubles)
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE]))
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))

# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
# instead, clean based on MIC factor levels
# `m` holds the unique numeric values of all MIC factor levels
m <- unique(as.double(as.mic(levels(as.mic(1)))))
# a missing S breakpoint for MIC becomes the lowest MIC level ...
breakpoints_new[which(breakpoints_new$method == "MIC" &
  is.na(breakpoints_new$breakpoint_S)), "breakpoint_S"] <- min(m)
# ... and a missing R breakpoint for MIC becomes the highest MIC level
breakpoints_new[which(breakpoints_new$method == "MIC" &
  is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
# map the WHONET values 129, 257, 513 and 1025 to 128, 256, 512 and 1024.
# NOTE(review): the original comment here read "raise these one higher valid MIC
# factor level", but the code maps each value to the level just below it -
# confirm which of the two is intended.
breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- 128
breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- 256
breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- 512
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- 1024

# Manual corrections of known errors in the WHONET breakpoints table.
# These must be applied to `breakpoints_new`: that is the table that is saved as
# the new `clinical_breakpoints` at the end of this script. Applying them to the
# currently loaded `clinical_breakpoints` has no effect on the saved data set,
# because that object is overwritten with `breakpoints_new` before saving.

# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
breakpoints_new$mo[which(breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g")] <- as.mo("B_STRPT_ABCG")
# Haemophilus same error (must only be H. influenzae)
breakpoints_new$mo[which(breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae")] <- as.mo("B_HMPHL_INFL")
# EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
breakpoints_new <- breakpoints_new %>%
  bind_rows(
    breakpoints_new %>%
      filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") %>%
      mutate(mo = as.mo("B_HMPHL_PRNF"))
  ) %>%
  arrange(desc(guideline), mo, ab, type, method)
# Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
breakpoints_new <- breakpoints_new %>%
  filter(mo != as.mo("Achromobacter denitrificans"))
# WHONET contains gentamicin breakpoints for viridans streptocci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
breakpoints_new <- breakpoints_new %>%
  filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
breakpoints_new$mo[which(breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST")] <- as.mo("B_STPHY_SPRP")

# determine rank again, since some mo codes were changed above
# (the taxonomic rank is looked up once and the helper column is removed afterwards)
breakpoints_new <- breakpoints_new %>%
  mutate(
    mo_rank_tmp = mo_rank(mo, keep_synonyms = TRUE),
    rank_index = case_when(
      is.na(mo_rank_tmp) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
      mo_rank_tmp %like% "(infra|sub)" ~ 1,
      mo_rank_tmp == "species" ~ 2,
      mo_rank_tmp == "species group" ~ 2.5,
      mo_rank_tmp == "genus" ~ 3,
      mo_rank_tmp == "family" ~ 4,
      mo_rank_tmp == "order" ~ 5,
      TRUE ~ 6
    )
  ) %>%
  select(-mo_rank_tmp)


# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
# EUCAST 2022 guideline: S <= 8 and R > 8
#           WHONET file: S <= 8 and R >= 16
# show the current values of an example row (before the correction):
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# this will make an MIC of 12 I, which should be R, so:
# for EUCAST MIC rows where R differs from S, halve R but never go below S.
# Note that ifelse() returns NA where its condition is NA; rows that end up with a
# missing R are filled with the S breakpoint further below.
breakpoints_new <- breakpoints_new %>%
  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
    pmax(breakpoint_S, breakpoint_R / 2),
    breakpoint_R
  ))
# fix disks as well
# show the current values of the same example for the disk method:
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
# for EUCAST disk rows where S differs from R, R is raised by 1
breakpoints_new <- breakpoints_new %>%
  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
    breakpoint_R + 1,
    breakpoint_R
  ))
# fix missing R breakpoint where there is an S breakpoint
# (every missing R breakpoint is replaced by the S breakpoint of the same row)
breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]

# check again
breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# compare with current version
# (`clinical_breakpoints` is at this point still the data set of the loaded package)
clinical_breakpoints %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")

# must have "human" and "ECOFF"
breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")

# check dimensions
# (new table versus the data set of the loaded package)
dim(breakpoints_new)
dim(clinical_breakpoints)

# SAVE TO PACKAGE ----

# the new table becomes `clinical_breakpoints`, after passing it through
# dataset_UTF8_to_ASCII()
clinical_breakpoints <- dataset_UTF8_to_ASCII(breakpoints_new)
usethis::use_data(
  clinical_breakpoints,
  overwrite = TRUE,
  compress = "xz",
  version = 2
)
# remove the object from the workspace and reload the package
rm(list = "clinical_breakpoints")
devtools::load_all(".")
-												(v1.7.1.9069) CLSI 2021 support

											
										
										
											2021-12-12 11:36:58 +01:00
+								# ==================================================================== #
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# TITLE:                                                               #
-												New mo algorithm, prepare for 2.0


											
										
										
											2022-10-05 09:12:22 +02:00
+								# AMR: An R Package for Working with Antimicrobial Resistance Data     #
-												(v1.7.1.9069) CLSI 2021 support

											
										
										
											2021-12-12 11:36:58 +01:00
+								#                                                                      #
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# SOURCE CODE:                                                         #
-												(v1.7.1.9069) CLSI 2021 support

											
										
										
											2021-12-12 11:36:58 +01:00
+								# https://github.com/msberends/AMR                                     #
 								#                                                                      #
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# PLEASE CITE THIS SOFTWARE AS:                                        #
-												New mo algorithm, prepare for 2.0


											
										
										
											2022-10-05 09:12:22 +02:00
+								# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C    #
 								# (2022). AMR: An R Package for Working with Antimicrobial Resistance  #
 								# Data. Journal of Statistical Software, 104(3), 1-31.                 #
-												documentation update

											
										
										
											2023-05-27 10:39:22 +02:00
+								# https://doi.org/10.18637/jss.v104.i03                                #
-												New mo algorithm, prepare for 2.0


											
										
										
											2022-10-05 09:12:22 +02:00
+								#                                                                      #
-												support new mo codes

											
										
										
											2022-12-27 15:16:15 +01:00
+								# Developed at the University of Groningen and the University Medical  #
 								# Center Groningen in The Netherlands, in collaboration with many      #
 								# colleagues from around the world, see our website.                   #
-												(v1.7.1.9069) CLSI 2021 support

											
										
										
											2021-12-12 11:36:58 +01:00
+								#                                                                      #
 								# This R package is free software; you can freely use and distribute   #
 								# it for both personal and commercial purposes under the terms of the  #
 								# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
 								# the Free Software Foundation.                                        #
 								# We created this package for both routine data analysis and academic  #
 								# research and it was publicly released in the hope that it will be    #
 								# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
 								#                                                                      #
 								# Visit our website for the full manual and a complete tutorial about  #
 								# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
 								# ==================================================================== #
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# This script runs in 20-30 minutes and renews all guidelines of CLSI and EUCAST!
-												Replace RSI with SIR


											
										
										
											2023-01-21 23:47:20 +01:00
+								# Run it with source("data-raw/reproduction_of_clinical_breakpoints.R")
-												(v1.7.1.9070) Better WHONET support

											
										
										
											2021-12-13 10:18:28 +01:00
-												new antibiotics

											
										
										
											2019-05-10 16:44:59 +02:00
+								library(dplyr)
-												(v1.2.0.9038) CLSI 2019 fix

											
										
										
											2020-07-29 10:33:47 +02:00
+								library(readr)
 								library(tidyr)
-												Fix PK/PD breakpoints

											
										
										
											2023-04-19 00:31:31 +02:00
+								devtools::load_all()
-												(v1.7.1.9070) Better WHONET support

											
										
										
											2021-12-13 10:18:28 +01:00
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								# Install the WHONET software on Windows (http://www.whonet.org/software.html),
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
-												fix missing R breakpoints

											
										
										
											2022-10-29 14:15:23 +02:00
+								# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								# BE SURE TO RUN data-raw/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# READ DATA ----
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								  # remove old taxonomic names
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  filter(TAXONOMIC_STATUS == "C") %>%
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  mutate(ORGANISM_CODE = toupper(WHONET_ORG_CODE))
 								whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"),
 								                               show_col_types = FALSE, guess_max = Inf) %>%
 								  filter(GUIDELINES %in% c("CLSI", "EUCAST"))
 								whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
 								  arrange(WHONET_ABX_CODE) %>%
 								  distinct(WHONET_ABX_CODE, .keep_all = TRUE)
 								# MICROORGANISMS WHONET CODES ----
 								whonet_organisms <- whonet_organisms %>%
 								  select(ORGANISM_CODE, ORGANISM, SPECIES_GROUP, GBIF_TAXON_ID) %>%
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  mutate(
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								    # this one was called Issatchenkia orientalis, but it should be:
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								    ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  ) %>%
 								  # try to match on GBIF identifier
 								  left_join(microorganisms %>% distinct(mo, gbif, status) %>% filter(!is.na(gbif)), by = c("GBIF_TAXON_ID" = "gbif")) %>%
 								  # remove duplicates
 								  arrange(ORGANISM_CODE, GBIF_TAXON_ID, status) %>%
 								  distinct(ORGANISM_CODE, .keep_all = TRUE) %>%
 								  # add Enterobacterales, which is a subkingdom code in their data
 								  bind_rows(data.frame(ORGANISM_CODE = "ebc", ORGANISM = "Enterobacterales", mo = as.mo("Enterobacterales"))) %>%
 								  arrange(ORGANISM)
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
 								## Add new WHO codes to microorganisms.codes ----
 								matched <- whonet_organisms %>% filter(!is.na(mo))
 								unmatched <- whonet_organisms %>% filter(is.na(mo))
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# generate the mo codes and add their names
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								message("Getting MO codes for WHONET input...")
 								unmatched <- unmatched %>%
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  mutate(mo = as.mo(gsub("(sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
 								                    minimum_matching_score = 0.55,
-												Fix some WHONET codes

											
										
										
											2023-04-14 11:12:26 +02:00
+								                    keep_synonyms = TRUE,
 								                    language = "en"),
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								         mo = case_when(ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"),
 								                        ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"),
 								                        ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"),
 								                        TRUE ~ mo),
-												Fix some WHONET codes

											
										
										
											2023-04-14 11:12:26 +02:00
+								         mo_name = mo_name(mo,
 								                           keep_synonyms = TRUE,
 								                           language = "en"))
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# check if coercion at least resembles the first part (genus)
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								unmatched <- unmatched %>%
-												Fix some WHONET codes

											
										
										
											2023-04-14 11:12:26 +02:00
+								  mutate(
 								    first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								    keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic") %>%
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  arrange(keep)
 								unmatched %>%
 								  View()
 								unmatched <- unmatched %>%
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  filter(keep == TRUE)
 								organisms <- matched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo) %>%
 								  bind_rows(unmatched %>% transmute(code = toupper(ORGANISM_CODE), group = SPECIES_GROUP, mo)) %>%
 								  mutate(name = mo_name(mo, keep_synonyms = TRUE)) %>%
 								  arrange(code)
 								# some subspecies exist, while their upper species do not, add them as the species level:
 								subspp <- organisms %>%
 								  filter(mo_species(mo, keep_synonyms = TRUE) == mo_subspecies(mo, keep_synonyms = TRUE) &
 								           mo_species(mo, keep_synonyms = TRUE) != "" &
 								           mo_genus(mo, keep_synonyms = TRUE) != "Salmonella") %>%
 								  mutate(mo = as.mo(paste(mo_genus(mo, keep_synonyms = TRUE),
 								                          mo_species(mo, keep_synonyms = TRUE)),
 								                    keep_synonyms = TRUE),
 								         name = mo_name(mo, keep_synonyms = TRUE))
 								organisms <- organisms %>%
 								  filter(!code %in% subspp$code) %>%
 								  bind_rows(subspp) %>%
 								  arrange(code)
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								# add the groups
 								# every non-missing species group name becomes its own code, resolved with as.mo()
 								organisms <- organisms %>%
 								  bind_rows(
 								    tibble(
 								      code = unique(organisms$group[!is.na(organisms$group)]),
 								      group = NA,
 								      # tibble() evaluates columns sequentially, so `code` is the column defined just above
 								      mo = as.mo(code, keep_synonyms = TRUE),
 								      name = mo_name(mo, keep_synonyms = TRUE)
 								    )
 								  ) %>%
 								  arrange(code, group) %>%
 								  select(-group) %>%
 								  distinct()
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
 								# drop the row(s) where code SGM resolved to a Streptococcus name; all other rows are kept
 								organisms <- organisms %>%
 								  filter(code != "SGM" | !(name %like% "Streptococcus"))
 								# sanity check - codes must be unique by now, so this must return an empty vector:
 								organisms$code[duplicated(organisms$code)]
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
 								saveRDS(organisms, "data-raw/organisms.rds", version = 2)
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
 								#---
 								# AT THIS POINT, `organisms` is clean and all entries have an mo code
 								#---
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# update microorganisms.codes with the latest WHONET codes
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								microorganisms.codes2 <- microorganisms.codes %>%
-												Fix some WHONET codes

											
										
										
											2023-04-14 11:12:26 +02:00
+								  # remove all old WHONET codes, whether we (in the end) keep them or not
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  filter(!toupper(code) %in% toupper(organisms$code)) %>%
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								  # and add the new ones
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  bind_rows(organisms %>% select(code, mo)) %>%
 								  arrange(code) %>%
 								  distinct(code, .keep_all = TRUE)
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								# new codes:
 								# codes present in the updated table but not in the current package data set ...
 								microorganisms.codes2$code[!microorganisms.codes2$code %in% microorganisms.codes$code]
 								# ... and the organism names they resolve to
 								mo_name(
 								  microorganisms.codes2$mo[!microorganisms.codes2$code %in% microorganisms.codes$code],
 								  keep_synonyms = TRUE
 								)
 								# from here on the updated table replaces the old one
 								microorganisms.codes <- microorganisms.codes2
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# Run this part to update ASIARS-Net:
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# # start
 								# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
 								# asiarsnet <- asiarsnet %>%
 								#   mutate(WHONET_Code = toupper(WHONET_Code)) %>%
 								#   left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
 								#   mutate(
 								#     mo1 = as.mo(ORGANISM_CODE),
 								#     mo2 = as.mo(ORGANISM)
 								#   ) %>%
 								#   mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
 								#   filter(!is.na(mo))
 								# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
 								# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
 								# # these will be updated
 								# bind_rows(insert1, insert2) %>%
 								#   rename(mo_new = mo) %>%
 								#   left_join(microorganisms.codes) %>%
 								#   filter(mo != mo_new)
 								# microorganisms.codes <- microorganisms.codes %>%
 								#   filter(!code %in% c(insert1$code, insert2$code)) %>%
 								#   bind_rows(insert1, insert2) %>%
 								#   arrange(code)
 								# # end
 								## Save to package ----
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								class(microorganisms.codes$mo) <- c("mo", "character")
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
 								rm(microorganisms.codes)
 								devtools::load_all()
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# BREAKPOINTS ----
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# now that we have the right MO codes, get the breakpoints and convert them
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
 								# inspection only (result is not assigned): number of WHONET breakpoint rows per
 								# guideline, with one column per breakpoint type and row/column totals added
 								whonet_breakpoints %>%
 								  count(GUIDELINES, BREAKPOINT_TYPE) %>%
 								  pivot_wider(names_from = BREAKPOINT_TYPE, values_from = n) %>%
 								  janitor::adorn_totals(where = c("row", "col"))
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
 								breakpoints <- whonet_breakpoints %>%
 								  mutate(code = toupper(ORGANISM_CODE)) %>%
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								  left_join(bind_rows(microorganisms.codes %>% filter(!code %in% c("ALL", "GEN")),
-												Fix PK/PD breakpoints

											
										
										
											2023-04-19 00:31:31 +02:00
+								                      # GEN (Generic) and ALL (All) are PK/PD codes
 								                      data.frame(code = c("ALL", "GEN"),
 								                                 mo = rep(as.mo("UNKNOWN"), 2))))
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								# these ones lack an MO name, they cannot be used:
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								unknown <- breakpoints %>%
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  filter(is.na(mo)) %>%
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								  pull(code) %>%
 								  unique()
-												Fix PK/PD breakpoints

											
										
										
											2023-04-19 00:31:31 +02:00
+								breakpoints %>%
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  filter(code %in% unknown) %>%
 								  count(GUIDELINES, YEAR, ORGANISM_CODE, BREAKPOINT_TYPE, sort = TRUE)
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								# these codes are currently (2023-07-08): clu, kma. No clue, so remove them:
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								breakpoints <- breakpoints %>%
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  filter(!is.na(mo))
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
 								# and these ones have unknown antibiotics according to WHONET itself:
 								breakpoints %>%
 								  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
 								  count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
 								  arrange(desc(YEAR))
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								breakpoints %>%
 								  filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
 								  pull(WHONET_ABX_CODE) %>%
 								  unique()
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								# they are at the moment all old codes that have the right replacements in `antibiotics`, so we can use as.ab()
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
 								## Build new breakpoints table ----
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								breakpoints_new <- breakpoints %>%
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  filter(!is.na(WHONET_ABX_CODE)) %>%
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  transmute(
 								    guideline = paste(GUIDELINES, YEAR),
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								    type = ifelse(BREAKPOINT_TYPE == "ECOFF", "ECOFF", tolower(BREAKPOINT_TYPE)),
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    method = TEST_METHOD,
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								    site = SITE_OF_INFECTION,
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								    mo,
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    rank_index = case_when(
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								      is.na(mo_rank(mo, keep_synonyms = TRUE)) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
 								      mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
 								      mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								      mo_rank(mo, keep_synonyms = TRUE) == "species group" ~ 2.5,
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								      mo_rank(mo, keep_synonyms = TRUE) == "genus" ~ 3,
 								      mo_rank(mo, keep_synonyms = TRUE) == "family" ~ 4,
 								      mo_rank(mo, keep_synonyms = TRUE) == "order" ~ 5,
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								      TRUE ~ 6
 								    ),
 								    ab = as.ab(WHONET_ABX_CODE),
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								    ref_tbl = ifelse(type == "ECOFF" & is.na(REFERENCE_TABLE), "ECOFF", REFERENCE_TABLE),
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    disk_dose = POTENCY,
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								    breakpoint_S = ifelse(type == "ECOFF" & is.na(S) & !is.na(ECV_ECOFF), ECV_ECOFF, S),
 								    breakpoint_R = ifelse(type == "ECOFF" & is.na(R) & !is.na(ECV_ECOFF), ECV_ECOFF, R),
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								    uti = ifelse(is.na(site), FALSE, gsub(".*(UTI|urinary|urine).*", "UTI", site) == "UTI")
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  ) %>%
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								  # Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  mutate(disk_dose = disk_dose %>%
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								    gsub("μ", "u", ., fixed = TRUE) %>% # this is 'mu', \u03bc
 								    gsub("µ", "u", ., fixed = TRUE) %>% # this is 'micro', u00b5 (yes, they look the same)
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    gsub("–", "-", ., fixed = TRUE)) %>%
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								  arrange(desc(guideline), mo, ab, type, method) %>%
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								  distinct(guideline, type, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# check the strange duplicates
 								breakpoints_new %>%
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								  mutate(id = paste(guideline, type, ab, mo, method, site)) %>%
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								  filter(id %in% .$id[which(duplicated(id))])
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								# remove duplicates
 								breakpoints_new <- breakpoints_new %>%
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								  distinct(guideline, type, ab, mo, method, site, .keep_all = TRUE)
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								# fix reference table names
-												new species groups, updated clinical breakpoints

											
										
										
											2023-07-08 17:30:05 +02:00
+								breakpoints_new %>% filter(guideline %like% "EUCAST", is.na(ref_tbl)) %>% View()
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								breakpoints_new <- breakpoints_new %>%
 								  mutate(ref_tbl = case_when(is.na(ref_tbl) & guideline %like% "EUCAST 202" ~ lead(ref_tbl),
 								                             is.na(ref_tbl) ~ "Unknown",
 								                             TRUE ~ ref_tbl))
 								# clean disk zones
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE]))
 								breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
-												(v1.7.1.9071) rsi disk fix

											
										
										
											2021-12-13 11:57:34 +01:00
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								# instead, clean based on MIC factor levels
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								m <- unique(as.double(as.mic(levels(as.mic(1)))))
 								breakpoints_new[which(breakpoints_new$method == "MIC" &
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  is.na(breakpoints_new$breakpoint_S)), "breakpoint_S"] <- min(m)
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								breakpoints_new[which(breakpoints_new$method == "MIC" &
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								  is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- max(m)
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# WHONET encodes e.g. ">128" as 129, so lower these to the valid MIC factor level just below:
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								breakpoints_new[which(breakpoints_new$breakpoint_R == 129), "breakpoint_R"] <- 128
 								breakpoints_new[which(breakpoints_new$breakpoint_R == 257), "breakpoint_R"] <- 256
-												unit test

											
										
										
											2023-07-10 19:04:12 +02:00
+								breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- 512
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- 1024
-												(v1.7.1.9071) rsi disk fix

											
										
										
											2021-12-13 11:57:34 +01:00
-												MO group fixes

											
										
										
											2023-07-12 16:04:48 +02:00
+								# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
 								# NOTE: all corrections in this section must be applied to `breakpoints_new`. That is the
 								# object that gets assigned to `clinical_breakpoints` and saved to the package further
 								# down; changes made to `clinical_breakpoints` here would be overwritten and lost.
 								# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
 								breakpoints_new$mo[breakpoints_new$mo == "B_STRPT" & breakpoints_new$ref_tbl %like% "^strep.* a.* b.*c.*g"] <- as.mo("B_STRPT_ABCG")
 								# Haemophilus same error (must only be H. influenzae)
 								breakpoints_new$mo[breakpoints_new$mo == "B_HMPHL" & breakpoints_new$ref_tbl %like% "^h.* influenzae"] <- as.mo("B_HMPHL_INFL")
 								# EUCAST says that for H. parainfluenzae the H. influenzae rules can be used, so add them
 								breakpoints_new <- breakpoints_new %>%
 								  bind_rows(
 								    breakpoints_new %>%
 								      filter(guideline %like% "EUCAST", mo == "B_HMPHL_INFL") %>%
 								      mutate(mo = as.mo("B_HMPHL_PRNF"))
 								  ) %>%
 								  arrange(desc(guideline), mo, ab, type, method)
 								# Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
 								breakpoints_new <- breakpoints_new %>% filter(mo != as.mo("Achromobacter denitrificans"))
 								# WHONET contains gentamicin breakpoints for viridans streptococci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
 								breakpoints_new <- breakpoints_new %>% filter(!(mo == as.mo("Streptococcus viridans") & ab == "GEN"))
 								# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
 								breakpoints_new$mo[breakpoints_new$mo == "B_STPHY" & breakpoints_new$ab == "NIT" & breakpoints_new$guideline %like% "EUCAST"] <- as.mo("B_STPHY_SPRP")
 								# determine rank again, since the `mo` values of some rows were changed above
 								breakpoints_new <- breakpoints_new %>%
 								  mutate(rank_index = case_when(
 								    is.na(mo_rank(mo, keep_synonyms = TRUE)) ~ 6, # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
 								    mo_rank(mo, keep_synonyms = TRUE) %like% "(infra|sub)" ~ 1,
 								    mo_rank(mo, keep_synonyms = TRUE) == "species" ~ 2,
 								    mo_rank(mo, keep_synonyms = TRUE) == "species group" ~ 2.5,
 								    mo_rank(mo, keep_synonyms = TRUE) == "genus" ~ 3,
 								    mo_rank(mo, keep_synonyms = TRUE) == "family" ~ 4,
 								    mo_rank(mo, keep_synonyms = TRUE) == "order" ~ 5,
 								    TRUE ~ 6
 								  ))
-												add strep groups to ABCG

											
										
										
											2023-07-12 12:41:25 +02:00
-												(v1.8.1.9005) as.rsi() fix for EUCAST

											
										
										
											2022-05-10 17:01:37 +02:00
+								# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
-												Fix clinical breakpoints

											
										
										
											2023-04-14 23:14:34 +02:00
+								# EUCAST 2022 guideline: S <= 8 and R > 8
-												(v1.8.1.9005) as.rsi() fix for EUCAST

											
										
										
											2022-05-10 17:01:37 +02:00
+								#           WHONET file: S <= 8 and R >= 16
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
-												(v1.8.1.9005) as.rsi() fix for EUCAST

											
										
										
											2022-05-10 17:01:37 +02:00
+								# this will make an MIC of 12 I, which should be R, so:
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								breakpoints_new <- breakpoints_new %>%
 								  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "MIC" & log2(breakpoint_R) - log2(breakpoint_S) != 0,
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    pmax(breakpoint_S, breakpoint_R / 2),
 								    breakpoint_R
 								  ))
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# fix disks as well
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "DISK")
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								breakpoints_new <- breakpoints_new %>%
 								  mutate(breakpoint_R = ifelse(guideline %like% "EUCAST" & method == "DISK" & breakpoint_S - breakpoint_R != 0,
-												completely updated `antibiotics`

											
										
										
											2022-10-30 14:31:45 +01:00
+								    breakpoint_R + 1,
 								    breakpoint_R
 								  ))
-												fix missing R breakpoints

											
										
										
											2022-10-29 14:15:23 +02:00
+								# fix missing R breakpoint where there is an S breakpoint
 								breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_R"] <- breakpoints_new[which(is.na(breakpoints_new$breakpoint_R)), "breakpoint_S"]
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# check again
-												Update clinical breakpoints and fix some `as.mo()` bugs (#117)

* Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115
* docs
* implement ecoffs
* unit tests
											
										
										
											2023-06-22 15:10:59 +02:00
+								breakpoints_new %>% filter(guideline == "EUCAST 2023", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
+								# compare with current version
-												Replace RSI with SIR


											
										
										
											2023-01-21 23:47:20 +01:00
+								clinical_breakpoints %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												breakpoints UTI interpretation fix

											
										
										
											2023-07-10 13:41:52 +02:00
+								# must have "human" and "ECOFF"
 								breakpoints_new %>% filter(mo == "B_STRPT_PNMN", ab == "AMP", guideline == "EUCAST 2020", method == "MIC")
-												Fix PK/PD breakpoints

											
										
										
											2023-04-19 00:31:31 +02:00
+								# check dimensions
 								dim(breakpoints_new)
 								dim(clinical_breakpoints)
-												fix `reference_df` endless loop

											
										
										
											2023-06-26 13:52:02 +02:00
+								# SAVE TO PACKAGE ----
-												new rsi_translation

											
										
										
											2022-10-22 22:00:15 +02:00
-												Replace RSI with SIR


											
										
										
											2023-01-21 23:47:20 +01:00
+								clinical_breakpoints <- breakpoints_new
-												breakpoints fixes

											
										
										
											2023-07-12 16:20:04 +02:00
+								clinical_breakpoints <- clinical_breakpoints %>% dataset_UTF8_to_ASCII()
-												Replace RSI with SIR


											
										
										
											2023-01-21 23:47:20 +01:00
+								usethis::use_data(clinical_breakpoints, overwrite = TRUE, compress = "xz", version = 2)
 								rm(clinical_breakpoints)
-												(v0.7.1.9076) mo codes

											
										
										
											2019-09-20 12:33:05 +02:00
+								devtools::load_all(".")