2021-12-12 11:36:58 +01:00
# ==================================================================== #
2023-06-26 13:52:02 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2021-12-12 11:36:58 +01:00
# #
2023-06-26 13:52:02 +02:00
# SOURCE CODE: #
2021-12-12 11:36:58 +01:00
# https://github.com/msberends/AMR #
# #
2023-06-26 13:52:02 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2022-10-05 09:12:22 +02:00
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2021-12-12 11:36:58 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
2023-06-26 13:52:02 +02:00
# This script runs in 20-30 minutes and renews all guidelines of CLSI and EUCAST!
2023-01-21 23:47:20 +01:00
# Run it with source("data-raw/reproduction_of_clinical_breakpoints.R")
2021-12-13 10:18:28 +01:00
2019-05-10 16:44:59 +02:00
library ( dplyr )
2020-07-29 10:33:47 +02:00
library ( readr )
library ( tidyr )
2023-04-19 00:31:31 +02:00
devtools :: load_all ( )
2021-12-13 10:18:28 +01:00
2023-07-08 17:30:05 +02:00
# Install the WHONET software on Windows (http://www.whonet.org/software.html),
2022-10-22 22:00:15 +02:00
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
2022-10-29 14:15:23 +02:00
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
2022-10-22 22:00:15 +02:00
2023-07-08 17:30:05 +02:00
# BE SURE TO RUN data-raw/reproduction_of_microorganisms.groups.R FIRST TO GET THE GROUPS!
2023-06-26 13:52:02 +02:00
# READ DATA ----
2023-04-14 23:14:34 +02:00
2022-10-30 14:31:45 +01:00
whonet_organisms <- read_tsv ( " data-raw/WHONET/Resources/Organisms.txt" , na = c ( " " , " NA" , " -" ) , show_col_types = FALSE ) %>%
2022-10-22 22:00:15 +02:00
# remove old taxonomic names
2022-10-30 14:31:45 +01:00
filter ( TAXONOMIC_STATUS == " C" ) %>%
2023-06-26 13:52:02 +02:00
mutate ( ORGANISM_CODE = toupper ( WHONET_ORG_CODE ) )
whonet_breakpoints <- read_tsv ( " data-raw/WHONET/Resources/Breakpoints.txt" , na = c ( " " , " NA" , " -" ) ,
show_col_types = FALSE , guess_max = Inf ) %>%
filter ( GUIDELINES %in% c ( " CLSI" , " EUCAST" ) )
whonet_antibiotics <- read_tsv ( " data-raw/WHONET/Resources/Antibiotics.txt" , na = c ( " " , " NA" , " -" ) , show_col_types = FALSE ) %>%
arrange ( WHONET_ABX_CODE ) %>%
distinct ( WHONET_ABX_CODE , .keep_all = TRUE )
# MICROORGANISMS WHONET CODES ----
whonet_organisms <- whonet_organisms %>%
select ( ORGANISM_CODE , ORGANISM , SPECIES_GROUP , GBIF_TAXON_ID ) %>%
2022-10-30 14:31:45 +01:00
mutate (
2023-06-26 13:52:02 +02:00
# this one was called Issatchenkia orientalis, but it should be:
2023-04-14 23:14:34 +02:00
ORGANISM = if_else ( ORGANISM_CODE == " ckr" , " Candida krusei" , ORGANISM )
2023-06-26 13:52:02 +02:00
) %>%
# try to match on GBIF identifier
left_join ( microorganisms %>% distinct ( mo , gbif , status ) %>% filter ( ! is.na ( gbif ) ) , by = c ( " GBIF_TAXON_ID" = " gbif" ) ) %>%
# remove duplicates
arrange ( ORGANISM_CODE , GBIF_TAXON_ID , status ) %>%
distinct ( ORGANISM_CODE , .keep_all = TRUE ) %>%
# add Enterobacterales, which is a subkingdom code in their data
bind_rows ( data.frame ( ORGANISM_CODE = " ebc" , ORGANISM = " Enterobacterales" , mo = as.mo ( " Enterobacterales" ) ) ) %>%
arrange ( ORGANISM )
2022-10-22 22:00:15 +02:00
2023-06-26 13:52:02 +02:00
## Add new WHO codes to microorganisms.codes ----
matched <- whonet_organisms %>% filter ( ! is.na ( mo ) )
unmatched <- whonet_organisms %>% filter ( is.na ( mo ) )
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# generate the mo codes and add their names
2023-06-26 13:52:02 +02:00
message ( " Getting MO codes for WHONET input..." )
unmatched <- unmatched %>%
2023-07-08 17:30:05 +02:00
mutate ( mo = as.mo ( gsub ( " (sero[a-z]*| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])" , " " , ORGANISM ) ,
minimum_matching_score = 0.55 ,
2023-04-14 11:12:26 +02:00
keep_synonyms = TRUE ,
language = " en" ) ,
2023-04-14 23:14:34 +02:00
mo = case_when ( ORGANISM %like% " Anaerobic" & ORGANISM %like% " negative" ~ as.mo ( " B_ANAER-NEG" ) ,
ORGANISM %like% " Anaerobic" & ORGANISM %like% " positive" ~ as.mo ( " B_ANAER-POS" ) ,
ORGANISM %like% " Anaerobic" ~ as.mo ( " B_ANAER" ) ,
TRUE ~ mo ) ,
2023-04-14 11:12:26 +02:00
mo_name = mo_name ( mo ,
keep_synonyms = TRUE ,
language = " en" ) )
2023-04-14 23:14:34 +02:00
# check if coercion at least resembles the first part (genus)
2023-06-26 13:52:02 +02:00
unmatched <- unmatched %>%
2023-04-14 11:12:26 +02:00
mutate (
first_part = sapply ( ORGANISM , function ( x ) strsplit ( gsub ( " [^a-zA-Z _-]+" , " " , x ) , " " ) [ [1 ] ] [1 ] , USE.NAMES = FALSE ) ,
2023-06-22 15:10:59 +02:00
keep = mo_name %like_case% first_part | ORGANISM %like% " Gram " | ORGANISM == " Other" | ORGANISM %like% " anaerobic" ) %>%
2023-07-08 17:30:05 +02:00
arrange ( keep )
unmatched %>%
View ( )
unmatched <- unmatched %>%
2023-06-26 13:52:02 +02:00
filter ( keep == TRUE )
organisms <- matched %>% transmute ( code = toupper ( ORGANISM_CODE ) , group = SPECIES_GROUP , mo ) %>%
bind_rows ( unmatched %>% transmute ( code = toupper ( ORGANISM_CODE ) , group = SPECIES_GROUP , mo ) ) %>%
mutate ( name = mo_name ( mo , keep_synonyms = TRUE ) ) %>%
arrange ( code )
# some subspecies exist, while their upper species do not, add them as the species level:
subspp <- organisms %>%
filter ( mo_species ( mo , keep_synonyms = TRUE ) == mo_subspecies ( mo , keep_synonyms = TRUE ) &
mo_species ( mo , keep_synonyms = TRUE ) != " " &
mo_genus ( mo , keep_synonyms = TRUE ) != " Salmonella" ) %>%
mutate ( mo = as.mo ( paste ( mo_genus ( mo , keep_synonyms = TRUE ) ,
mo_species ( mo , keep_synonyms = TRUE ) ) ,
keep_synonyms = TRUE ) ,
name = mo_name ( mo , keep_synonyms = TRUE ) )
organisms <- organisms %>%
filter ( ! code %in% subspp $ code ) %>%
bind_rows ( subspp ) %>%
arrange ( code )
2023-07-08 17:30:05 +02:00
# add the groups
organisms <- organisms %>%
bind_rows ( tibble ( code = organisms %>% filter ( ! is.na ( group ) ) %>% pull ( group ) %>% unique ( ) ,
group = NA ,
mo = organisms %>% filter ( ! is.na ( group ) ) %>% pull ( group ) %>% unique ( ) %>% as.mo ( keep_synonyms = TRUE ) ,
name = mo_name ( mo , keep_synonyms = TRUE ) ) ) %>%
arrange ( code , group ) %>%
select ( - group ) %>%
distinct ( )
2023-07-10 13:41:52 +02:00
# 2023-07-08 SGM is also Strep gamma in WHONET, must only be Slowly-growing Mycobacterium
2024-06-14 22:39:01 +02:00
# 2024-06-14 still the case
2023-07-10 13:41:52 +02:00
organisms <- organisms %>%
filter ( ! ( code == " SGM" & name %like% " Streptococcus" ) )
# this must be empty:
organisms $ code [organisms $ code %>% duplicated ( ) ]
2023-07-08 17:30:05 +02:00
saveRDS ( organisms , " data-raw/organisms.rds" , version = 2 )
2023-06-26 13:52:02 +02:00
#---
# AT THIS POINT, `organisms` is clean and all entries have an mo code
#---
2023-04-14 23:14:34 +02:00
# update microorganisms.codes with the latest WHONET codes
2023-06-22 15:10:59 +02:00
microorganisms.codes2 <- microorganisms.codes %>%
2023-04-14 11:12:26 +02:00
# remove all old WHONET codes, whether we (in the end) keep them or not
2023-07-08 17:30:05 +02:00
filter ( ! toupper ( code ) %in% toupper ( organisms $ code ) ) %>%
2023-04-14 23:14:34 +02:00
# and add the new ones
2023-07-08 17:30:05 +02:00
bind_rows ( organisms %>% select ( code , mo ) ) %>%
arrange ( code ) %>%
distinct ( code , .keep_all = TRUE )
2023-06-22 15:10:59 +02:00
# new codes:
microorganisms.codes2 $ code [which ( ! microorganisms.codes2 $ code %in% microorganisms.codes $ code ) ]
mo_name ( microorganisms.codes2 $ mo [which ( ! microorganisms.codes2 $ code %in% microorganisms.codes $ code ) ] , keep_synonyms = TRUE )
microorganisms.codes <- microorganisms.codes2
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# Run this part to update ASIARS-Net:
2024-06-14 22:39:01 +02:00
# 2024-06-14: file not available anymore
2023-06-26 13:52:02 +02:00
# # start
# asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
# asiarsnet <- asiarsnet %>%
# mutate(WHONET_Code = toupper(WHONET_Code)) %>%
# left_join(whonet_organisms %>% mutate(WHONET_Code = toupper(ORGANISM_CODE))) %>%
# mutate(
# mo1 = as.mo(ORGANISM_CODE),
# mo2 = as.mo(ORGANISM)
# ) %>%
# mutate(mo = if_else(mo2 == "UNKNOWN" | is.na(mo2), mo1, mo2)) %>%
# filter(!is.na(mo))
# insert1 <- asiarsnet %>% transmute(code = WHONET_Code, mo)
# insert2 <- asiarsnet %>% transmute(code = as.character(ASIARS_Net_Code), mo)
# # these will be updated
# bind_rows(insert1, insert2) %>%
# rename(mo_new = mo) %>%
# left_join(microorganisms.codes) %>%
# filter(mo != mo_new)
# microorganisms.codes <- microorganisms.codes %>%
# filter(!code %in% c(insert1$code, insert2$code)) %>%
# bind_rows(insert1, insert2) %>%
# arrange(code)
# # end
## Save to package ----
2023-07-08 17:30:05 +02:00
class ( microorganisms.codes $ mo ) <- c ( " mo" , " character" )
2023-04-14 23:14:34 +02:00
usethis :: use_data ( microorganisms.codes , overwrite = TRUE , compress = " xz" , version = 2 )
rm ( microorganisms.codes )
devtools :: load_all ( )
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# BREAKPOINTS ----
2022-10-22 22:00:15 +02:00
2023-10-20 15:07:11 +02:00
# now that we have the correct MO codes, get the breakpoints and convert them
2023-06-26 13:52:02 +02:00
whonet_breakpoints %>%
count ( GUIDELINES , BREAKPOINT_TYPE ) %>%
pivot_wider ( names_from = BREAKPOINT_TYPE , values_from = n ) %>%
janitor :: adorn_totals ( where = c ( " row" , " col" ) )
2024-06-14 22:39:01 +02:00
# compared to current
AMR :: clinical_breakpoints | >
count ( GUIDELINES = gsub ( " [^a-zA-Z]" , " " , guideline ) , type ) | >
arrange ( tolower ( type ) ) | >
pivot_wider ( names_from = type , values_from = n ) %>%
as.data.frame ( ) | >
janitor :: adorn_totals ( where = c ( " row" , " col" ) )
2023-04-14 23:14:34 +02:00
breakpoints <- whonet_breakpoints %>%
mutate ( code = toupper ( ORGANISM_CODE ) ) %>%
2023-06-22 15:10:59 +02:00
left_join ( bind_rows ( microorganisms.codes %>% filter ( ! code %in% c ( " ALL" , " GEN" ) ) ,
2023-04-19 00:31:31 +02:00
# GEN (Generic) and ALL (All) are PK/PD codes
data.frame ( code = c ( " ALL" , " GEN" ) ,
mo = rep ( as.mo ( " UNKNOWN" ) , 2 ) ) ) )
2023-06-22 15:10:59 +02:00
# these ones lack an MO name, they cannot be used:
2023-04-14 23:14:34 +02:00
unknown <- breakpoints %>%
2023-07-08 17:30:05 +02:00
filter ( is.na ( mo ) ) %>%
2023-04-14 23:14:34 +02:00
pull ( code ) %>%
unique ( )
2023-04-19 00:31:31 +02:00
breakpoints %>%
2023-06-26 13:52:02 +02:00
filter ( code %in% unknown ) %>%
count ( GUIDELINES , YEAR , ORGANISM_CODE , BREAKPOINT_TYPE , sort = TRUE )
2024-06-14 22:39:01 +02:00
# 2024-06-14: these codes are currently: clu, kma, fso, tyi. No clue (are not in MO list of WHONET), and they are only ECOFFs, so remove them:
2023-04-14 23:14:34 +02:00
breakpoints <- breakpoints %>%
2023-07-08 17:30:05 +02:00
filter ( ! is.na ( mo ) )
2023-04-14 23:14:34 +02:00
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints %>%
filter ( ! WHONET_ABX_CODE %in% whonet_antibiotics $ WHONET_ABX_CODE ) %>%
count ( YEAR , GUIDELINES , WHONET_ABX_CODE ) %>%
arrange ( desc ( YEAR ) )
2023-06-22 15:10:59 +02:00
breakpoints %>%
filter ( ! WHONET_ABX_CODE %in% whonet_antibiotics $ WHONET_ABX_CODE ) %>%
pull ( WHONET_ABX_CODE ) %>%
unique ( )
2023-07-10 13:41:52 +02:00
# they are at the moment all old codes that have the right replacements in `antibiotics`, so we can use as.ab()
2023-06-26 13:52:02 +02:00
## Build new breakpoints table ----
2022-10-30 14:31:45 +01:00
breakpoints_new <- breakpoints %>%
2023-06-26 13:52:02 +02:00
filter ( ! is.na ( WHONET_ABX_CODE ) ) %>%
2022-10-30 14:31:45 +01:00
transmute (
guideline = paste ( GUIDELINES , YEAR ) ,
2023-06-26 13:52:02 +02:00
type = ifelse ( BREAKPOINT_TYPE == " ECOFF" , " ECOFF" , tolower ( BREAKPOINT_TYPE ) ) ,
2024-02-24 15:16:52 +01:00
host = ifelse ( BREAKPOINT_TYPE == " ECOFF" , " ECOFF" , tolower ( HOST ) ) ,
2022-10-30 14:31:45 +01:00
method = TEST_METHOD ,
2023-06-22 15:10:59 +02:00
site = SITE_OF_INFECTION ,
2023-04-14 23:14:34 +02:00
mo ,
2022-10-30 14:31:45 +01:00
rank_index = case_when (
2023-06-22 15:10:59 +02:00
is.na ( mo_rank ( mo , keep_synonyms = TRUE ) ) ~ 6 , # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
mo_rank ( mo , keep_synonyms = TRUE ) %like% " (infra|sub)" ~ 1 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " species" ~ 2 ,
2023-07-08 17:30:05 +02:00
mo_rank ( mo , keep_synonyms = TRUE ) == " species group" ~ 2.5 ,
2023-06-22 15:10:59 +02:00
mo_rank ( mo , keep_synonyms = TRUE ) == " genus" ~ 3 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " family" ~ 4 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " order" ~ 5 ,
2022-10-30 14:31:45 +01:00
TRUE ~ 6
) ,
ab = as.ab ( WHONET_ABX_CODE ) ,
2023-06-26 13:52:02 +02:00
ref_tbl = ifelse ( type == " ECOFF" & is.na ( REFERENCE_TABLE ) , " ECOFF" , REFERENCE_TABLE ) ,
2022-10-30 14:31:45 +01:00
disk_dose = POTENCY ,
2023-06-26 13:52:02 +02:00
breakpoint_S = ifelse ( type == " ECOFF" & is.na ( S ) & ! is.na ( ECV_ECOFF ) , ECV_ECOFF , S ) ,
breakpoint_R = ifelse ( type == " ECOFF" & is.na ( R ) & ! is.na ( ECV_ECOFF ) , ECV_ECOFF , R ) ,
2023-06-22 15:10:59 +02:00
uti = ifelse ( is.na ( site ) , FALSE , gsub ( " .*(UTI|urinary|urine).*" , " UTI" , site ) == " UTI" )
2022-10-30 14:31:45 +01:00
) %>%
2022-10-22 22:00:15 +02:00
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
2022-10-30 14:31:45 +01:00
mutate ( disk_dose = disk_dose %>%
2023-04-14 23:14:34 +02:00
gsub ( " μ" , " u" , ., fixed = TRUE ) %>% # this is 'mu', \u03bc
gsub ( " µ" , " u" , ., fixed = TRUE ) %>% # this is 'micro', u00b5 (yes, they look the same)
2022-10-30 14:31:45 +01:00
gsub ( " – ", " -" , ., fixed = TRUE ) ) %>%
2023-07-08 17:30:05 +02:00
arrange ( desc ( guideline ) , mo , ab , type , method ) %>%
2022-10-30 14:31:45 +01:00
filter ( ! ( is.na ( breakpoint_S ) & is.na ( breakpoint_R ) ) & ! is.na ( mo ) & ! is.na ( ab ) ) %>%
2024-02-24 15:16:52 +01:00
distinct ( guideline , type , host , ab , mo , method , site , breakpoint_S , .keep_all = TRUE )
2022-10-22 22:00:15 +02:00
2023-04-14 23:14:34 +02:00
# check the strange duplicates
breakpoints_new %>%
2024-02-24 15:16:52 +01:00
mutate ( id = paste ( guideline , type , host , ab , mo , method , site ) ) %>%
2024-06-14 22:39:01 +02:00
filter ( id %in% .$id [which ( duplicated ( id ) ) ] ) | >
arrange ( desc ( guideline ) )
2023-06-22 15:10:59 +02:00
# remove duplicates
breakpoints_new <- breakpoints_new %>%
2024-02-24 15:16:52 +01:00
distinct ( guideline , type , host , ab , mo , method , site , .keep_all = TRUE )
2023-04-14 23:14:34 +02:00
2023-06-22 15:10:59 +02:00
# fix reference table names
2023-07-08 17:30:05 +02:00
breakpoints_new %>% filter ( guideline %like% " EUCAST" , is.na ( ref_tbl ) ) %>% View ( )
2023-06-22 15:10:59 +02:00
breakpoints_new <- breakpoints_new %>%
mutate ( ref_tbl = case_when ( is.na ( ref_tbl ) & guideline %like% " EUCAST 202" ~ lead ( ref_tbl ) ,
is.na ( ref_tbl ) ~ " Unknown" ,
TRUE ~ ref_tbl ) )
# clean disk zones
2022-10-22 22:00:15 +02:00
breakpoints_new [which ( breakpoints_new $ method == " DISK" ) , " breakpoint_S" ] <- as.double ( as.disk ( breakpoints_new [which ( breakpoints_new $ method == " DISK" ) , " breakpoint_S" , drop = TRUE ] ) )
breakpoints_new [which ( breakpoints_new $ method == " DISK" ) , " breakpoint_R" ] <- as.double ( as.disk ( breakpoints_new [which ( breakpoints_new $ method == " DISK" ) , " breakpoint_R" , drop = TRUE ] ) )
2021-12-13 11:57:34 +01:00
2024-02-24 15:16:52 +01:00
# regarding animal breakpoints, CLSI has adults and foals for horses, but only for amikacin - remove them
2024-06-14 22:39:01 +02:00
breakpoints_new | >
filter ( host %like% " foal" ) | >
View ( )
2024-02-24 15:16:52 +01:00
breakpoints_new <- breakpoints_new | >
filter ( host %unlike% " foal" ) | >
mutate ( host = ifelse ( host %like% " horse" , " horse" , host ) )
2023-10-20 15:07:11 +02:00
# FIXES FOR WHONET ERRORS ----
2022-10-22 22:00:15 +02:00
# WHONET has no >1024 but instead uses 1025, 513, etc, so as.mic() cannot be used to clean.
2022-10-30 14:31:45 +01:00
# instead, clean based on MIC factor levels
2022-10-22 22:00:15 +02:00
m <- unique ( as.double ( as.mic ( levels ( as.mic ( 1 ) ) ) ) )
breakpoints_new [which ( breakpoints_new $ method == " MIC" &
2022-10-30 14:31:45 +01:00
is.na ( breakpoints_new $ breakpoint_S ) ) , " breakpoint_S" ] <- min ( m )
2022-10-22 22:00:15 +02:00
breakpoints_new [which ( breakpoints_new $ method == " MIC" &
2022-10-30 14:31:45 +01:00
is.na ( breakpoints_new $ breakpoint_R ) ) , " breakpoint_R" ] <- max ( m )
2022-10-22 22:00:15 +02:00
# raise these one higher valid MIC factor level:
2023-07-10 13:41:52 +02:00
breakpoints_new [which ( breakpoints_new $ breakpoint_R == 129 ) , " breakpoint_R" ] <- 128
breakpoints_new [which ( breakpoints_new $ breakpoint_R == 257 ) , " breakpoint_R" ] <- 256
2023-07-10 19:04:12 +02:00
breakpoints_new [which ( breakpoints_new $ breakpoint_R == 513 ) , " breakpoint_R" ] <- 512
2023-07-10 13:41:52 +02:00
breakpoints_new [which ( breakpoints_new $ breakpoint_R == 1025 ) , " breakpoint_R" ] <- 1024
2021-12-13 11:57:34 +01:00
2023-07-12 16:04:48 +02:00
# fix streptococci in WHONET table of EUCAST: Strep A, B, C and G must only include these groups and not all streptococci:
2024-02-24 15:16:52 +01:00
breakpoints_new $ mo [breakpoints_new $ mo == " B_STRPT" & breakpoints_new $ ref_tbl %like% " ^strep.* a.* b.*c.*g" ] <- as.mo ( " B_STRPT_ABCG" )
2023-07-12 16:04:48 +02:00
# Haemophilus same error (must only be H. influenzae)
2024-02-24 15:16:52 +01:00
breakpoints_new $ mo [breakpoints_new $ mo == " B_HMPHL" & breakpoints_new $ ref_tbl %like% " ^h.* influenzae" ] <- as.mo ( " B_HMPHL_INFL" )
2023-07-12 16:04:48 +02:00
# EUCAST says that for H. parainfluenzae the H. influenza rules can be used, so add them
2024-02-24 15:16:52 +01:00
breakpoints_new <- breakpoints_new %>%
2023-07-12 16:04:48 +02:00
bind_rows (
2024-02-24 15:16:52 +01:00
breakpoints_new %>%
2023-07-12 16:04:48 +02:00
filter ( guideline %like% " EUCAST" , mo == " B_HMPHL_INFL" ) %>%
mutate ( mo = as.mo ( " B_HMPHL_PRNF" ) )
) %>%
2024-02-24 15:16:52 +01:00
arrange ( desc ( guideline ) , mo , ab , type , host , method )
2023-07-12 16:04:48 +02:00
# Achromobacter denitrificans is in WHONET included in their A. xylosoxidans table, must be removed
2024-02-24 15:16:52 +01:00
breakpoints_new <- breakpoints_new %>% filter ( mo != as.mo ( " Achromobacter denitrificans" ) )
2023-07-12 16:04:48 +02:00
# WHONET contains gentamicin breakpoints for viridans streptocci, which are intrinsic R - they meant genta-high, which is ALSO in their table, so we just remove gentamicin in viridans streptococci
2024-02-24 15:16:52 +01:00
breakpoints_new <- breakpoints_new %>% filter ( ! ( mo == as.mo ( " Streptococcus viridans" ) & ab == " GEN" ) )
2023-07-12 16:20:04 +02:00
# Nitrofurantoin in Staph (EUCAST) only applies to S. saprophyticus, while WHONET has the DISK correct but the MIC on genus level
2024-02-24 15:16:52 +01:00
breakpoints_new $ mo [breakpoints_new $ mo == " B_STPHY" & breakpoints_new $ ab == " NIT" & breakpoints_new $ guideline %like% " EUCAST" ] <- as.mo ( " B_STPHY_SPRP" )
2023-10-20 15:07:11 +02:00
# WHONET sets the 2023 breakpoints for SAM to MIC of 16/32 for Enterobacterales, should be MIC 8/32 like AMC (see issue #123 on github.com/msberends/AMR)
2024-02-24 15:16:52 +01:00
# UPDATE 2024-02-22: fixed now
2023-10-20 15:07:11 +02:00
# determine rank again now that some changes were made on taxonomic level (genus -> species)
2024-02-24 15:16:52 +01:00
breakpoints_new <- breakpoints_new %>%
2023-07-12 16:20:04 +02:00
mutate ( rank_index = case_when (
is.na ( mo_rank ( mo , keep_synonyms = TRUE ) ) ~ 6 , # for UNKNOWN, B_GRAMN, B_ANAER, B_ANAER-NEG, etc.
mo_rank ( mo , keep_synonyms = TRUE ) %like% " (infra|sub)" ~ 1 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " species" ~ 2 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " species group" ~ 2.5 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " genus" ~ 3 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " family" ~ 4 ,
mo_rank ( mo , keep_synonyms = TRUE ) == " order" ~ 5 ,
TRUE ~ 6
) )
2022-05-10 17:01:37 +02:00
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
2024-06-14 22:39:01 +02:00
# EUCAST 2023 guideline: S <= 8 and R > 8
2022-05-10 17:01:37 +02:00
# WHONET file: S <= 8 and R >= 16
2023-06-22 15:10:59 +02:00
breakpoints_new %>% filter ( guideline == " EUCAST 2023" , ab == " AMC" , mo == " B_[ORD]_ENTRBCTR" , method == " MIC" )
2024-06-14 22:39:01 +02:00
# but this will make an MIC of 12 I, which should be R according to EUCAST, so:
2022-10-22 22:00:15 +02:00
breakpoints_new <- breakpoints_new %>%
mutate ( breakpoint_R = ifelse ( guideline %like% " EUCAST" & method == " MIC" & log2 ( breakpoint_R ) - log2 ( breakpoint_S ) != 0 ,
2022-10-30 14:31:45 +01:00
pmax ( breakpoint_S , breakpoint_R / 2 ) ,
breakpoint_R
) )
2022-10-22 22:00:15 +02:00
# fix disks as well
2023-06-22 15:10:59 +02:00
breakpoints_new %>% filter ( guideline == " EUCAST 2023" , ab == " AMC" , mo == " B_[ORD]_ENTRBCTR" , method == " DISK" )
2022-10-22 22:00:15 +02:00
breakpoints_new <- breakpoints_new %>%
mutate ( breakpoint_R = ifelse ( guideline %like% " EUCAST" & method == " DISK" & breakpoint_S - breakpoint_R != 0 ,
2022-10-30 14:31:45 +01:00
breakpoint_R + 1 ,
breakpoint_R
) )
2024-02-24 15:16:52 +01:00
# fill missing R breakpoint where there is an S breakpoint
2022-10-29 14:15:23 +02:00
breakpoints_new [which ( is.na ( breakpoints_new $ breakpoint_R ) ) , " breakpoint_R" ] <- breakpoints_new [which ( is.na ( breakpoints_new $ breakpoint_R ) ) , " breakpoint_S" ]
2024-06-14 22:39:01 +02:00
# keep distinct rows
breakpoints_new <- breakpoints_new | >
distinct ( )
2023-10-20 15:07:11 +02:00
# CHECKS AND SAVE TO PACKAGE ----
2022-10-22 22:00:15 +02:00
# check again
2024-06-14 22:39:01 +02:00
breakpoints_new %>% filter ( guideline == " EUCAST 2024" , ab == " AMC" , mo == " B_[ORD]_ENTRBCTR" , method == " MIC" )
2022-10-22 22:00:15 +02:00
# compare with current version
2024-06-14 22:39:01 +02:00
clinical_breakpoints %>% filter ( guideline == " EUCAST 2023" , ab == " AMC" , mo == " B_[ORD]_ENTRBCTR" , method == " MIC" )
2022-10-22 22:00:15 +02:00
2023-07-10 13:41:52 +02:00
# must have "human" and "ECOFF"
breakpoints_new %>% filter ( mo == " B_STRPT_PNMN" , ab == " AMP" , guideline == " EUCAST 2020" , method == " MIC" )
2023-04-19 00:31:31 +02:00
# check dimensions
dim ( breakpoints_new )
dim ( clinical_breakpoints )
2023-01-21 23:47:20 +01:00
clinical_breakpoints <- breakpoints_new
2023-07-12 16:20:04 +02:00
clinical_breakpoints <- clinical_breakpoints %>% dataset_UTF8_to_ASCII ( )
2023-01-21 23:47:20 +01:00
usethis :: use_data ( clinical_breakpoints , overwrite = TRUE , compress = " xz" , version = 2 )
rm ( clinical_breakpoints )
2019-09-20 12:33:05 +02:00
devtools :: load_all ( " ." )