Fix clinical breakpoints

This commit is contained in:
dr. M.S. (Matthijs) Berends 2023-04-14 23:14:34 +02:00
parent 147f9112e9
commit ed70f95380
30 changed files with 1226 additions and 1616 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 2.0.0.9005
Version: 2.0.0.9006
Date: 2023-04-14
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,8 +1,8 @@
# AMR 2.0.0.9005
# AMR 2.0.0.9006
## Changed
* formatting fix for `sir_interpretation_history()`
* Fixed some WHONET codes for microorganisms
* Fixed some WHONET codes for microorganisms and consequently a couple of entries in `clinical_breakpoints`
# AMR 2.0.0

View File

@ -125,7 +125,7 @@
#' - 2 entries of *Staphylococcus* (coagulase-negative (CoNS) and coagulase-positive (CoPS))
#' - 1 entry of *Blastocystis* (*B. hominis*), although it officially does not exist (Noel *et al.* 2005, PMID 15634993)
#' - 1 entry of *Moraxella* (*M. catarrhalis*), which was formally named *Branhamella catarrhalis* (Catlin, 1970) though this change was never accepted within the field of clinical microbiology
#' - 6 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast, unknown fungus, and unknown anaerobic bacteria)
#' - 8 other 'undefined' entries (unknown, unknown Gram-negatives, unknown Gram-positives, unknown yeast, unknown fungus, unknown anaerobic bacteria, unknown anaerobic Gram-negatives, and unknown anaerobic Gram-positives)
#'
#' The syntax used to transform the original data to a cleansed \R format, can be found here: <https://github.com/msberends/AMR/blob/main/data-raw/reproduction_of_microorganisms.R>.
#'

Binary file not shown.

View File

@ -1 +1 @@
68467f5179638ac5622281df53a5ea75
e150d98b724ad979e176058c4197c469

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
cf8b0db59dbfe8b42fbd0a6c51a7e9b0
3d92820386230a7ac3c9367ce6d96db9

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,5 +1,7 @@
"mo" "fullname" "status" "kingdom" "phylum" "class" "order" "family" "genus" "species" "subspecies" "rank" "ref" "source" "lpsn" "lpsn_parent" "lpsn_renamed_to" "gbif" "gbif_parent" "gbif_renamed_to" "prevalence" "snomed"
"B_ANAER" "(unknown anaerobic bacteria)" "accepted" "Bacteria" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown Gram-negatives)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""
"B_ANAER-NEG" "(unknown anaerobic Gram-negatives)" "accepted" "Bacteria" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown genus)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""
"B_ANAER-POS" "(unknown anaerobic Gram-positives)" "accepted" "Bacteria" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown genus)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""
"F_FUNGUS" "(unknown fungus)" "accepted" "Fungi" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown genus)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""
"B_GRAMN" "(unknown Gram-negatives)" "accepted" "Bacteria" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown Gram-negatives)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""
"B_GRAMP" "(unknown Gram-positives)" "accepted" "Bacteria" "(unknown phylum)" "(unknown class)" "(unknown order)" "(unknown family)" "(unknown Gram-positives)" "(unknown species)" "(unknown subspecies)" "subspecies" "manually added" 2 ""

Binary file not shown.

View File

@ -39,7 +39,9 @@ library(AMR)
# and copy the folder C:\WHONET\Resources to the data-raw/WHONET/ folder
# (for ASIARS-Net update, also copy C:\WHONET\Codes to the data-raw/WHONET/ folder)
# Load source data ----
# MICROORGANISMS WHONET CODES ----
whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
# remove old taxonomic names
filter(TAXONOMIC_STATUS == "C") %>%
@ -55,97 +57,49 @@ whonet_organisms <- read_tsv("data-raw/WHONET/Resources/Organisms.txt", na = c("
ORGANISM = if_else(ORGANISM_CODE == "fne", "Fusobacterium necrophorum", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "fnu", "Fusobacterium nucleatum", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "sdy", "Streptococcus dysgalactiae", ORGANISM),
ORGANISM = if_else(ORGANISM_CODE == "axy", "Achromobacter xylosoxidans", ORGANISM)
ORGANISM = if_else(ORGANISM_CODE == "axy", "Achromobacter xylosoxidans", ORGANISM),
# and this one was called Issatchenkia orientalis, but it should be:
ORGANISM = if_else(ORGANISM_CODE == "ckr", "Candida krusei", ORGANISM)
)
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
filter(BREAKPOINT_TYPE == "Human", GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
arrange(WHONET_ABX_CODE) %>%
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
# Transform data ----
# add some general codes
whonet_organisms <- whonet_organisms %>%
bind_rows(data.frame(
ORGANISM_CODE = c("ebc", "cof"),
ORGANISM = c("Enterobacterales", "Campylobacter")
))
mo_reset_session()
whonet_organisms.bak <- whonet_organisms
# generate the mo codes and add their names
whonet_organisms <- whonet_organisms.bak %>%
mutate(mo = as.mo(gsub("(sero[a-z]*| complex| nontypable| non[-][a-zA-Z]+|var[.]| not .*|sp[.],.*|, .*variant.*|, .*toxin.*|, microaer.*| beta-haem[.])", "", ORGANISM),
keep_synonyms = TRUE,
language = "en"),
mo = as.mo(ifelse(ORGANISM %like% "Anaerobic", "B_ANAER", mo)),
mo = case_when(ORGANISM %like% "Anaerobic" & ORGANISM %like% "negative" ~ as.mo("B_ANAER-NEG"),
ORGANISM %like% "Anaerobic" & ORGANISM %like% "positive" ~ as.mo("B_ANAER-POS"),
ORGANISM %like% "Anaerobic" ~ as.mo("B_ANAER"),
TRUE ~ mo),
mo_name = mo_name(mo,
keep_synonyms = TRUE,
language = "en"))
# update microorganisms.codes with the latest WHONET codes
# check if coercion at least resembles the first part (genus)
new_mo_codes <- whonet_organisms %>%
mutate(
first_part = sapply(ORGANISM, function(x) strsplit(gsub("[^a-zA-Z _-]+", "", x), " ")[[1]][1], USE.NAMES = FALSE),
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other")
keep = mo_name %like_case% first_part | ORGANISM %like% "Gram " | ORGANISM == "Other" | ORGANISM %like% "anaerobic")
# update microorganisms.codes with the latest WHONET codes
microorganisms.codes <- microorganisms.codes %>%
# remove all old WHONET codes, whether we (in the end) keep them or not
filter(!toupper(code) %in% toupper(new_mo_codes$ORGANISM_CODE)) %>%
filter(!toupper(code) %in% toupper(whonet_organisms$ORGANISM_CODE)) %>%
# and add the new ones
bind_rows(new_mo_codes %>%
filter(keep == TRUE) %>%
transmute(code = toupper(ORGANISM_CODE),
mo = mo)) %>%
arrange(code)
# save to package
usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
rm(microorganisms.codes)
devtools::load_all()
breakpoints <- whonet_breakpoints %>%
mutate(ORGANISM_CODE = tolower(ORGANISM_CODE)) %>%
left_join(whonet_organisms) %>%
filter(ORGANISM %unlike% "(^cdc |Gram.*variable|virus)")
# these ones lack a MO name, they will become "UNKNOWN":
breakpoints %>%
filter(is.na(ORGANISM)) %>%
pull(ORGANISM_CODE) %>%
unique()
# Generate new lookup table for microorganisms ----
new_mo_codes <- breakpoints %>%
distinct(ORGANISM_CODE, ORGANISM) %>%
mutate(ORGANISM = ORGANISM %>%
gsub("Issatchenkia orientalis", "Candida krusei", .) %>%
gsub(", nutritionally variant", "", .) %>%
gsub(", toxin-.*producing", "", .)) %>%
mutate(
mo = as.mo(ORGANISM, language = NULL, keep_synonyms = FALSE),
mo_name = mo_name(mo, language = NULL)
)
# Update microorganisms.codes with the latest WHONET codes ----
# these will be changed :
new_mo_codes %>%
mutate(code = toupper(ORGANISM_CODE)) %>%
rename(mo_new = mo) %>%
left_join(microorganisms.codes %>% rename(mo_old = mo)) %>%
filter(mo_old != mo_new)
microorganisms.codes <- microorganisms.codes %>%
filter(!code %in% toupper(new_mo_codes$ORGANISM_CODE)) %>%
bind_rows(new_mo_codes %>% transmute(code = toupper(ORGANISM_CODE), mo = mo) %>% filter(!is.na(mo))) %>%
arrange(code) %>%
as_tibble()
usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
rm(microorganisms.codes)
devtools::load_all()
# update ASIARS-Net?
# Run this part to update ASIARS-Net:
# start
asiarsnet <- read_tsv("data-raw/WHONET/Codes/ASIARS_Net_Organisms_ForwardLookup.txt")
asiarsnet <- asiarsnet %>%
mutate(WHONET_Code = toupper(WHONET_Code)) %>%
@ -167,20 +121,59 @@ microorganisms.codes <- microorganisms.codes %>%
filter(!code %in% c(insert1$code, insert2$code)) %>%
bind_rows(insert1, insert2) %>%
arrange(code)
# end
# save to package
usethis::use_data(microorganisms.codes, overwrite = TRUE, compress = "xz", version = 2)
rm(microorganisms.codes)
devtools::load_all()
# Create new breakpoint table ----
# BREAKPOINTS ----
# now that we have the right MO codes, get the breakpoints and convert them
whonet_breakpoints <- read_tsv("data-raw/WHONET/Resources/Breakpoints.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
filter(BREAKPOINT_TYPE == "Human", GUIDELINES %in% c("CLSI", "EUCAST"))
whonet_antibiotics <- read_tsv("data-raw/WHONET/Resources/Antibiotics.txt", na = c("", "NA", "-"), show_col_types = FALSE) %>%
arrange(WHONET_ABX_CODE) %>%
distinct(WHONET_ABX_CODE, .keep_all = TRUE)
breakpoints <- whonet_breakpoints %>%
mutate(code = toupper(ORGANISM_CODE)) %>%
left_join(microorganisms.codes)
# these codes have no matching `mo` in microorganisms.codes, so they cannot be used:
unknown <- breakpoints %>%
filter(is.na(mo)) %>%
pull(code) %>%
unique()
whonet_organisms %>%
filter(toupper(ORGANISM_CODE) %in% unknown)
breakpoints <- breakpoints %>%
filter(!is.na(mo))
# and these ones have unknown antibiotics according to WHONET itself:
breakpoints %>%
filter(!WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE) %>%
count(YEAR, GUIDELINES, WHONET_ABX_CODE) %>%
arrange(desc(YEAR))
# we cannot use them
breakpoints <- breakpoints %>%
filter(WHONET_ABX_CODE %in% whonet_antibiotics$WHONET_ABX_CODE)
# now check with our own antibiotics
breakpoints %>%
filter(!toupper(WHONET_ABX_CODE) %in% antibiotics$ab) %>%
pull(WHONET_ABX_CODE) %>%
unique()
# at the moment, these are all old codes that have correct replacements in `antibiotics`, so we can use as.ab()
breakpoints_new <- breakpoints %>%
# only last 10 years
filter(YEAR > as.double(format(Sys.Date(), "%Y")) - 10) %>%
# "all" and "gen" (general) must become UNKNOWNs:
mutate(ORGANISM_CODE = if_else(ORGANISM_CODE %in% c("all", "gen"), "UNKNOWN", ORGANISM_CODE)) %>%
# only last available 10 years
filter(YEAR > max(YEAR) - 10) %>%
transmute(
guideline = paste(GUIDELINES, YEAR),
method = TEST_METHOD,
site = gsub("Urinary tract infection", "UTI", SITE_OF_INFECTION),
mo = as.mo(ORGANISM_CODE, keep_synonyms = FALSE),
site = gsub(".*(UTI|urinary|urine).*", "UTI", SITE_OF_INFECTION, ignore.case = TRUE),
mo,
rank_index = case_when(
mo_rank(mo) %like% "(infra|sub)" ~ 1,
mo_rank(mo) == "species" ~ 2,
@ -194,17 +187,22 @@ breakpoints_new <- breakpoints %>%
disk_dose = POTENCY,
breakpoint_S = S,
breakpoint_R = R,
uti = SITE_OF_INFECTION %like% "(UTI|urinary|urine)"
uti = ifelse(is.na(site), FALSE, site == "UTI")
) %>%
# Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
mutate(disk_dose = disk_dose %>%
gsub("μ", "u", ., fixed = TRUE) %>%
gsub("µ", "u", ., fixed = TRUE) %>% # this is another micro sign, although we cannot see it
gsub("μ", "u", ., fixed = TRUE) %>% # this is 'mu', \u03bc
gsub("µ", "u", ., fixed = TRUE) %>% # this is 'micro', u00b5 (yes, they look the same)
gsub("", "-", ., fixed = TRUE)) %>%
arrange(desc(guideline), ab, mo, method) %>%
filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)) & !is.na(mo) & !is.na(ab)) %>%
distinct(guideline, ab, mo, method, site, breakpoint_S, .keep_all = TRUE)
# check the strange duplicates
breakpoints_new %>%
mutate(id = paste(guideline, ab, mo, method, site)) %>%
filter(id %in% .$id[which(duplicated(id))])
# clean disk zones and MICs
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_S", drop = TRUE]))
breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R"] <- as.double(as.disk(breakpoints_new[which(breakpoints_new$method == "DISK"), "breakpoint_R", drop = TRUE]))
@ -223,7 +221,7 @@ breakpoints_new[which(breakpoints_new$breakpoint_R == 513), "breakpoint_R"] <- m
breakpoints_new[which(breakpoints_new$breakpoint_R == 1025), "breakpoint_R"] <- m[which(m == 1024) + 1]
# WHONET adds one log2 level to the R breakpoint for their software, e.g. in AMC in Enterobacterales:
# EUCAST 2021 guideline: S <= 8 and R > 8
# EUCAST 2022 guideline: S <= 8 and R > 8
# WHONET file: S <= 8 and R >= 16
breakpoints_new %>% filter(guideline == "EUCAST 2022", ab == "AMC", mo == "B_[ORD]_ENTRBCTR", method == "MIC")
# this will make an MIC of 12 I, which should be R, so:

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -163,7 +163,7 @@ After using \code{\link[=as.sir]{as.sir()}}, you can use the \code{\link[=eucast
\subsection{Machine-Readable Clinical Breakpoints}{
The repository of this package \href{https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt}{contains a machine-readable version} of all guidelines. This is a CSV file consisting of 18 308 rows and 11 columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. \strong{This allows for easy implementation of these rules in laboratory information systems (LIS)}. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed.
The repository of this package \href{https://github.com/msberends/AMR/blob/main/data-raw/clinical_breakpoints.txt}{contains a machine-readable version} of all guidelines. This is a CSV file consisting of 17 918 rows and 11 columns. This file is machine-readable, since it contains one row for every unique combination of the test method (MIC or disk diffusion), the antimicrobial drug and the microorganism. \strong{This allows for easy implementation of these rules in laboratory information systems (LIS)}. Note that it only contains interpretation guidelines for humans - interpretation guidelines from CLSI for animals were removed.
}
\subsection{Other}{

View File

@ -5,7 +5,7 @@
\alias{clinical_breakpoints}
\title{Data Set with Clinical Breakpoints for SIR Interpretation}
\format{
A \link[tibble:tibble]{tibble} with 18 308 observations and 11 variables:
A \link[tibble:tibble]{tibble} with 17 918 observations and 11 variables:
\itemize{
\item \code{guideline}\cr Name of the guideline
\item \code{method}\cr Either "DISK" or "MIC"

View File

@ -3,9 +3,9 @@
\docType{data}
\name{microorganisms}
\alias{microorganisms}
\title{Data Set with 52 149 Microorganisms}
\title{Data Set with 52 151 Microorganisms}
\format{
A \link[tibble:tibble]{tibble} with 52 149 observations and 22 variables:
A \link[tibble:tibble]{tibble} with 52 151 observations and 22 variables:
\itemize{
\item \code{mo}\cr ID of microorganism as used by this package
\item \code{fullname}\cr Full name, like \code{"Escherichia coli"}. For the taxonomic ranks genus, species and subspecies, this is the 'pasted' text of genus, species, and subspecies. For all taxonomic ranks higher than genus, this is the name of the taxon.
@ -66,7 +66,7 @@ For convenience, some entries were added manually:
\item 2 entries of \emph{Staphylococcus} (coagulase-negative (CoNS) and coagulase-positive (CoPS))
\item 1 entry of \emph{Blastocystis} (\emph{B. hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993)
\item 1 entry of \emph{Moraxella} (\emph{M. catarrhalis}), which was formally named \emph{Branhamella catarrhalis} (Catlin, 1970) though this change was never accepted within the field of clinical microbiology
\item 6 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast, unknown fungus, and unknown anaerobic bacteria)
\item 8 other 'undefined' entries (unknown, unknown Gram-negatives, unknown Gram-positives, unknown yeast, unknown fungus, unknown anaerobic bacteria, unknown anaerobic Gram-negatives, and unknown anaerobic Gram-positives)
}
The syntax used to transform the original data to a cleansed \R format, can be found here: \url{https://github.com/msberends/AMR/blob/main/data-raw/reproduction_of_microorganisms.R}.

View File

@ -3,9 +3,9 @@
\docType{data}
\name{microorganisms.codes}
\alias{microorganisms.codes}
\title{Data Set with 5 751 Common Microorganism Codes}
\title{Data Set with 5 754 Common Microorganism Codes}
\format{
A \link[tibble:tibble]{tibble} with 5 751 observations and 2 variables:
A \link[tibble:tibble]{tibble} with 5 754 observations and 2 variables:
\itemize{
\item \code{code}\cr Commonly used code of a microorganism
\item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set