mirror of
https://github.com/msberends/AMR.git
synced 2025-01-13 19:31:38 +01:00
algoritm improvement, removed all Catabacter except for C. hongkongensis
This commit is contained in:
parent
4816419f0c
commit
b83e6a9380
2
NEWS.md
2
NEWS.md
@ -25,7 +25,7 @@
|
|||||||
* Introduction to AMR as a vignette
|
* Introduction to AMR as a vignette
|
||||||
|
|
||||||
#### Changed
|
#### Changed
|
||||||
* Added 231 microorganisms to the `microorganisms` data set and removed the few viruses it contained, now *n* = 2,669 (2,230 bacteria, 285 fungi/yeasts, 153 parasites, 1 other)
|
* Added almost 200 microorganisms to the `microorganisms` data set and removed the few viruses it contained
|
||||||
* Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
|
* Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
|
||||||
* Added 163 trade names to the `antibiotics` data set, it now contains 298 different trade names in total, e.g.:
|
* Added 163 trade names to the `antibiotics` data set, it now contains 298 different trade names in total, e.g.:
|
||||||
```r
|
```r
|
||||||
|
4
R/data.R
4
R/data.R
@ -122,8 +122,8 @@
|
|||||||
|
|
||||||
#' Data set with human pathogenic microorganisms
|
#' Data set with human pathogenic microorganisms
|
||||||
#'
|
#'
|
||||||
#' A data set containing 2,669 (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
#' A data set containing 2,630 (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
||||||
#' @format A \code{\link{tibble}} with 2,669 observations and 10 variables:
|
#' @format A \code{\link{tibble}} with 2,630 observations and 10 variables:
|
||||||
#' \describe{
|
#' \describe{
|
||||||
#' \item{\code{mo}}{ID of microorganism}
|
#' \item{\code{mo}}{ID of microorganism}
|
||||||
#' \item{\code{bactsys}}{Bactsyscode of microorganism}
|
#' \item{\code{bactsys}}{Bactsyscode of microorganism}
|
||||||
|
21
R/mo.R
21
R/mo.R
@ -131,7 +131,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
|||||||
# remove 'empty' genus and species values
|
# remove 'empty' genus and species values
|
||||||
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
||||||
# remove dots and other non-text in case of "E. coli" except spaces
|
# remove dots and other non-text in case of "E. coli" except spaces
|
||||||
x <- gsub("[^a-zA-Z0-9 ]+", "", x)
|
x <- gsub("[^a-zA-Z0-9/ \\-]+", "", x)
|
||||||
# but spaces before and after should be omitted
|
# but spaces before and after should be omitted
|
||||||
x <- trimws(x, which = "both")
|
x <- trimws(x, which = "both")
|
||||||
x_trimmed <- x
|
x_trimmed <- x
|
||||||
@ -146,6 +146,12 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
|||||||
x_withspaces_start <- paste0('^', x_withspaces)
|
x_withspaces_start <- paste0('^', x_withspaces)
|
||||||
x_withspaces <- paste0('^', x_withspaces, '$')
|
x_withspaces <- paste0('^', x_withspaces, '$')
|
||||||
|
|
||||||
|
# print(x)
|
||||||
|
# print(x_withspaces_all)
|
||||||
|
# print(x_withspaces_start)
|
||||||
|
# print(x_withspaces)
|
||||||
|
# print(x_backup)
|
||||||
|
|
||||||
for (i in 1:length(x)) {
|
for (i in 1:length(x)) {
|
||||||
if (identical(x_trimmed[i], "")) {
|
if (identical(x_trimmed[i], "")) {
|
||||||
# empty values
|
# empty values
|
||||||
@ -195,6 +201,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
|||||||
x[i] <- 'PSEAER'
|
x[i] <- 'PSEAER'
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
if (x_backup[i] %like% '^l.*pneum.*' & !x_backup[i] %like% '^l.*non.*pneum.*') {
|
||||||
|
# avoid detection of Legionella non pneumophila in case of Legionella pneumophila
|
||||||
|
x[i] <- 'LEGPNE'
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
# CoNS and CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
# CoNS and CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
|
||||||
if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]'
|
if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]'
|
||||||
@ -250,6 +261,12 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
|||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
# try the same, now based on genus + species
|
||||||
|
found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo
|
||||||
|
if (length(found) > 0) {
|
||||||
|
x[i] <- found[1L]
|
||||||
|
next
|
||||||
|
}
|
||||||
# try any match keeping spaces, not ending with $
|
# try any match keeping spaces, not ending with $
|
||||||
found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
|
found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
|
||||||
if (length(found) > 0) {
|
if (length(found) > 0) {
|
||||||
@ -329,7 +346,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
|
|||||||
|
|
||||||
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
||||||
if (length(failures) > 0) {
|
if (length(failures) > 0) {
|
||||||
warning("These values could not be coerced to a valid mo: ",
|
warning("These ", length(failures) , " values could not be coerced to a valid mo: ",
|
||||||
paste('"', unique(failures), '"', sep = "", collapse = ', '),
|
paste('"', unique(failures), '"', sep = "", collapse = ', '),
|
||||||
".",
|
".",
|
||||||
call. = FALSE)
|
call. = FALSE)
|
||||||
|
@ -159,7 +159,7 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
|
|||||||
# return G. species
|
# return G. species
|
||||||
result <- paste0(substr(mo_genus(x), 1, 1), ". ", suppressWarnings(mo_species(x)))
|
result <- paste0(substr(mo_genus(x), 1, 1), ". ", suppressWarnings(mo_species(x)))
|
||||||
}
|
}
|
||||||
result[result %in% c(". ")] <- ""
|
result[result %in% c(". ", "(. ")] <- ""
|
||||||
mo_translate(result, language = language)
|
mo_translate(result, language = language)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -224,6 +224,7 @@ mo_translate <- function(x, language) {
|
|||||||
language == "de" ~ x %>%
|
language == "de" ~ x %>%
|
||||||
gsub("Coagulase Negative Staphylococcus","Koagulase-negative Staphylococcus", ., fixed = TRUE) %>%
|
gsub("Coagulase Negative Staphylococcus","Koagulase-negative Staphylococcus", ., fixed = TRUE) %>%
|
||||||
gsub("Coagulase Positive Staphylococcus","Koagulase-positive Staphylococcus", ., fixed = TRUE) %>%
|
gsub("Coagulase Positive Staphylococcus","Koagulase-positive Staphylococcus", ., fixed = TRUE) %>%
|
||||||
|
gsub("Beta-haemolytic Streptococcus", "Beta-h\u00e4molytischer Streptococcus", ., fixed = TRUE) %>%
|
||||||
gsub("(no MO)", "(kein MO)", ., fixed = TRUE) %>%
|
gsub("(no MO)", "(kein MO)", ., fixed = TRUE) %>%
|
||||||
gsub("Negative rods", "Negative St\u00e4bchen", ., fixed = TRUE) %>%
|
gsub("Negative rods", "Negative St\u00e4bchen", ., fixed = TRUE) %>%
|
||||||
gsub("Negative cocci", "Negative Kokken", ., fixed = TRUE) %>%
|
gsub("Negative cocci", "Negative Kokken", ., fixed = TRUE) %>%
|
||||||
@ -244,6 +245,7 @@ mo_translate <- function(x, language) {
|
|||||||
language == "nl" ~ x %>%
|
language == "nl" ~ x %>%
|
||||||
gsub("Coagulase Negative Staphylococcus","Coagulase-negatieve Staphylococcus", ., fixed = TRUE) %>%
|
gsub("Coagulase Negative Staphylococcus","Coagulase-negatieve Staphylococcus", ., fixed = TRUE) %>%
|
||||||
gsub("Coagulase Positive Staphylococcus","Coagulase-positieve Staphylococcus", ., fixed = TRUE) %>%
|
gsub("Coagulase Positive Staphylococcus","Coagulase-positieve Staphylococcus", ., fixed = TRUE) %>%
|
||||||
|
gsub("Beta-haemolytic Streptococcus", "Beta-hemolytische Streptococcus", ., fixed = TRUE) %>%
|
||||||
gsub("(no MO)", "(geen MO)", ., fixed = TRUE) %>%
|
gsub("(no MO)", "(geen MO)", ., fixed = TRUE) %>%
|
||||||
gsub("Negative rods", "Negatieve staven", ., fixed = TRUE) %>%
|
gsub("Negative rods", "Negatieve staven", ., fixed = TRUE) %>%
|
||||||
gsub("Negative cocci", "Negatieve kokken", ., fixed = TRUE) %>%
|
gsub("Negative cocci", "Negatieve kokken", ., fixed = TRUE) %>%
|
||||||
@ -264,6 +266,7 @@ mo_translate <- function(x, language) {
|
|||||||
language == "es" ~ x %>%
|
language == "es" ~ x %>%
|
||||||
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>%
|
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulasa negativo", ., fixed = TRUE) %>%
|
||||||
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>%
|
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>%
|
||||||
|
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
|
||||||
gsub("(no MO)", "(sin MO)", ., fixed = TRUE) %>%
|
gsub("(no MO)", "(sin MO)", ., fixed = TRUE) %>%
|
||||||
gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>%
|
gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>%
|
||||||
gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>%
|
gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>%
|
||||||
@ -284,6 +287,7 @@ mo_translate <- function(x, language) {
|
|||||||
language == "pt" ~ x %>%
|
language == "pt" ~ x %>%
|
||||||
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulase negativo", ., fixed = TRUE) %>%
|
gsub("Coagulase Negative Staphylococcus","Staphylococcus coagulase negativo", ., fixed = TRUE) %>%
|
||||||
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulase positivo", ., fixed = TRUE) %>%
|
gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulase positivo", ., fixed = TRUE) %>%
|
||||||
|
gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>%
|
||||||
gsub("(no MO)", "(sem MO)", ., fixed = TRUE) %>%
|
gsub("(no MO)", "(sem MO)", ., fixed = TRUE) %>%
|
||||||
gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>%
|
gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>%
|
||||||
gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>%
|
gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>%
|
||||||
|
@ -388,7 +388,7 @@ antibiotics # A tibble: 423 x 18
|
|||||||
|
|
||||||
# Dataset with bacteria codes and properties like gram stain and
|
# Dataset with bacteria codes and properties like gram stain and
|
||||||
# aerobic/anaerobic
|
# aerobic/anaerobic
|
||||||
microorganisms # A tibble: 2,669 x 10
|
microorganisms # A tibble: 2,630 x 10
|
||||||
```
|
```
|
||||||
|
|
||||||
## Copyright
|
## Copyright
|
||||||
|
Binary file not shown.
@ -4,7 +4,7 @@
|
|||||||
\name{microorganisms}
|
\name{microorganisms}
|
||||||
\alias{microorganisms}
|
\alias{microorganisms}
|
||||||
\title{Data set with human pathogenic microorganisms}
|
\title{Data set with human pathogenic microorganisms}
|
||||||
\format{A \code{\link{tibble}} with 2,669 observations and 10 variables:
|
\format{A \code{\link{tibble}} with 2,630 observations and 10 variables:
|
||||||
\describe{
|
\describe{
|
||||||
\item{\code{mo}}{ID of microorganism}
|
\item{\code{mo}}{ID of microorganism}
|
||||||
\item{\code{bactsys}}{Bactsyscode of microorganism}
|
\item{\code{bactsys}}{Bactsyscode of microorganism}
|
||||||
@ -21,7 +21,7 @@
|
|||||||
microorganisms
|
microorganisms
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
A data set containing 2,669 (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
A data set containing 2,630 (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}.
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
\code{\link{guess_mo}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
|
\code{\link{guess_mo}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
|
||||||
|
@ -4,9 +4,7 @@ test_that("as.mo works", {
|
|||||||
|
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
MOs <- AMR::microorganisms %>% filter(!is.na(mo))
|
MOs <- AMR::microorganisms %>% filter(!is.na(mo))
|
||||||
|
|
||||||
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo)))
|
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo)))
|
||||||
expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en"))
|
|
||||||
|
|
||||||
expect_identical(
|
expect_identical(
|
||||||
as.character(as.mo(c("E. coli", "H. influenzae"))),
|
as.character(as.mo(c("E. coli", "H. influenzae"))),
|
||||||
@ -21,6 +19,9 @@ test_that("as.mo works", {
|
|||||||
expect_equal(as.character(as.mo("K. pneu rhino")), "KLEPNERH") # K. pneumoniae subspp. rhinoscleromatis
|
expect_equal(as.character(as.mo("K. pneu rhino")), "KLEPNERH") # K. pneumoniae subspp. rhinoscleromatis
|
||||||
expect_equal(as.character(as.mo("Bartonella")), "BAR")
|
expect_equal(as.character(as.mo("Bartonella")), "BAR")
|
||||||
expect_equal(as.character(as.mo("C. difficile")), "CLODIF")
|
expect_equal(as.character(as.mo("C. difficile")), "CLODIF")
|
||||||
|
expect_equal(as.character(as.mo("L. pneumophila")), "LEGPNE")
|
||||||
|
expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON")
|
||||||
|
expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE")
|
||||||
|
|
||||||
expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes
|
expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes
|
||||||
|
|
||||||
|
@ -22,4 +22,27 @@ test_that("mo_property works", {
|
|||||||
expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven")
|
expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven")
|
||||||
|
|
||||||
expect_error(mo_type("E. coli", language = "INVALID"))
|
expect_error(mo_type("E. coli", language = "INVALID"))
|
||||||
|
|
||||||
|
# test integrity
|
||||||
|
library(dplyr)
|
||||||
|
MOs <- AMR::microorganisms %>% filter(!is.na(mo))
|
||||||
|
expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en"))
|
||||||
|
|
||||||
|
mo_clean <- MOs$mo
|
||||||
|
mo_from_shortname <- as.mo(mo_shortname(mo_clean))
|
||||||
|
mo_clean <- mo_clean[nchar(mo_from_shortname) == 6 &
|
||||||
|
!is.na(mo_from_shortname) &
|
||||||
|
!mo_from_shortname %like% "...SPP"]
|
||||||
|
mo_from_shortname <- mo_from_shortname[nchar(mo_from_shortname) == 6 &
|
||||||
|
!is.na(mo_from_shortname) &
|
||||||
|
!mo_from_shortname %like% "...SPP"]
|
||||||
|
tb <- tibble(a = substr(mo_clean, 1, 6),
|
||||||
|
b = mo_from_shortname,
|
||||||
|
c = a == b,
|
||||||
|
d = mo_shortname(a),
|
||||||
|
e = mo_shortname(b),
|
||||||
|
f = d == e)
|
||||||
|
expect_gt(sum(tb$c) / nrow(tb), 0.9) # more than 90% of MO code should be identical
|
||||||
|
expect_identical(sum(tb$f), nrow(tb)) # all shortnames should be identical
|
||||||
|
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user