1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 06:02:01 +02:00

algorithm improvement, removed all Catabacter except for C. hongkongensis

This commit is contained in:
2018-09-10 11:40:54 +02:00
parent 4816419f0c
commit b83e6a9380
9 changed files with 56 additions and 11 deletions

21
R/mo.R
View File

@ -131,7 +131,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
# remove 'empty' genus and species values
x <- gsub("(no MO)", "", x, fixed = TRUE)
# remove dots and other non-text in case of "E. coli" except spaces
x <- gsub("[^a-zA-Z0-9 ]+", "", x)
x <- gsub("[^a-zA-Z0-9/ \\-]+", "", x)
# but spaces before and after should be omitted
x <- trimws(x, which = "both")
x_trimmed <- x
@ -146,6 +146,12 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x_withspaces_start <- paste0('^', x_withspaces)
x_withspaces <- paste0('^', x_withspaces, '$')
# print(x)
# print(x_withspaces_all)
# print(x_withspaces_start)
# print(x_withspaces)
# print(x_backup)
for (i in 1:length(x)) {
if (identical(x_trimmed[i], "")) {
# empty values
@ -195,6 +201,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[i] <- 'PSEAER'
next
}
if (x_backup[i] %like% '^l.*pneum.*' & !x_backup[i] %like% '^l.*non.*pneum.*') {
# avoid detection of Legionella non pneumophila in case of Legionella pneumophila
x[i] <- 'LEGPNE'
next
}
# CoNS and CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]'
@ -250,6 +261,12 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
x[i] <- found[1L]
next
}
# try the same, now based on genus + species
found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo
if (length(found) > 0) {
x[i] <- found[1L]
next
}
# try any match keeping spaces, not ending with $
found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo
if (length(found) > 0) {
@ -329,7 +346,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0) {
warning("These values could not be coerced to a valid mo: ",
warning("These ", length(failures) , " values could not be coerced to a valid mo: ",
paste('"', unique(failures), '"', sep = "", collapse = ', '),
".",
call. = FALSE)