mirror of
https://github.com/msberends/AMR.git
synced 2025-01-13 13:31:37 +01:00
better as.mo handling
This commit is contained in:
parent
5cfa5bbfe3
commit
60fe98bbbd
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 0.5.0
|
Version: 0.5.0.9001
|
||||||
Date: 2018-12-01
|
Date: 2018-12-05
|
||||||
Title: Antimicrobial Resistance Analysis
|
Title: Antimicrobial Resistance Analysis
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person(
|
person(
|
||||||
@ -39,8 +39,8 @@ Authors@R: c(
|
|||||||
role = "ths",
|
role = "ths",
|
||||||
comment = c(ORCID = "0000-0003-1634-0010")))
|
comment = c(ORCID = "0000-0003-1634-0010")))
|
||||||
Description: Functions to simplify the analysis and prediction of Antimicrobial
|
Description: Functions to simplify the analysis and prediction of Antimicrobial
|
||||||
Resistance (AMR) to work with microbial and antimicrobial properties by using
|
Resistance (AMR) and to work with microbial and antimicrobial properties by
|
||||||
evidence-based methods.
|
using evidence-based methods.
|
||||||
Depends:
|
Depends:
|
||||||
R (>= 3.1.0)
|
R (>= 3.1.0)
|
||||||
Imports:
|
Imports:
|
||||||
|
@ -94,6 +94,7 @@ export(mdro)
|
|||||||
export(mo_TSN)
|
export(mo_TSN)
|
||||||
export(mo_authors)
|
export(mo_authors)
|
||||||
export(mo_class)
|
export(mo_class)
|
||||||
|
export(mo_failures)
|
||||||
export(mo_family)
|
export(mo_family)
|
||||||
export(mo_fullname)
|
export(mo_fullname)
|
||||||
export(mo_genus)
|
export(mo_genus)
|
||||||
@ -103,6 +104,7 @@ export(mo_order)
|
|||||||
export(mo_phylum)
|
export(mo_phylum)
|
||||||
export(mo_property)
|
export(mo_property)
|
||||||
export(mo_ref)
|
export(mo_ref)
|
||||||
|
export(mo_renamed)
|
||||||
export(mo_shortname)
|
export(mo_shortname)
|
||||||
export(mo_species)
|
export(mo_species)
|
||||||
export(mo_subkingdom)
|
export(mo_subkingdom)
|
||||||
@ -175,6 +177,7 @@ importFrom(crayon,blue)
|
|||||||
importFrom(crayon,bold)
|
importFrom(crayon,bold)
|
||||||
importFrom(crayon,green)
|
importFrom(crayon,green)
|
||||||
importFrom(crayon,italic)
|
importFrom(crayon,italic)
|
||||||
|
importFrom(crayon,magenta)
|
||||||
importFrom(crayon,red)
|
importFrom(crayon,red)
|
||||||
importFrom(crayon,silver)
|
importFrom(crayon,silver)
|
||||||
importFrom(crayon,strip_style)
|
importFrom(crayon,strip_style)
|
||||||
|
18
NEWS.md
18
NEWS.md
@ -1,5 +1,19 @@
|
|||||||
# 0.5.0
|
# 0.5.0.90xx (latest development version)
|
||||||
**Published on CRAN: 2018-12-01**
|
|
||||||
|
#### New
|
||||||
|
* Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values.
|
||||||
|
|
||||||
|
#### Changed
|
||||||
|
* Improvements for `as.mo`:
|
||||||
|
* Finds better results when input is in other languages
|
||||||
|
* Better handling for subspecies
|
||||||
|
* Better handling for *Salmonellae*
|
||||||
|
* Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# 0.5.0 (latest stable release)
|
||||||
|
**Published on CRAN: 2018-11-30**
|
||||||
|
|
||||||
#### New
|
#### New
|
||||||
* Repository moved to GitLab: https://gitlab.com/msberends/AMR
|
* Repository moved to GitLab: https://gitlab.com/msberends/AMR
|
||||||
|
@ -164,8 +164,8 @@ first_isolate <- function(tbl,
|
|||||||
message("NOTE: Using column `", col_date, "` as input for `col_date`.")
|
message("NOTE: Using column `", col_date, "` as input for `col_date`.")
|
||||||
}
|
}
|
||||||
# -- patient id
|
# -- patient id
|
||||||
if (is.null(col_patient_id) & any(colnames(tbl) %like% "^patient")) {
|
if (is.null(col_patient_id) & any(colnames(tbl) %like% "^(patient|patid)")) {
|
||||||
col_patient_id <- colnames(tbl)[colnames(tbl) %like% "^patient"][1]
|
col_patient_id <- colnames(tbl)[colnames(tbl) %like% "^(patient|patid)"][1]
|
||||||
message("NOTE: Using column `", col_patient_id, "` as input for `col_patient_id`.")
|
message("NOTE: Using column `", col_patient_id, "` as input for `col_patient_id`.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
174
R/mo.R
174
R/mo.R
@ -137,10 +137,10 @@
|
|||||||
#' mutate(mo = as.mo(paste(genus, species)))
|
#' mutate(mo = as.mo(paste(genus, species)))
|
||||||
#' }
|
#' }
|
||||||
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
|
as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL) {
|
||||||
structure(mo_validate(x = x, property = "mo",
|
mo <- mo_validate(x = x, property = "mo",
|
||||||
Becker = Becker, Lancefield = Lancefield,
|
Becker = Becker, Lancefield = Lancefield,
|
||||||
allow_uncertain = allow_uncertain, reference_df = reference_df),
|
allow_uncertain = allow_uncertain, reference_df = reference_df)
|
||||||
class = "mo")
|
structure(.Data = mo, class = "mo")
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @rdname as.mo
|
#' @rdname as.mo
|
||||||
@ -155,9 +155,12 @@ is.mo <- function(x) {
|
|||||||
#' @export
|
#' @export
|
||||||
guess_mo <- as.mo
|
guess_mo <- as.mo
|
||||||
|
|
||||||
#' @importFrom dplyr %>% pull left_join
|
#' @importFrom dplyr %>% pull left_join n_distinct
|
||||||
#' @importFrom data.table data.table as.data.table setkey
|
#' @importFrom data.table data.table as.data.table setkey
|
||||||
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE, reference_df = NULL, property = "mo") {
|
#' @importFrom crayon magenta red italic
|
||||||
|
exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
|
||||||
|
allow_uncertain = FALSE, reference_df = NULL,
|
||||||
|
property = "mo", clear_options = TRUE) {
|
||||||
|
|
||||||
if (!"AMR" %in% base::.packages()) {
|
if (!"AMR" %in% base::.packages()) {
|
||||||
library("AMR")
|
library("AMR")
|
||||||
@ -168,6 +171,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (clear_options == TRUE) {
|
||||||
|
options(mo_failures = NULL)
|
||||||
|
options(mo_renamed = NULL)
|
||||||
|
}
|
||||||
|
|
||||||
if (NCOL(x) == 2) {
|
if (NCOL(x) == 2) {
|
||||||
# support tidyverse selection like: df %>% select(colA, colB)
|
# support tidyverse selection like: df %>% select(colA, colB)
|
||||||
# paste these columns together
|
# paste these columns together
|
||||||
@ -231,10 +239,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x_backup <- trimws(x, which = "both")
|
x_backup <- trimws(x, which = "both")
|
||||||
|
|
||||||
# remove spp and species
|
# remove spp and species
|
||||||
x <- gsub(" +(spp.?|species)", "", x_backup)
|
x <- trimws(gsub(" +(spp.?|ssp.?|subsp.?|species)", " ", x_backup, ignore.case = TRUE), which = "both")
|
||||||
x_species <- paste(x, "species")
|
x_species <- paste(x, "species")
|
||||||
# translate to English for supported languages of mo_property
|
# translate to English for supported languages of mo_property
|
||||||
x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x)
|
x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x, ignore.case = TRUE)
|
||||||
# remove 'empty' genus and species values
|
# remove 'empty' genus and species values
|
||||||
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
x <- gsub("(no MO)", "", x, fixed = TRUE)
|
||||||
# remove non-text in case of "E. coli" except dots and spaces
|
# remove non-text in case of "E. coli" except dots and spaces
|
||||||
@ -244,6 +252,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x <- trimws(x, which = "both")
|
x <- trimws(x, which = "both")
|
||||||
x_trimmed <- x
|
x_trimmed <- x
|
||||||
x_trimmed_species <- paste(x_trimmed, "species")
|
x_trimmed_species <- paste(x_trimmed, "species")
|
||||||
|
x_trimmed_without_group <- gsub(" group$", "", x_trimmed, ignore.case = TRUE)
|
||||||
|
# remove last part from "-" or "/"
|
||||||
|
x_trimmed_without_group <- gsub("(.*)[-/].*", "\\1", x_trimmed_without_group)
|
||||||
# replace space and dot by regex sign
|
# replace space and dot by regex sign
|
||||||
x_withspaces <- gsub("[ .]+", ".* ", x)
|
x_withspaces <- gsub("[ .]+", ".* ", x)
|
||||||
x <- gsub("[ .]+", ".*", x)
|
x <- gsub("[ .]+", ".*", x)
|
||||||
@ -252,13 +263,14 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x_withspaces_start <- paste0('^', x_withspaces)
|
x_withspaces_start <- paste0('^', x_withspaces)
|
||||||
x_withspaces <- paste0('^', x_withspaces, '$')
|
x_withspaces <- paste0('^', x_withspaces, '$')
|
||||||
|
|
||||||
# cat(paste0('x "', x, '"\n'))
|
# cat(paste0('x "', x, '"\n'))
|
||||||
# cat(paste0('x_species "', x_species, '"\n'))
|
# cat(paste0('x_species "', x_species, '"\n'))
|
||||||
# cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n'))
|
# cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n'))
|
||||||
# cat(paste0('x_withspaces "', x_withspaces, '"\n'))
|
# cat(paste0('x_withspaces "', x_withspaces, '"\n'))
|
||||||
# cat(paste0('x_backup "', x_backup, '"\n'))
|
# cat(paste0('x_backup "', x_backup, '"\n'))
|
||||||
# cat(paste0('x_trimmed "', x_trimmed, '"\n'))
|
# cat(paste0('x_trimmed "', x_trimmed, '"\n'))
|
||||||
# cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
|
# cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n'))
|
||||||
|
# cat(paste0('x_trimmed_without_group "', x_trimmed_without_group, '"\n'))
|
||||||
|
|
||||||
for (i in 1:length(x)) {
|
for (i in 1:length(x)) {
|
||||||
if (identical(x_trimmed[i], "")) {
|
if (identical(x_trimmed[i], "")) {
|
||||||
@ -302,7 +314,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
|
x[i] <- microorganismsDT[mo == 'B_STPHY_EPI', ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
if (toupper(x_trimmed[i]) == 'VRE') {
|
if (toupper(x_trimmed[i]) == "VRE"
|
||||||
|
| x_trimmed[i] %like% '(enterococci|enterokok|enterococo)[a-z]*?$') {
|
||||||
x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
|
x[i] <- microorganismsDT[mo == 'B_ENTRC', ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
@ -323,7 +336,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
next
|
next
|
||||||
}
|
}
|
||||||
if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') {
|
if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') {
|
||||||
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]), ..property][[1]][1L]
|
# Streptococci, like GBS = Group B Streptococci (B_STRPTC_GRB)
|
||||||
|
x[i] <- microorganismsDT[mo == gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||||
|
next
|
||||||
|
}
|
||||||
|
if (toupper(x_trimmed[i]) %like% '(streptococc|streptokok).* [ABCDFGHK]$') {
|
||||||
|
# Streptococci in different languages, like "estreptococos grupo B"
|
||||||
|
x[i] <- microorganismsDT[mo == gsub(".*(streptococ|streptokok|estreptococ).* ([ABCDFGHK])$", "B_STRPTC_GR\\2", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||||
|
next
|
||||||
|
}
|
||||||
|
if (toupper(x_trimmed[i]) %like% 'group [ABCDFGHK] (streptococ|streptokok|estreptococ)') {
|
||||||
|
# Streptococci in different languages, like "Group A Streptococci"
|
||||||
|
x[i] <- microorganismsDT[mo == gsub(".*group ([ABCDFGHK]) (streptococ|streptokok|estreptococ).*", "B_STRPTC_GR\\1", x_trimmed[i], ignore.case = TRUE), ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
|
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ----
|
||||||
@ -341,18 +365,24 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
|
x[i] <- microorganismsDT[mo == 'B_STPHY_CPS', ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
if (tolower(x[i]) %like% '^gram[ -]+nega.*'
|
if (tolower(x[i]) %like% 'gram[ -]?neg.*'
|
||||||
| tolower(x_trimmed[i]) %like% '^gram[ -]+nega.*') {
|
| tolower(x_trimmed[i]) %like% 'gram[ -]?neg.*') {
|
||||||
# coerce S. coagulase positive
|
# coerce S. coagulase positive
|
||||||
x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
|
x[i] <- microorganismsDT[mo == 'B_GRAMN', ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
if (tolower(x[i]) %like% '^gram[ -]+posi.*'
|
if (tolower(x[i]) %like% 'gram[ -]?pos.*'
|
||||||
| tolower(x_trimmed[i]) %like% '^gram[ -]+posi.*') {
|
| tolower(x_trimmed[i]) %like% 'gram[ -]?pos.*') {
|
||||||
# coerce S. coagulase positive
|
# coerce S. coagulase positive
|
||||||
x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
|
x[i] <- microorganismsDT[mo == 'B_GRAMP', ..property][[1]][1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
if (grepl("[sS]almonella [A-Z][a-z]+ ?.*", x_trimmed[i])) {
|
||||||
|
# Salmonella with capital letter species like "Salmonella Goettingen" - they're all S. enterica
|
||||||
|
x[i] <- microorganismsDT[mo == 'B_SLMNL_ENT', ..property][[1]][1L]
|
||||||
|
base::message(magenta(paste0("Note: ", italic(x_trimmed[i]), " is a subspecies of ", italic("Salmonella enterica"), " (B_SLMNL_ENT)")))
|
||||||
|
next
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# FIRST TRY FULLNAMES AND CODES
|
# FIRST TRY FULLNAMES AND CODES
|
||||||
@ -424,6 +454,12 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
found <- microorganisms.prevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
|
||||||
|
if (length(found) > 0) {
|
||||||
|
x[i] <- found[1L]
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# try any match keeping spaces ----
|
# try any match keeping spaces ----
|
||||||
found <- microorganisms.prevDT[fullname %like% x_withspaces[i], ..property][[1]]
|
found <- microorganisms.prevDT[fullname %like% x_withspaces[i], ..property][[1]]
|
||||||
@ -495,28 +531,29 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
found <- microorganisms.unprevDT[tolower(fullname) == tolower(x_trimmed_without_group[i]), ..property][[1]]
|
||||||
|
if (length(found) > 0) {
|
||||||
|
x[i] <- found[1L]
|
||||||
|
next
|
||||||
|
}
|
||||||
# try any match keeping spaces ----
|
# try any match keeping spaces ----
|
||||||
found <- microorganisms.unprevDT[fullname %like% x_withspaces[i], ..property][[1]]
|
found <- microorganisms.unprevDT[fullname %like% x_withspaces[i], ..property][[1]]
|
||||||
if (length(found) > 0) {
|
if (length(found) > 0) {
|
||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# try any match keeping spaces, not ending with $ ----
|
# try any match keeping spaces, not ending with $ ----
|
||||||
found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
|
found <- microorganisms.unprevDT[fullname %like% x_withspaces_start[i], ..property][[1]]
|
||||||
if (length(found) > 0) {
|
if (length(found) > 0) {
|
||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# try any match diregarding spaces ----
|
# try any match diregarding spaces ----
|
||||||
found <- microorganisms.unprevDT[fullname %like% x[i], ..property][[1]]
|
found <- microorganisms.unprevDT[fullname %like% x[i], ..property][[1]]
|
||||||
if (length(found) > 0) {
|
if (length(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||||
x[i] <- found[1L]
|
x[i] <- found[1L]
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# try splitting of characters in the middle and then find ID ----
|
# try splitting of characters in the middle and then find ID ----
|
||||||
# only when text length is 6 or lower
|
# only when text length is 6 or lower
|
||||||
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
|
# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus
|
||||||
@ -568,15 +605,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
found <- microorganisms.oldDT[name %like% x_withspaces[i]
|
found <- microorganisms.oldDT[name %like% x_withspaces[i]
|
||||||
| name %like% x_withspaces_start[i]
|
| name %like% x_withspaces_start[i]
|
||||||
| name %like% x[i],]
|
| name %like% x[i],]
|
||||||
if (NROW(found) > 0) {
|
if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||||
if (property == "ref") {
|
if (property == "ref") {
|
||||||
x[i] <- found[1, ref]
|
x[i] <- found[1, ref]
|
||||||
} else {
|
} else {
|
||||||
x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
|
x[i] <- microorganismsDT[tsn == found[1, tsn_new], ..property][[1]]
|
||||||
}
|
}
|
||||||
warning("Uncertain interpretation: '",
|
warning(red(paste0("UNCERTAIN - '",
|
||||||
x_backup[i], "' -> '", found[1, name], "'",
|
x_backup[i], "' -> ", italic(found[1, name]))),
|
||||||
call. = FALSE, immediate. = TRUE)
|
call. = FALSE, immediate. = TRUE)
|
||||||
renamed_note(name_old = found[1, name],
|
renamed_note(name_old = found[1, name],
|
||||||
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
|
name_new = microorganismsDT[tsn == found[1, tsn_new], fullname],
|
||||||
ref_old = found[1, ref],
|
ref_old = found[1, ref],
|
||||||
@ -584,14 +621,38 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
# (2) try to strip off one element and check the remains
|
# (2) strip values between brackets ----
|
||||||
x_strip <- x_backup[i] %>% strsplit(" ") %>% unlist()
|
found <- microorganismsDT[fullname %like% gsub("( [(].*[)]) ", " ", x_withspaces[i])
|
||||||
x_strip <- x_strip[1:length(x_strip) - 1]
|
| fullname %like% gsub("( [(].*[)]) ", " ", x_backup[i])
|
||||||
x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip)))
|
| fullname %like% gsub("( [(].*[)]) ", " ", x[i]),]
|
||||||
|
if (NROW(found) > 0 & nchar(x_trimmed[i]) >= 6) {
|
||||||
|
x[i] <- found[1, ..property][[1]]
|
||||||
|
warning(red(paste0("UNCERTAIN - '",
|
||||||
|
x_backup[i], "' -> ", italic(found[1, fullname][[1]]), " (", found[1, mo][[1]], ")")),
|
||||||
|
call. = FALSE, immediate. = TRUE)
|
||||||
|
next
|
||||||
|
}
|
||||||
|
|
||||||
|
# (3) try to strip off one element and check the remains ----
|
||||||
|
look_for_part <- function(z) {
|
||||||
|
x_strip <- z %>% strsplit(" ") %>% unlist()
|
||||||
|
if (length(x_strip) > 1 & nchar(x_trimmed[i]) >= 6) {
|
||||||
|
for (i in 1:(length(x_strip) - 1)) {
|
||||||
|
x_strip_collapsed <- paste(x_strip[1:(length(x_strip) - i)], collapse = " ")
|
||||||
|
found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, clear_options = FALSE)))
|
||||||
|
if (!is.na(found)) {
|
||||||
|
found <- microorganismsDT[mo == found, ..property][[1]]
|
||||||
|
warning(red(paste0("UNCERTAIN - '",
|
||||||
|
z, "' -> ", italic(microorganismsDT[mo == found[1L], fullname][[1]]), " (", found[1L], ")")),
|
||||||
|
call. = FALSE, immediate. = TRUE)
|
||||||
|
return(found[1L])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(NA_character_)
|
||||||
|
}
|
||||||
|
x[i] <- look_for_part(x_backup[i])
|
||||||
if (!is.na(x[i])) {
|
if (!is.na(x[i])) {
|
||||||
warning("Uncertain interpretation: '",
|
|
||||||
x_backup[i], "' -> '", microorganismsDT[mo == x[i], fullname], "' (", x[i], ")",
|
|
||||||
call. = FALSE, immediate. = TRUE)
|
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -605,10 +666,16 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
|
|
||||||
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
failures <- failures[!failures %in% c(NA, NULL, NaN)]
|
||||||
if (length(failures) > 0) {
|
if (length(failures) > 0) {
|
||||||
warning("These ", length(failures) , " values could not be coerced to a valid MO code: ",
|
options(mo_failures = sort(unique(failures)))
|
||||||
paste('"', unique(failures), '"', sep = "", collapse = ', '),
|
if (n_distinct(failures) > 25) {
|
||||||
".",
|
warning(n_distinct(failures), " different values could not be coerced to a valid MO code. See mo_failures() to review them.",
|
||||||
call. = FALSE)
|
call. = FALSE)
|
||||||
|
} else {
|
||||||
|
warning("These ", length(failures) , " values could not be coerced to a valid MO code: ",
|
||||||
|
paste('"', unique(failures), '"', sep = "", collapse = ', '),
|
||||||
|
". See mo_failures() to review them.",
|
||||||
|
call. = FALSE)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Becker ----
|
# Becker ----
|
||||||
@ -687,6 +754,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
|
|||||||
x
|
x
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @importFrom crayon blue
|
||||||
renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
|
renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
|
||||||
if (!is.na(ref_old)) {
|
if (!is.na(ref_old)) {
|
||||||
ref_old <- paste0(" (", ref_old, ")")
|
ref_old <- paste0(" (", ref_old, ")")
|
||||||
@ -698,7 +766,11 @@ renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
|
|||||||
} else {
|
} else {
|
||||||
ref_new <- ""
|
ref_new <- ""
|
||||||
}
|
}
|
||||||
base::message(paste0("Note: '", name_old, "'", ref_old, " was renamed '", name_new, "'", ref_new))
|
msg <- paste0("'", name_old, "'", ref_old, " was renamed '", name_new, "'", ref_new)
|
||||||
|
msg_plain <- paste0(name_old, ref_old, " -> ", name_new, ref_new)
|
||||||
|
msg_plain <- c(getOption("mo_renamed", character(0)), msg_plain)
|
||||||
|
options(mo_renamed = sort(msg_plain))
|
||||||
|
base::message(blue(paste("Note:", msg)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @exportMethod print.mo
|
#' @exportMethod print.mo
|
||||||
@ -733,3 +805,21 @@ as.data.frame.mo <- function (x, ...) {
|
|||||||
pull.mo <- function(.data, ...) {
|
pull.mo <- function(.data, ...) {
|
||||||
pull(as.data.frame(.data), ...)
|
pull(as.data.frame(.data), ...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Vector of failed coercion attempts
|
||||||
|
#'
|
||||||
|
#' Returns a vector of all failed attempts to coerce values to a valid MO code with \code{\link{as.mo}}.
|
||||||
|
#' @seealso \code{\link{as.mo}}
|
||||||
|
#' @export
|
||||||
|
mo_failures <- function() {
|
||||||
|
getOption("mo_failures")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Vector of taxonomic renamed items
|
||||||
|
#'
|
||||||
|
#' Returns a vector of all renamed items of the last coercion to valid MO codes with \code{\link{as.mo}}.
|
||||||
|
#' @seealso \code{\link{as.mo}}
|
||||||
|
#' @export
|
||||||
|
mo_renamed <- function() {
|
||||||
|
getOption("mo_renamed")
|
||||||
|
}
|
||||||
|
@ -461,6 +461,15 @@ mo_validate <- function(x, property, ...) {
|
|||||||
Lancefield <- FALSE
|
Lancefield <- FALSE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!"AMR" %in% base::.packages()) {
|
||||||
|
library("AMR")
|
||||||
|
# These data.tables are available as data sets when the AMR package is loaded:
|
||||||
|
# microorganismsDT # this one is sorted by kingdom (B<F<P), prevalence, TSN
|
||||||
|
# microorganisms.prevDT # same as microorganismsDT, but with prevalence != 9999
|
||||||
|
# microorganisms.unprevDT # same as microorganismsDT, but with prevalence == 9999
|
||||||
|
# microorganisms.oldDT # old taxonomic names, sorted by name (genus+species), TSN
|
||||||
|
}
|
||||||
|
|
||||||
if (!all(x %in% microorganismsDT[[property]])
|
if (!all(x %in% microorganismsDT[[property]])
|
||||||
| Becker %in% c(TRUE, "all")
|
| Becker %in% c(TRUE, "all")
|
||||||
| Lancefield %in% c(TRUE, "all")) {
|
| Lancefield %in% c(TRUE, "all")) {
|
||||||
|
14
man/mo_failures.Rd
Normal file
14
man/mo_failures.Rd
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mo.R
|
||||||
|
\name{mo_failures}
|
||||||
|
\alias{mo_failures}
|
||||||
|
\title{Vector of failed coercion attempts}
|
||||||
|
\usage{
|
||||||
|
mo_failures()
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Returns a vector of all failed attempts to coerce values to a valid MO code with \code{\link{as.mo}}.
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
\code{\link{as.mo}}
|
||||||
|
}
|
14
man/mo_renamed.Rd
Normal file
14
man/mo_renamed.Rd
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/mo.R
|
||||||
|
\name{mo_renamed}
|
||||||
|
\alias{mo_renamed}
|
||||||
|
\title{Vector of taxonomic renamed items}
|
||||||
|
\usage{
|
||||||
|
mo_renamed()
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Returns a vector of all renamed items of the last coercion to valid MO codes with \code{\link{as.mo}}.
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
\code{\link{as.mo}}
|
||||||
|
}
|
@ -210,4 +210,14 @@ test_that("as.mo works", {
|
|||||||
c("PRTMIR", "bclcer", "B_ESCHR_COL"))),
|
c("PRTMIR", "bclcer", "B_ESCHR_COL"))),
|
||||||
c("B_PROTS_MIR", "B_BCLLS_CER", "B_ESCHR_COL"))
|
c("B_PROTS_MIR", "B_BCLLS_CER", "B_ESCHR_COL"))
|
||||||
|
|
||||||
|
# hard to find
|
||||||
|
expect_equal(as.character(suppressWarnings(as.mo(
|
||||||
|
c("Microbacterium paraoxidans",
|
||||||
|
"Streptococcus suis (bovis gr)",
|
||||||
|
"Raoultella (here some text) terrigena"), allow_uncertain = TRUE))),
|
||||||
|
c("B_MCRBC", "B_STRPTC_SUI", "B_RLTLL_TER"))
|
||||||
|
|
||||||
|
# Salmonella (City) are all actually Salmonella enterica spp (City)
|
||||||
|
expect_equal(as.character(suppressMessages(as.mo("Salmonella Goettingen", allow_uncertain = TRUE))),
|
||||||
|
"B_SLMNL_ENT")
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user