1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-24 11:44:35 +01:00

(v1.2.0.9022) as.ab() improvement

This commit is contained in:
dr. M.S. (Matthijs) Berends 2020-07-01 16:21:36 +02:00
parent afc660dc33
commit 298e67a45b
15 changed files with 84 additions and 29 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.2.0.9021
Version: 1.2.0.9022
Date: 2020-07-01
Title: Antimicrobial Resistance Analysis
Authors@R: c(

11
NEWS.md
View File

@ -1,4 +1,4 @@
# AMR 1.2.0.9021
# AMR 1.2.0.9022
## <small>Last updated: 01-Jul-2020</small>
### New
@ -20,16 +20,19 @@
### Changed
* Using non-existent columns in all `count_*()`, `proportion_*()`, `susceptibility()` and `resistance()` functions will now return an error instead of dropping them silently
* Improvements for `as.ab()`:
* Dramatic improvement of the algorithm behind `as.ab()`, making many more input errors translatable, such as artefacts from digitalised health care records and the use of too few or too many vowels or consonants, among others
* Added progress bar
* Fixed a bug where `as.ab()` would return an error on invalid input values
* The `as.ab()` function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.
* Fixed a bug where `eucast_rules()` would not work on a tibble when the `tibble` or `dplyr` package was loaded
* All `*_join_microorganisms()` functions and `bug_drug_combinations()` now return the original data class (e.g. `tibble`s and `data.table`s)
* Fixed a bug where `as.ab()` would return an error on invalid input values
* Fixed a bug for using grouped versions of `rsi_df()`, `proportion_df()` and `count_df()`, and fixed a bug where not all different antimicrobial results were added as rows
* Improved auto-determination for columns of types `<mo>` and `<Date>`
* Fixed a bug in `bug_drug_combinations()` for when only one antibiotic was in the input data
* Changed the summary for class `<mo>`, to highlight the %SI vs. %R
* Improved error handling, giving more useful info when functions return an error
* Algorithm improvements to `as.ab()`, many more misspellings are now translatable. The `as.ab()` function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.
* Added progress bar to `as.ab()`
# AMR 1.2.0

40
R/ab.R
View File

@ -29,6 +29,13 @@
#' @rdname as.ab
#' @inheritSection WHOCC WHOCC
#' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
#'
#' All these properties will be searched for the user input. The [as.ab()] function can correct for different forms of misspelling:
#'
#' * Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.
#' * Too few or too many vowels or consonants
#' * Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)
#' * Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.
#'
#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples.
#'
@ -231,7 +238,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
# replace spaces and slashes with a possibility on both
x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling)
# correct for digital reading text (OCR)
x_spelling <- gsub("[NRD]", "[NRD]", x_spelling)
x_spelling <- gsub("[NRD8B]", "[NRD8B]", x_spelling)
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling)
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
}
# try if name starts with it
@ -246,6 +255,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# and try if any synonym starts with it
synonym_found <- unlist(lapply(antibiotics$synonyms,
function(s) any(s %like% paste0("^", x_spelling))))
@ -254,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# INITIAL SEARCH - More uncertain results ----
if (initial_search == TRUE) {
@ -341,7 +351,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE))
if (!is.na(found) && !ab_group(found, initial_search = FALSE) %like% "cephalosporins") {
@ -365,7 +375,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
next
}
# make all vowels facultative
search_str <- gsub("([AEIOUY])", "\\1*", x[i])
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
@ -390,8 +400,28 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
next
}
# try with switched character, like "mreopenem"
for (j in seq_len(nchar(x[i]))) {
x_switched <- paste0(
# beginning part:
substr(x[i], 1, j - 1),
# here is the switching of 2 characters:
substr(x[i], j + 1, j + 1),
substr(x[i], j, j),
# ending part:
substr(x[i], j + 2, nchar(x[i])))
found <- suppressWarnings(as.ab(x_switched, initial_search = FALSE))
if (!is.na(found)) {
break
}
}
if (!is.na(found)) {
x_new[i] <- found[1L]
next
}
} # end of initial_search = TRUE
# not found
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
}

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -43,7 +43,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>
@ -229,9 +229,9 @@
<small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
</div>
<div id="amr-1209021" class="section level1">
<h1 class="page-header" data-toc-text="1.2.0.9021">
<a href="#amr-1209021" class="anchor"></a>AMR 1.2.0.9021<small> Unreleased </small>
<div id="amr-1209022" class="section level1">
<h1 class="page-header" data-toc-text="1.2.0.9022">
<a href="#amr-1209022" class="anchor"></a>AMR 1.2.0.9022<small> Unreleased </small>
</h1>
<div id="last-updated-01-jul-2020" class="section level2">
<h2 class="hasAnchor">
@ -263,18 +263,22 @@
<a href="#changed" class="anchor"></a>Changed</h3>
<ul>
<li>Using non-existent columns in all <code>count_*()</code>, <code>proportion_*()</code>, <code><a href="../reference/proportion.html">susceptibility()</a></code> and <code><a href="../reference/proportion.html">resistance()</a></code> functions will now return an error instead of dropping them silently</li>
<li>Improvements for <code><a href="../reference/as.ab.html">as.ab()</a></code>:
<ul>
<li>Dramatic improvement of the algorithm behind <code><a href="../reference/as.ab.html">as.ab()</a></code>, making many more input errors translatable, such as artefacts from digitalised health care records and the use of too few or too many vowels or consonants, among others</li>
<li>Added progress bar</li>
<li>Fixed a bug where <code><a href="../reference/as.ab.html">as.ab()</a></code> would return an error on invalid input values</li>
<li>The <code><a href="../reference/as.ab.html">as.ab()</a></code> function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.</li>
</ul>
</li>
<li>Fixed a bug where <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> would not work on a tibble when the <code>tibble</code> or <code>dplyr</code> package was loaded</li>
<li>All <code>*_join_microorganisms()</code> functions and <code><a href="../reference/bug_drug_combinations.html">bug_drug_combinations()</a></code> now return the original data class (e.g. <code>tibble</code>s and <code>data.table</code>s)</li>
<li>Fixed a bug where <code><a href="../reference/as.ab.html">as.ab()</a></code> would return an error on invalid input values</li>
<li>Fixed a bug for using grouped versions of <code><a href="../reference/proportion.html">rsi_df()</a></code>, <code><a href="../reference/proportion.html">proportion_df()</a></code> and <code><a href="../reference/count.html">count_df()</a></code>, and fixed a bug where not all different antimicrobial results were added as rows</li>
<li>Improved auto-determination for columns of types <code>&lt;mo&gt;</code> and <code>&lt;Date&gt;</code>
</li>
<li>Fixed a bug in <code><a href="../reference/bug_drug_combinations.html">bug_drug_combinations()</a></code> for when only one antibiotic was in the input data</li>
<li>Changed the summary for class <code>&lt;mo&gt;</code>, to highlight the %SI vs. %R</li>
<li>Improved error handling, giving more useful info when functions return an error</li>
<li>Algorithm improvements to <code><a href="../reference/as.ab.html">as.ab()</a></code>, many more misspellings are now translatable. The <code><a href="../reference/as.ab.html">as.ab()</a></code> function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.</li>
<li>Added progress bar to <code><a href="../reference/as.ab.html">as.ab()</a></code>
</li>
</ul>
</div>
</div>

View File

@ -10,7 +10,7 @@ articles:
WHONET: WHONET.html
benchmarks: benchmarks.html
resistance_predict: resistance_predict.html
last_built: 2020-07-01T09:51Z
last_built: 2020-07-01T14:20Z
urls:
reference: https://msberends.gitlab.io/AMR/reference
article: https://msberends.gitlab.io/AMR/articles

View File

@ -82,7 +82,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9019</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>
@ -262,6 +262,13 @@
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>All entries in the <a href='antibiotics.html'>antibiotics</a> data set have three different identifiers: a human readable EARS-Net code (column <code>ab</code>, used by ECDC and WHONET), an ATC code (column <code>atc</code>, used by WHO), and a CID code (column <code>cid</code>, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.</p>
<p>All these properties will be searched for the user input. The <code>as.ab()</code> function can correct for different forms of misspelling:</p><ul>
<li><p>Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.</p></li>
<li><p>Too few or too many vowels or consonants</p></li>
<li><p>Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)</p></li>
<li><p>Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.</p></li>
</ul>
<p>Use the <code><a href='ab_property.html'>ab_property()</a></code> functions to get properties based on the returned antibiotic ID, see Examples.</p>
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>

View File

@ -81,7 +81,7 @@
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">AMR (for R)</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
</span>
</div>

View File

@ -26,6 +26,14 @@ Use this function to determine the antibiotic code of one or more antibiotics. T
\details{
All entries in the \link{antibiotics} data set have three different identifiers: a human readable EARS-Net code (column \code{ab}, used by ECDC and WHONET), an ATC code (column \code{atc}, used by WHO), and a CID code (column \code{cid}, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
All these properties will be searched for the user input. The \code{\link[=as.ab]{as.ab()}} function can correct for different forms of misspelling:
\itemize{
\item Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.
\item Too few or too many vowels or consonants
\item Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)
\item Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.
}
Use the \code{\link[=ab_property]{ab_property()}} functions to get properties based on the returned antibiotic ID, see Examples.
}
\section{Source}{

View File

@ -40,7 +40,7 @@ test_that("as.ab works", {
expect_output(print(as.ab("amox")))
expect_output(print(data.frame(a = as.ab("amox"))))
expect_warning(as.ab("Z00ZZ00")) # not yet available in data set
expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
expect_warning(as.ab("UNKNOWN"))
expect_warning(as.ab(""))
@ -55,8 +55,11 @@ test_that("as.ab works", {
expect_equal(as.character(as.ab("Amoxy + clavulaanzuur")),
"AMC")
expect_equal(as.character(as.ab(c("mreopenem", "co-maoxiclav"))),
c("MEM", "AMC"))
expect_message(as.ab("cipro mero"))
# assigning and subsetting
x <- antibiotics$ab
expect_s3_class(x[1], "ab")

View File

@ -28,7 +28,7 @@ test_that("ab_from_text works", {
expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
"Amoxicillin")
expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", ")[[1]],
"AMX, CIP")
"AMC, CIP")
expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]],
500)