mirror of
https://github.com/msberends/AMR.git
synced 2024-12-25 18:06:12 +01:00
(v1.2.0.9022) as.ab() improvement
This commit is contained in:
parent
afc660dc33
commit
298e67a45b
@ -1,5 +1,5 @@
|
||||
Package: AMR
|
||||
Version: 1.2.0.9021
|
||||
Version: 1.2.0.9022
|
||||
Date: 2020-07-01
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
|
11
NEWS.md
11
NEWS.md
@ -1,4 +1,4 @@
|
||||
# AMR 1.2.0.9021
|
||||
# AMR 1.2.0.9022
|
||||
## <small>Last updated: 01-Jul-2020</small>
|
||||
|
||||
### New
|
||||
@ -20,16 +20,19 @@
|
||||
|
||||
### Changed
|
||||
* Using non-existing columns in all `count_*()`, `proportion_*()`, `susceptibility()` and `resistance()` functions will now return an error instead of dropping them silently
|
||||
* Improvements for `as.ab()`:
|
||||
* Dramatic improvement of the algorithm behind `as.ab()`, making many more input errors translatable, such as artefacts from digitalised health care records, the use of too few or too many vowels or consonants, and many more
|
||||
* Added progress bar
|
||||
* Fixed a bug where `as.ab()` would return an error on invalid input values
|
||||
* The `as.ab()` function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.
|
||||
* Fixed a bug where `eucast_rules()` would not work on a tibble when the `tibble` or `dplyr` package was loaded
|
||||
* All `*_join_microorganisms()` functions and `bug_drug_combinations()` now return the original data class (e.g. `tibble`s and `data.table`s)
|
||||
* Fixed a bug where `as.ab()` would return an error on invalid input values
|
||||
* Fixed a bug for using grouped versions of `rsi_df()`, `proportion_df()` and `count_df()`, and fixed a bug where not all different antimicrobial results were added as rows
|
||||
* Improved auto-determination for columns of types `<mo>` and `<Date>`
|
||||
* Fixed a bug in `bug_drug_combinations()` for when only one antibiotic was in the input data
|
||||
* Changed the summary for class `<mo>`, to highlight the %SI vs. %R
|
||||
* Improved error handling, giving more useful info when functions return an error
|
||||
* Algorithm improvements to `as.ab()`, many more misspellings are now translatable. The `as.ab()` function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.
|
||||
* Added progress bar to `as.ab()`
|
||||
|
||||
|
||||
# AMR 1.2.0
|
||||
|
||||
|
40
R/ab.R
40
R/ab.R
@ -29,6 +29,13 @@
|
||||
#' @rdname as.ab
|
||||
#' @inheritSection WHOCC WHOCC
|
||||
#' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
|
||||
#'
|
||||
#' All these properties will be searched for the user input. The [as.ab()] function can correct for different forms of misspelling:
|
||||
#'
|
||||
#' * Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.
|
||||
#' * Too few or too many vowels or consonants
|
||||
#' * Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)
|
||||
#' * Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.
|
||||
#'
|
||||
#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples.
|
||||
#'
|
||||
@ -231,7 +238,9 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
# replace spaces and slashes with a possibility on both
|
||||
x_spelling <- gsub("[ /]", "( .*|.*/)", x_spelling)
|
||||
# correct for digital reading text (OCR)
|
||||
x_spelling <- gsub("[NRD]", "[NRD]", x_spelling)
|
||||
x_spelling <- gsub("[NRD8B]", "[NRD8B]", x_spelling)
|
||||
x_spelling <- gsub("(O|0)", "(O|0)+", x_spelling)
|
||||
x_spelling <- gsub("++", "+", x_spelling, fixed = TRUE)
|
||||
}
|
||||
|
||||
# try if name starts with it
|
||||
@ -246,6 +255,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
# and try if any synonym starts with it
|
||||
synonym_found <- unlist(lapply(antibiotics$synonyms,
|
||||
function(s) any(s %like% paste0("^", x_spelling))))
|
||||
@ -254,7 +264,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# INITIAL SEARCH - More uncertain results ----
|
||||
|
||||
if (initial_search == TRUE) {
|
||||
@ -341,7 +351,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# first 5 except for cephalosporins, then first 7 (those cephalosporins all start quite the same!)
|
||||
found <- suppressWarnings(as.ab(substr(x[i], 1, 5), initial_search = FALSE))
|
||||
if (!is.na(found) && !ab_group(found, initial_search = FALSE) %like% "cephalosporins") {
|
||||
@ -365,7 +375,7 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
x_new[i] <- note_if_more_than_one_found(found, i, from_text)
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
# make all vowels facultative
|
||||
search_str <- gsub("([AEIOUY])", "\\1*", x[i])
|
||||
found <- suppressWarnings(as.ab(search_str, initial_search = FALSE, already_regex = TRUE))
|
||||
@ -390,8 +400,28 @@ as.ab <- function(x, flag_multiple_results = TRUE, ...) {
|
||||
next
|
||||
}
|
||||
|
||||
# try with switched character, like "mreopenem"
|
||||
for (j in seq_len(nchar(x[i]))) {
|
||||
x_switched <- paste0(
|
||||
# beginning part:
|
||||
substr(x[i], 1, j - 1),
|
||||
# here is the switching of 2 characters:
|
||||
substr(x[i], j + 1, j + 1),
|
||||
substr(x[i], j, j),
|
||||
# ending part:
|
||||
substr(x[i], j + 2, nchar(x[i])))
|
||||
found <- suppressWarnings(as.ab(x_switched, initial_search = FALSE))
|
||||
if (!is.na(found)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if (!is.na(found)) {
|
||||
x_new[i] <- found[1L]
|
||||
next
|
||||
}
|
||||
|
||||
} # end of initial_search = TRUE
|
||||
|
||||
|
||||
# not found
|
||||
x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])
|
||||
}
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -43,7 +43,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -229,9 +229,9 @@
|
||||
<small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
|
||||
</div>
|
||||
|
||||
<div id="amr-1209021" class="section level1">
|
||||
<h1 class="page-header" data-toc-text="1.2.0.9021">
|
||||
<a href="#amr-1209021" class="anchor"></a>AMR 1.2.0.9021<small> Unreleased </small>
|
||||
<div id="amr-1209022" class="section level1">
|
||||
<h1 class="page-header" data-toc-text="1.2.0.9022">
|
||||
<a href="#amr-1209022" class="anchor"></a>AMR 1.2.0.9022<small> Unreleased </small>
|
||||
</h1>
|
||||
<div id="last-updated-01-jul-2020" class="section level2">
|
||||
<h2 class="hasAnchor">
|
||||
@ -263,18 +263,22 @@
|
||||
<a href="#changed" class="anchor"></a>Changed</h3>
|
||||
<ul>
|
||||
<li>Using non-existing columns in all <code>count_*()</code>, <code>proportion_*()</code>, <code><a href="../reference/proportion.html">susceptibility()</a></code> and <code><a href="../reference/proportion.html">resistance()</a></code> functions will now return an error instead of dropping them silently</li>
|
||||
<li>Improvements for <code><a href="../reference/as.ab.html">as.ab()</a></code>:
|
||||
<ul>
|
||||
<li>Dramatic improvement of the algorithm behind <code><a href="../reference/as.ab.html">as.ab()</a></code>, making many more input errors translatable, such as artefacts from digitalised health care records, the use of too few or too many vowels or consonants, and many more</li>
|
||||
<li>Added progress bar</li>
|
||||
<li>Fixed a bug where <code><a href="../reference/as.ab.html">as.ab()</a></code> would return an error on invalid input values</li>
|
||||
<li>The <code><a href="../reference/as.ab.html">as.ab()</a></code> function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Fixed a bug where <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> would not work on a tibble when the <code>tibble</code> or <code>dplyr</code> package was loaded</li>
|
||||
<li>All <code>*_join_microorganisms()</code> functions and <code><a href="../reference/bug_drug_combinations.html">bug_drug_combinations()</a></code> now return the original data class (e.g. <code>tibble</code>s and <code>data.table</code>s)</li>
|
||||
<li>Fixed a bug where <code><a href="../reference/as.ab.html">as.ab()</a></code> would return an error on invalid input values</li>
|
||||
<li>Fixed a bug for using grouped versions of <code><a href="../reference/proportion.html">rsi_df()</a></code>, <code><a href="../reference/proportion.html">proportion_df()</a></code> and <code><a href="../reference/count.html">count_df()</a></code>, and fixed a bug where not all different antimicrobial results were added as rows</li>
|
||||
<li>Improved auto-determination for columns of types <code><mo></code> and <code><Date></code>
|
||||
</li>
|
||||
<li>Fixed a bug in <code><a href="../reference/bug_drug_combinations.html">bug_drug_combinations()</a></code> for when only one antibiotic was in the input data</li>
|
||||
<li>Changed the summary for class <code><mo></code>, to highlight the %SI vs. %R</li>
|
||||
<li>Improved error handling, giving more useful info when functions return an error</li>
|
||||
<li>Algorithm improvements to <code><a href="../reference/as.ab.html">as.ab()</a></code>, many more misspellings are now translatable. The <code><a href="../reference/as.ab.html">as.ab()</a></code> function will now throw a note if more than 1 antimicrobial drug could be retrieved from a single input value.</li>
|
||||
<li>Added progress bar to <code><a href="../reference/as.ab.html">as.ab()</a></code>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -10,7 +10,7 @@ articles:
|
||||
WHONET: WHONET.html
|
||||
benchmarks: benchmarks.html
|
||||
resistance_predict: resistance_predict.html
|
||||
last_built: 2020-07-01T09:51Z
|
||||
last_built: 2020-07-01T14:20Z
|
||||
urls:
|
||||
reference: https://msberends.gitlab.io/AMR/reference
|
||||
article: https://msberends.gitlab.io/AMR/articles
|
||||
|
@ -82,7 +82,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9019</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
@ -262,6 +262,13 @@
|
||||
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
|
||||
|
||||
<p>All entries in the <a href='antibiotics.html'>antibiotics</a> data set have three different identifiers: a human readable EARS-Net code (column <code>ab</code>, used by ECDC and WHONET), an ATC code (column <code>atc</code>, used by WHO), and a CID code (column <code>cid</code>, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.</p>
|
||||
<p>All these properties will be searched for the user input. The <code>as.ab()</code> function can correct for different forms of misspelling:</p><ul>
|
||||
<li><p>Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.</p></li>
|
||||
<li><p>Too few or too many vowels or consonants</p></li>
|
||||
<li><p>Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)</p></li>
|
||||
<li><p>Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.</p></li>
|
||||
</ul>
|
||||
|
||||
<p>Use the <code><a href='ab_property.html'>ab_property()</a></code> functions to get properties based on the returned antibiotic ID, see Examples.</p>
|
||||
<h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>
|
||||
|
||||
|
@ -81,7 +81,7 @@
|
||||
</button>
|
||||
<span class="navbar-brand">
|
||||
<a class="navbar-link" href="../index.html">AMR (for R)</a>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9021</span>
|
||||
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.2.0.9022</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
|
@ -26,6 +26,14 @@ Use this function to determine the antibiotic code of one or more antibiotics. T
|
||||
\details{
|
||||
All entries in the \link{antibiotics} data set have three different identifiers: a human readable EARS-Net code (column \code{ab}, used by ECDC and WHONET), an ATC code (column \code{atc}, used by WHO), and a CID code (column \code{cid}, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
|
||||
|
||||
All these properties will be searched for the user input. The \code{\link[=as.ab]{as.ab()}} function can correct for different forms of misspelling:
|
||||
\itemize{
|
||||
\item Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc.
|
||||
\item Too few or too many vowels or consonants
|
||||
\item Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast)
|
||||
\item Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc.
|
||||
}
|
||||
|
||||
Use the \code{\link[=ab_property]{ab_property()}} functions to get properties based on the returned antibiotic ID, see Examples.
|
||||
}
|
||||
\section{Source}{
|
||||
|
@ -40,7 +40,7 @@ test_that("as.ab works", {
|
||||
expect_output(print(as.ab("amox")))
|
||||
expect_output(print(data.frame(a = as.ab("amox"))))
|
||||
|
||||
expect_warning(as.ab("Z00ZZ00")) # not yet available in data set
|
||||
expect_warning(as.ab("J00AA00")) # ATC not yet available in data set
|
||||
expect_warning(as.ab("UNKNOWN"))
|
||||
expect_warning(as.ab(""))
|
||||
|
||||
@ -55,8 +55,11 @@ test_that("as.ab works", {
|
||||
expect_equal(as.character(as.ab("Amoxy + clavulaanzuur")),
|
||||
"AMC")
|
||||
|
||||
expect_equal(as.character(as.ab(c("mreopenem", "co-maoxiclav"))),
|
||||
c("MEM", "AMC"))
|
||||
|
||||
expect_message(as.ab("cipro mero"))
|
||||
|
||||
|
||||
# assigning and subsetting
|
||||
x <- antibiotics$ab
|
||||
expect_s3_class(x[1], "ab")
|
||||
|
@ -28,7 +28,7 @@ test_that("ab_from_text works", {
|
||||
expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", translate_ab = TRUE)[[1]],
|
||||
"Amoxicillin")
|
||||
expect_identical(ab_from_text("administered amoxi/clav and cipro", collapse = ", ")[[1]],
|
||||
"AMX, CIP")
|
||||
"AMC, CIP")
|
||||
|
||||
expect_identical(ab_from_text("28/03/2020 regular amoxicilliin 500mg po tds", type = "dose")[[1]],
|
||||
500)
|
||||
|
Loading…
Reference in New Issue
Block a user