renamed year columns to ref

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-10-01 14:44:40 +02:00
parent 3119a221e5
commit ed17db0263
16 changed files with 78 additions and 66 deletions

View File

@ -90,7 +90,6 @@ export(labels_rsi_count)
export(left_join_microorganisms)
export(like)
export(mo_TSN)
export(mo_authors)
export(mo_class)
export(mo_family)
export(mo_fullname)
@ -99,13 +98,13 @@ export(mo_gramstain)
export(mo_order)
export(mo_phylum)
export(mo_property)
export(mo_ref)
export(mo_shortname)
export(mo_species)
export(mo_subkingdom)
export(mo_subspecies)
export(mo_taxonomy)
export(mo_type)
export(mo_year)
export(n_rsi)
export(p.symbol)
export(portion_I)

View File

@ -6,7 +6,7 @@
* Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`
* Semantic names: `mo_fullname`, `mo_shortname`
* Microbial properties: `mo_type`, `mo_gramstain`
* Author information: `mo_author`, `mo_year`
* Author and year: `mo_ref`
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
```r
@ -20,11 +20,11 @@
# [1] "Streptococcus grupo A"
```
Furthermore, old taxonomic names will give a note about the current taxonomic name:
Furthermore, former taxonomic names will give a note about the current taxonomic name:
```r
mo_gramstain("Escherichia blattae")
mo_gramstain("Esc blattae")
# Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010)
# [1] "Gram negative
# [1] "Gram negative"
```
* Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates
* Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables

View File

@ -124,7 +124,7 @@
#'
#' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}.
#' @inheritSection as.mo ITIS
#' @format A \code{\link{data.frame}} with 18,833 observations and 16 variables:
#' @format A \code{\link{data.frame}} with 18,833 observations and 15 variables:
#' \describe{
#' \item{\code{mo}}{ID of microorganism}
#' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@ -140,8 +140,7 @@
#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
#' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
#' \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
#' \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
#' \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
#' }
#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}}
@ -151,13 +150,12 @@
#'
#' A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}.
#' @inheritSection as.mo ITIS
#' @format A \code{\link{data.frame}} with 2,383 observations and 5 variables:
#' @format A \code{\link{data.frame}} with 2,383 observations and 4 variables:
#' \describe{
#' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
#' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
#' \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS}
#' \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source}
#' \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source}
#' \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
#' }
#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}}

View File

@ -517,6 +517,12 @@ diff.frequency_tbl <- function(x, y, ...) {
stop("Both x and y must be a frequency table.")
}
cat("Differences between frequency tables")
if (identical(x, y)) {
cat("\n\nNo differences found.\n")
return(invisible())
}
x.attr <- attributes(x)$opt
# only keep item and count
@ -543,12 +549,11 @@ diff.frequency_tbl <- function(x, y, ...) {
diff.percent,
paste0("+", diff.percent)))
cat("Differences between frequency tables")
print(
knitr::kable(x,
format = x.attr$tbl_format,
col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
align = "lrrrr",
align = paste0(x.attr$column_align[1], "rrrr"),
padding = 1)
)
}

View File

@ -31,6 +31,7 @@ globalVariables(c(".",
"cum_percent",
"date_lab",
"days_diff",
"diff.percent",
"fctlvl",
"first_isolate_row_index",
"Freq",
@ -58,6 +59,7 @@ globalVariables(c(".",
"patient_id",
"prevalence",
"R",
"ref",
"real_first_isolate",
"S",
"septic_patients",

45
R/mo.R
View File

@ -244,13 +244,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
x <- gsub(" ", ".*", x, fixed = TRUE)
# add start and stop regex
x <- paste0('^', x, '$')
x_withspaces_all <- x_withspaces
x_withspaces_start <- paste0('^', x_withspaces)
x_withspaces <- paste0('^', x_withspaces, '$')
# cat(paste0('x "', x, '"\n'))
# cat(paste0('x_species "', x_species, '"\n'))
# cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n'))
# cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n'))
# cat(paste0('x_withspaces "', x_withspaces, '"\n'))
# cat(paste0('x_backup "', x_backup, '"\n'))
@ -522,16 +520,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
MOs_old <- as.data.table(AMR::microorganisms.old)
setkey(MOs_old, name, tsn_new)
}
found <- MOs_old[tolower(name) == tolower(x_backup[i]) |
tsn == x_trimmed[i],]
found <- MOs_old[tolower(name) == tolower(x_backup[i])
| tsn == x_trimmed[i]
| name %like% x_withspaces[i],]
if (NROW(found) > 0) {
x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
ref_old = found[1, ref],
ref_new = MOs[tsn == found[1, tsn_new], ref])
next
}
@ -548,10 +545,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
call. = FALSE, immediate. = TRUE)
renamed_note(name_old = found[1, name],
name_new = MOs[tsn == found[1, tsn_new], fullname],
authors_old = found[1, authors],
authors_new = MOs[tsn == found[1, tsn_new], authors],
year_old = found[1, year],
year_new = MOs[tsn == found[1, tsn_new], year])
ref_old = found[1, ref],
ref_new = MOs[tsn == found[1, tsn_new], ref])
next
}
@ -666,20 +661,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
}
#' @importFrom dplyr case_when
renamed_note <- function(name_old, name_new,
authors_old = "", authors_new = "",
year_old = "", year_new = "") {
authorship_old <- case_when(
!authors_old %in% c("", NA) & !year_old %in% c("", NA) ~ paste0(" (", authors_old, ", ", year_old, ")"),
!authors_old %in% c("", NA) ~ paste0(" (", authors_old, ")"),
!year_old %in% c("", NA) ~ paste0(" (", year_old, ")"),
TRUE ~ "")
authorship_new <- case_when(
!authors_new %in% c("", NA) & !year_new %in% c("", NA) ~ paste0(" (", authors_new, ", ", year_new, ")"),
!authors_new %in% c("", NA) ~ paste0(" (", authors_new, ")"),
!year_new %in% c("", NA) ~ paste0(" (", year_new, ")"),
TRUE ~ "")
base::message(paste0("Note: '", name_old, "'", authorship_old, " was renamed '", name_new, "'", authorship_new))
# Emit a message noting that a taxonomic name was renamed.
#
# name_old, name_new: the former and the current taxonomic name.
# ref_old, ref_new:   optional publication reference (author(s) and year) for
#                     each name; it is appended as " (ref)" after the name.
#                     A reference that is missing, NA, empty or zero-length is
#                     left out entirely, so no empty " ()" is ever printed.
#
# Called for its side effect only: the note is sent through base::message().
renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
# format one reference as " (ref)", or "" when there is nothing to show
format_ref <- function(ref) {
# a lookup without a match can yield a zero-length vector; only the
# first element is used so the `if` condition is always a scalar
ref <- as.character(ref)
if (length(ref) == 0 || is.na(ref[1]) || !nzchar(trimws(ref[1]))) {
return("")
}
paste0(" (", ref[1], ")")
}
base::message(paste0("Note: '", name_old, "'", format_ref(ref_old),
" was renamed '", name_new, "'", format_ref(ref_new)))
}
#' @exportMethod print.mo

View File

@ -45,8 +45,7 @@
#' mo_gramstain("E. coli") # "Gram negative"
#' mo_TSN("E. coli") # 285
#' mo_type("E. coli") # "Bacteria"
#' mo_authors("E. coli") # "Castellani and Chalmers"
#' mo_year("E. coli") # 1919
#' mo_ref("E. coli") # "Castellani and Chalmers, 1919"
#'
#'
#' # Abbreviations known in the field
@ -199,14 +198,8 @@ mo_subkingdom <- function(x, ...) {
#' @rdname mo_property
#' @export
mo_authors <- function(x, ...) {
mo_validate(x = x, property = "authors", ...)
}
#' @rdname mo_property
#' @export
mo_year <- function(x, ...) {
mo_validate(x = x, property = "year", ...)
mo_ref <- function(x, ...) {
mo_validate(x = x, property = "ref", ...)
}
#' @rdname mo_property

Binary file not shown.

Binary file not shown.

View File

@ -119,7 +119,7 @@ years <- format(years)
# print a histogram of numeric values
septic_patients \%>\%
freq(age) \%>\%
hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
hist()
# or print all points to a regular plot
septic_patients \%>\%
@ -147,6 +147,10 @@ table(septic_patients$gender,
septic_patients$age) \%>\%
freq(sep = " **sep** ")
# check differences between frequency tables
diff(freq(septic_patients$trim),
freq(septic_patients$trsu))
\dontrun{
# send frequency table to clipboard (e.g. for pasting in Excel)
septic_patients \%>\%

View File

@ -4,7 +4,7 @@
\name{microorganisms}
\alias{microorganisms}
\title{Data set with taxonomic data from ITIS}
\format{A \code{\link{data.frame}} with 18,833 observations and 16 variables:
\format{A \code{\link{data.frame}} with 18,833 observations and 15 variables:
\describe{
\item{\code{mo}}{ID of microorganism}
\item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@ -20,8 +20,7 @@
\item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
\item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
\item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
\item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
\item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
\item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
}}
\source{
[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.

View File

@ -4,13 +4,12 @@
\name{microorganisms.old}
\alias{microorganisms.old}
\title{Data set with old taxonomic data from ITIS}
\format{A \code{\link{data.frame}} with 2,383 observations and 5 variables:
\format{A \code{\link{data.frame}} with 2,383 observations and 4 variables:
\describe{
\item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
\item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
\item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS}
\item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source}
\item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source}
\item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
}}
\source{
[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.

View File

@ -12,8 +12,7 @@
\alias{mo_class}
\alias{mo_phylum}
\alias{mo_subkingdom}
\alias{mo_authors}
\alias{mo_year}
\alias{mo_ref}
\alias{mo_type}
\alias{mo_TSN}
\alias{mo_gramstain}
@ -40,9 +39,7 @@ mo_phylum(x, ...)
mo_subkingdom(x, ...)
mo_authors(x, ...)
mo_year(x, ...)
mo_ref(x, ...)
mo_type(x, language = NULL, ...)
@ -103,8 +100,7 @@ mo_shortname("E. coli") # "E. coli"
mo_gramstain("E. coli") # "Gram negative"
mo_TSN("E. coli") # 285
mo_type("E. coli") # "Bacteria"
mo_authors("E. coli") # "Castellani and Chalmers"
mo_year("E. coli") # 1919
mo_ref("E. coli") # "Castellani and Chalmers, 1919"
# Abbreviations known in the field

View File

@ -112,5 +112,19 @@ test_that("frequency table works", {
expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
ampi, pita, czol, cfep, cfur))
# run diff
expect_output(print(
diff(freq(septic_patients$amcl),
freq(septic_patients$amox))
))
expect_output(print(
diff(freq(septic_patients$age),
freq(septic_patients$age)) # same
))
expect_error(print(
diff(freq(septic_patients$amcl),
"Just a string") # not a freq tbl
))
})

View File

@ -158,4 +158,14 @@ test_that("as.mo works", {
expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL")
expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))
# predefined reference_df
expect_equal(as.character(as.mo("TestingOwnID",
reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))),
"B_ESCHR_COL")
expect_equal(as.character(as.mo(c("TestingOwnID", "E. coli"),
reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))),
c("B_ESCHR_COL", "B_ESCHR_COL"))
expect_warning(as.character(as.mo("TestingOwnID",
reference_df = NULL)))
})

View File

@ -15,8 +15,8 @@ test_that("mo_property works", {
expect_equal(class(mo_taxonomy("E. coli")), "list")
expect_equal(names(mo_taxonomy("E. coli")), c("subkingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies"))
expect_equal(mo_authors("E. coli"), "Castellani and Chalmers")
expect_equal(mo_year("E. coli"), 1919)
expect_equal(mo_ref("E. coli"), "Castellani and Chalmers, 1919")
expect_equal(mo_shortname("MRSA"), "S. aureus")
expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus")