diff --git a/NAMESPACE b/NAMESPACE index ac78d183..61343f00 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,7 +90,6 @@ export(labels_rsi_count) export(left_join_microorganisms) export(like) export(mo_TSN) -export(mo_authors) export(mo_class) export(mo_family) export(mo_fullname) @@ -99,13 +98,13 @@ export(mo_gramstain) export(mo_order) export(mo_phylum) export(mo_property) +export(mo_ref) export(mo_shortname) export(mo_species) export(mo_subkingdom) export(mo_subspecies) export(mo_taxonomy) export(mo_type) -export(mo_year) export(n_rsi) export(p.symbol) export(portion_I) diff --git a/NEWS.md b/NEWS.md index e81443f7..1df963f7 100755 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ * Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies` * Semantic names: `mo_fullname`, `mo_shortname` * Microbial properties: `mo_type`, `mo_gramstain` - * Author information: `mo_author`, `mo_year` + * Author and year: `mo_ref` They also come with support for German, Dutch, French, Italian, Spanish and Portuguese: ```r @@ -20,11 +20,11 @@ # [1] "Streptococcus grupo A" ``` - Furthermore, old taxonomic names will give a note about the current taxonomic name: + Furthermore, former taxonomic names will give a note about the current taxonomic name: ```r - mo_gramstain("Escherichia blattae") + mo_gramstain("Esc blattae") # Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010) - # [1] "Gram negative + # [1] "Gram negative" ``` * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates * Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables diff --git a/R/data.R b/R/data.R index 98492b91..da83228b 100755 --- a/R/data.R +++ b/R/data.R @@ -124,7 +124,7 @@ #' #' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}. #' @inheritSection as.mo ITIS -#' @format A \code{\link{data.frame}} with 18,833 observations and 16 variables: +#' @format A \code{\link{data.frame}} with 18,833 observations and 15 variables: #' \describe{ #' \item{\code{mo}}{ID of microorganism} #' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS} @@ -140,8 +140,7 @@ #' \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}} #' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}} #' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.} -#' \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source} -#' \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source} +#' \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source} #' } #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. #' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}} @@ -151,13 +150,12 @@ #' #' A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}. #' @inheritSection as.mo ITIS -#' @format A \code{\link{data.frame}} with 2,383 observations and 5 variables: +#' @format A \code{\link{data.frame}} with 2,383 observations and 4 variables: #' \describe{ #' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} #' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} #' \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS} -#' \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source} -#' \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source} +#' \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source} #' } #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. #' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}} diff --git a/R/freq.R b/R/freq.R index 3ef5a5ea..72e495e2 100755 --- a/R/freq.R +++ b/R/freq.R @@ -517,6 +517,12 @@ diff.frequency_tbl <- function(x, y, ...) { stop("Both x and y must be a frequency table.") } + cat("Differences between frequency tables") + if (identical(x, y)) { + cat("\n\nNo differences found.\n") + return(invisible()) + } + x.attr <- attributes(x)$opt # only keep item and count @@ -543,12 +549,11 @@ diff.frequency_tbl <- function(x, y, ...) { diff.percent, paste0("+", diff.percent))) - cat("Differences between frequency tables") print( knitr::kable(x, format = x.attr$tbl_format, col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"), - align = "lrrrr", + align = paste0(x.attr$column_align[1], "rrrr"), padding = 1) ) } diff --git a/R/globals.R b/R/globals.R index 222c272b..8873099a 100755 --- a/R/globals.R +++ b/R/globals.R @@ -31,6 +31,7 @@ globalVariables(c(".", "cum_percent", "date_lab", "days_diff", + "diff.percent", "fctlvl", "first_isolate_row_index", "Freq", @@ -58,6 +59,7 @@ globalVariables(c(".", "patient_id", "prevalence", "R", + "ref", "real_first_isolate", "S", "septic_patients", diff --git a/R/mo.R b/R/mo.R index 07457b42..1be5e6ad 100644 --- a/R/mo.R +++ b/R/mo.R @@ -244,13 +244,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x <- gsub(" ", ".*", x, fixed = TRUE) # add start en stop regex x <- paste0('^', x, '$') - x_withspaces_all <- x_withspaces x_withspaces_start <- paste0('^', x_withspaces) x_withspaces <- paste0('^', x_withspaces, '$') # cat(paste0('x "', x, '"\n')) # cat(paste0('x_species "', x_species, '"\n')) - # cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n')) # cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n')) # cat(paste0('x_withspaces "', x_withspaces, '"\n')) # cat(paste0('x_backup "', x_backup, '"\n')) @@ -522,16 +520,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = MOs_old <- as.data.table(AMR::microorganisms.old) setkey(MOs_old, name, tsn_new) } - found <- MOs_old[tolower(name) == tolower(x_backup[i]) | - tsn == x_trimmed[i],] + found <- MOs_old[tolower(name) == tolower(x_backup[i]) + | tsn == x_trimmed[i] + | name %like% x_withspaces[i],] if (NROW(found) > 0) { x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]] renamed_note(name_old = found[1, name], name_new = MOs[tsn == found[1, tsn_new], fullname], - authors_old = found[1, authors], - authors_new = MOs[tsn == found[1, tsn_new], authors], - year_old = found[1, year], - year_new = MOs[tsn == found[1, tsn_new], year]) + ref_old = found[1, ref], + ref_new = MOs[tsn == found[1, tsn_new], ref]) next } @@ -548,10 +545,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = call. = FALSE, immediate. = TRUE) renamed_note(name_old = found[1, name], name_new = MOs[tsn == found[1, tsn_new], fullname], - authors_old = found[1, authors], - authors_new = MOs[tsn == found[1, tsn_new], authors], - year_old = found[1, year], - year_new = MOs[tsn == found[1, tsn_new], year]) + ref_old = found[1, ref], + ref_new = MOs[tsn == found[1, tsn_new], ref]) next } @@ -666,20 +661,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = } #' @importFrom dplyr case_when -renamed_note <- function(name_old, name_new, - authors_old = "", authors_new = "", - year_old = "", year_new = "") { - authorship_old <- case_when( - !authors_old %in% c("", NA) & !year_old %in% c("", NA) ~ paste0(" (", authors_old, ", ", year_old, ")"), - !authors_old %in% c("", NA) ~ paste0(" (", authors_old, ")"), - !year_old %in% c("", NA) ~ paste0(" (", year_old, ")"), - TRUE ~ "") - authorship_new <- case_when( - !authors_new %in% c("", NA) & !year_new %in% c("", NA) ~ paste0(" (", authors_new, ", ", year_new, ")"), - !authors_new %in% c("", NA) ~ paste0(" (", authors_new, ")"), - !year_new %in% c("", NA) ~ paste0(" (", year_new, ")"), - TRUE ~ "") - base::message(paste0("Note: '", name_old, "'", authorship_old, " was renamed '", name_new, "'", authorship_new)) +renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") { + if (!is.na(ref_old)) { + ref_old <- paste0(" (", ref_old, ")") + } else { + ref_old <- "" + } + if (!is.na(ref_new)) { + ref_new <- paste0(" (", ref_new, ")") + } else { + ref_new <- "" + } + base::message(paste0("Note: '", name_old, "'", ref_old, " was renamed '", name_new, "'", ref_new)) } #' @exportMethod print.mo diff --git a/R/mo_property.R b/R/mo_property.R index be1ad44b..e7def8ea 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -45,8 +45,7 @@ #' mo_gramstain("E. coli") # "Gram negative" #' mo_TSN("E. coli") # 285 #' mo_type("E. coli") # "Bacteria" -#' mo_authors("E. coli") # "Castellani and Chalmers" -#' mo_year("E. coli") # 1919 +#' mo_ref("E. coli") # "Castellani and Chalmers, 1919" #' #' #' # Abbreviations known in the field @@ -199,14 +198,8 @@ mo_subkingdom <- function(x, ...) { #' @rdname mo_property #' @export -mo_authors <- function(x, ...) { - mo_validate(x = x, property = "authors", ...) -} - -#' @rdname mo_property -#' @export -mo_year <- function(x, ...) { - mo_validate(x = x, property = "year", ...) +mo_ref <- function(x, ...) { + mo_validate(x = x, property = "ref", ...) } #' @rdname mo_property diff --git a/data/microorganisms.old.rda b/data/microorganisms.old.rda index ca40a9d4..4b867071 100644 Binary files a/data/microorganisms.old.rda and b/data/microorganisms.old.rda differ diff --git a/data/microorganisms.rda b/data/microorganisms.rda index 6a4b2f51..0826741b 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/man/freq.Rd b/man/freq.Rd index ab8db157..495c40af 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -119,7 +119,7 @@ years <- format(years) # print a histogram of numeric values septic_patients \%>\% freq(age) \%>\% - hist() # prettier: ggplot(septic_patients, aes(age)) + geom_histogram() + hist() # or print all points to a regular plot septic_patients \%>\% @@ -147,6 +147,10 @@ table(septic_patients$gender, septic_patients$age) \%>\% freq(sep = " **sep** ") +# check differences between frequency tables +diff(freq(septic_patients$trim), + freq(septic_patients$trsu)) + \dontrun{ # send frequency table to clipboard (e.g. for pasting in Excel) septic_patients \%>\% diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 59c4a7f8..84eea38b 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -4,7 +4,7 @@ \name{microorganisms} \alias{microorganisms} \title{Data set with taxonomic data from ITIS} -\format{A \code{\link{data.frame}} with 18,833 observations and 16 variables: +\format{A \code{\link{data.frame}} with 18,833 observations and 15 variables: \describe{ \item{\code{mo}}{ID of microorganism} \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS} @@ -20,8 +20,7 @@ \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}} \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}} \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.} - \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source} - \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source} + \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source} }} \source{ [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd index 6ca053f1..c808dcac 100644 --- a/man/microorganisms.old.Rd +++ b/man/microorganisms.old.Rd @@ -4,13 +4,12 @@ \name{microorganisms.old} \alias{microorganisms.old} \title{Data set with old taxonomic data from ITIS} -\format{A \code{\link{data.frame}} with 2,383 observations and 5 variables: +\format{A \code{\link{data.frame}} with 2,383 observations and 4 variables: \describe{ \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS} - \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source} - \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source} + \item{\code{ref}}{Author(s) and year of concerning publication as found in ITIS, see Source} }} \source{ [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 7af584ee..38a568ff 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -12,8 +12,7 @@ \alias{mo_class} \alias{mo_phylum} \alias{mo_subkingdom} -\alias{mo_authors} -\alias{mo_year} +\alias{mo_ref} \alias{mo_type} \alias{mo_TSN} \alias{mo_gramstain} @@ -40,9 +39,7 @@ mo_phylum(x, ...) mo_subkingdom(x, ...) -mo_authors(x, ...) - -mo_year(x, ...) +mo_ref(x, ...) mo_type(x, language = NULL, ...) @@ -103,8 +100,7 @@ mo_shortname("E. coli") # "E. coli" mo_gramstain("E. coli") # "Gram negative" mo_TSN("E. coli") # 285 mo_type("E. coli") # "Bacteria" -mo_authors("E. coli") # "Castellani and Chalmers" -mo_year("E. coli") # 1919 +mo_ref("E. coli") # "Castellani and Chalmers, 1919" # Abbreviations known in the field diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 8b528037..4940fb54 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -112,5 +112,19 @@ test_that("frequency table works", { expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl, ampi, pita, czol, cfep, cfur)) + # run diff + expect_output(print( + diff(freq(septic_patients$amcl), + freq(septic_patients$amox)) + )) + expect_output(print( + diff(freq(septic_patients$age), + freq(septic_patients$age)) # same + )) + expect_error(print( + diff(freq(septic_patients$amcl), + "Just a string") # not a freq tbl + )) + }) diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index c24e9758..dc0e8afe 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -158,4 +158,14 @@ test_that("as.mo works", { expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL") expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE)) + # predefined reference_df + expect_equal(as.character(as.mo("TestingOwnID", + reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))), + "B_ESCHR_COL") + expect_equal(as.character(as.mo(c("TestingOwnID", "E. coli"), + reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))), + c("B_ESCHR_COL", "B_ESCHR_COL")) + expect_warning(as.character(as.mo("TestingOwnID", + reference_df = NULL))) + }) diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index e37e9fb9..4a6696df 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -15,8 +15,8 @@ test_that("mo_property works", { expect_equal(class(mo_taxonomy("E. coli")), "list") expect_equal(names(mo_taxonomy("E. coli")), c("subkingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies")) - expect_equal(mo_authors("E. coli"), "Castellani and Chalmers") - expect_equal(mo_year("E. coli"), 1919) + + expect_equal(mo_ref("E. coli"), "Castellani and Chalmers, 1919") expect_equal(mo_shortname("MRSA"), "S. aureus") expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus")