renamed year columns to ref

2026-06-25 21:36:20 +02:00 · 2018-10-01 14:44:40 +02:00
parent 3119a221e5
commit ed17db0263
16 changed files with 78 additions and 66 deletions
--- a/3
+++ b/3
@@ -90,7 +90,6 @@ export(labels_rsi_count)
 export(left_join_microorganisms)
 export(like)
 export(mo_TSN)
-export(mo_authors)
 export(mo_class)
 export(mo_family)
 export(mo_fullname)
@@ -99,13 +98,13 @@ export(mo_gramstain)
 export(mo_order)
 export(mo_phylum)
 export(mo_property)
+export(mo_ref)
 export(mo_shortname)
 export(mo_species)
 export(mo_subkingdom)
 export(mo_subspecies)
 export(mo_taxonomy)
 export(mo_type)
-export(mo_year)
 export(n_rsi)
 export(p.symbol)
 export(portion_I)
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,7 +6,7 @@
  * Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`
  * Semantic names: `mo_fullname`, `mo_shortname`
  * Microbial properties: `mo_type`, `mo_gramstain`
-  * Author information: `mo_author`, `mo_year`
+  * Author and year: `mo_ref`
  
  They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
  ```r
@@ -20,11 +20,11 @@
  # [1] "Streptococcus grupo A"
  ```
  
-  Furthermore, old taxonomic names will give a note about the current taxonomic name:
+  Furthermore, former taxonomic names will give a note about the current taxonomic name:
  ```r
-  mo_gramstain("Escherichia blattae")
+  mo_gramstain("Esc blattae")
  # Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010)
-  # [1] "Gram negative
+  # [1] "Gram negative"
  ```
 * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates
  * Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables
--- a/R/data.R
+++ b/R/data.R
@@ -124,7 +124,7 @@
 #'
 #' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}.
 #' @inheritSection as.mo ITIS
-#' @format A \code{\link{data.frame}} with 18,833 observations and 16 variables:
+#' @format A \code{\link{data.frame}} with 18,833 observations and 15 variables:
 #' \describe{
 #'   \item{\code{mo}}{ID of microorganism}
 #'   \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@@ -140,8 +140,7 @@
 #'   \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
 #'   \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
 #'   \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
-#'   \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
-#'   \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
+#'   \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
 #' }
 #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
 #' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}}
@@ -151,13 +150,12 @@
 #'
 #' A data set containing old (previously valid or accepted) taxonomic names according to ITIS. This data set is used internally by \code{\link{as.mo}}.
 #' @inheritSection as.mo ITIS
-#' @format A \code{\link{data.frame}} with 2,383 observations and 5 variables:
+#' @format A \code{\link{data.frame}} with 2,383 observations and 4 variables:
 #' \describe{
 #'   \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
 #'   \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
 #'   \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS}
-#'   \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source}
-#'   \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source}
+#'   \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
 #' }
 #' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
 #' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}}
--- a/R/freq.R
+++ b/R/freq.R
@@ -517,6 +517,12 @@ diff.frequency_tbl <- function(x, y, ...) {
    stop("Both x and y must be a frequency table.")
  }

+  cat("Differences between frequency tables")
+  if (identical(x, y)) {
+    cat("\n\nNo differences found.\n")
+    return(invisible())
+  }
+
  x.attr <- attributes(x)$opt

  # only keep item and count
@@ -543,12 +549,11 @@ diff.frequency_tbl <- function(x, y, ...) {
                                 diff.percent,
                                 paste0("+", diff.percent)))

-  cat("Differences between frequency tables")
  print(
    knitr::kable(x,
                 format = x.attr$tbl_format,
                 col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
-                 align = "lrrrr",
+                 align = paste0(x.attr$column_align[1], "rrrr"),
                 padding = 1)
  )
 }
--- a/R/globals.R
+++ b/R/globals.R
@@ -31,6 +31,7 @@ globalVariables(c(".",
                  "cum_percent",
                  "date_lab",
                  "days_diff",
+                  "diff.percent",
                  "fctlvl",
                  "first_isolate_row_index",
                  "Freq",
@@ -58,6 +59,7 @@ globalVariables(c(".",
                  "patient_id",
                  "prevalence",
                  "R",
+                  "ref",
                  "real_first_isolate",
                  "S",
                  "septic_patients",
--- a/R/mo.R
+++ b/R/mo.R
@@ -244,13 +244,11 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
    x <- gsub(" ", ".*", x, fixed = TRUE)
    # add start en stop regex
    x <- paste0('^', x, '$')
-    x_withspaces_all <- x_withspaces
    x_withspaces_start <- paste0('^', x_withspaces)
    x_withspaces <- paste0('^', x_withspaces, '$')

    # cat(paste0('x                  "', x, '"\n'))
    # cat(paste0('x_species          "', x_species, '"\n'))
-    # cat(paste0('x_withspaces_all   "', x_withspaces_all, '"\n'))
    # cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n'))
    # cat(paste0('x_withspaces       "', x_withspaces, '"\n'))
    # cat(paste0('x_backup           "', x_backup, '"\n'))
@@ -522,16 +520,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        MOs_old <- as.data.table(AMR::microorganisms.old)
        setkey(MOs_old, name, tsn_new)
      }
-      found <- MOs_old[tolower(name) == tolower(x_backup[i]) |
-                         tsn == x_trimmed[i],]
+      found <- MOs_old[tolower(name) == tolower(x_backup[i])
+                       | tsn == x_trimmed[i]
+                       | name %like% x_withspaces[i],]
      if (NROW(found) > 0) {
        x[i] <- MOs[tsn == found[1, tsn_new], ..property][[1]]
        renamed_note(name_old = found[1, name],
                     name_new = MOs[tsn == found[1, tsn_new], fullname],
-                     authors_old = found[1, authors],
-                     authors_new = MOs[tsn == found[1, tsn_new], authors],
-                     year_old = found[1, year],
-                     year_new = MOs[tsn == found[1, tsn_new], year])
+                     ref_old = found[1, ref],
+                     ref_new = MOs[tsn == found[1, tsn_new], ref])
        next
      }

@@ -548,10 +545,8 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
                  call. = FALSE, immediate. = TRUE)
          renamed_note(name_old = found[1, name],
                       name_new = MOs[tsn == found[1, tsn_new], fullname],
-                       authors_old = found[1, authors],
-                       authors_new = MOs[tsn == found[1, tsn_new], authors],
-                       year_old = found[1, year],
-                       year_new = MOs[tsn == found[1, tsn_new], year])
+                       ref_old = found[1, ref],
+                       ref_new = MOs[tsn == found[1, tsn_new], ref])
          next
        }

@@ -666,20 +661,18 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
 }

 #' @importFrom dplyr case_when
-renamed_note <- function(name_old, name_new,
-                         authors_old = "", authors_new = "",
-                         year_old = "", year_new = "") {
-  authorship_old <- case_when(
-    !authors_old %in% c("", NA) & !year_old %in% c("", NA) ~ paste0(" (", authors_old, ", ", year_old, ")"),
-    !authors_old %in% c("", NA) ~ paste0(" (", authors_old, ")"),
-    !year_old %in% c("", NA) ~ paste0(" (", year_old, ")"),
-    TRUE ~ "")
-  authorship_new <- case_when(
-    !authors_new %in% c("", NA) & !year_new %in% c("", NA) ~ paste0(" (", authors_new, ", ", year_new, ")"),
-    !authors_new %in% c("", NA) ~ paste0(" (", authors_new, ")"),
-    !year_new %in% c("", NA) ~ paste0(" (", year_new, ")"),
-    TRUE ~ "")
-  base::message(paste0("Note: '", name_old, "'", authorship_old, " was renamed '", name_new, "'", authorship_new))
+renamed_note <- function(name_old, name_new, ref_old = "", ref_new = "") {
+  # Emit a note that `name_old` was renamed `name_new`, each followed by its
+  # reference in parentheses. Only the first element of a reference is used;
+  # if it is "", NA or absent nothing is appended (no empty " ()").
+  format_ref <- function(ref) {
+    if (length(ref) == 0 || ref[1] %in% c("", NA)) {
+      return("")
+    }
+    paste0(" (", ref[1], ")")
+  }
+  base::message(paste0("Note: '", name_old, "'", format_ref(ref_old),
+                       " was renamed '", name_new, "'", format_ref(ref_new)))
 }

 #' @exportMethod print.mo
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -45,8 +45,7 @@
 #' mo_gramstain("E. coli")       # "Gram negative"
 #' mo_TSN("E. coli")             # 285
 #' mo_type("E. coli")            # "Bacteria"
-#' mo_authors("E. coli")         # "Castellani and Chalmers"
-#' mo_year("E. coli")            # 1919
+#' mo_ref("E. coli")             # "Castellani and Chalmers, 1919"
 #'
 #'
 #' # Abbreviations known in the field
@@ -199,14 +198,8 @@ mo_subkingdom <- function(x, ...) {

 #' @rdname mo_property
 #' @export
-mo_authors <- function(x, ...) {
-  mo_validate(x = x, property = "authors", ...)
-}
-
-#' @rdname mo_property
-#' @export
-mo_year <- function(x, ...) {
-  mo_validate(x = x, property = "year", ...)
+mo_ref <- function(x, ...) {
+  mo_validate(x = x, property = "ref", ...)
 }

 #' @rdname mo_property
--- a/data/microorganisms.old.rda
+++ b/data/microorganisms.old.rda
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -119,7 +119,7 @@ years <- format(years)
 # print a histogram of numeric values
 septic_patients \%>\%
  freq(age) \%>\%
-  hist()  # prettier: ggplot(septic_patients, aes(age)) + geom_histogram()
+  hist()

 # or print all points to a regular plot
 septic_patients \%>\%
@@ -147,6 +147,10 @@ table(septic_patients$gender,
      septic_patients$age) \%>\%
  freq(sep = " **sep** ")

+# check differences between frequency tables
+diff(freq(septic_patients$trim),
+     freq(septic_patients$trsu))
+
 \dontrun{
 # send frequency table to clipboard (e.g. for pasting in Excel)
 septic_patients \%>\%
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@@ -4,7 +4,7 @@
 \name{microorganisms}
 \alias{microorganisms}
 \title{Data set with taxonomic data from ITIS}
-\format{A \code{\link{data.frame}} with 18,833 observations and 16 variables:
+\format{A \code{\link{data.frame}} with 18,833 observations and 15 variables:
 \describe{
  \item{\code{mo}}{ID of microorganism}
  \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS}
@@ -20,8 +20,7 @@
  \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}}
  \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}}
  \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.}
-  \item{\code{authors}}{Author(s) that published this taxonomic name as found in ITIS, see Source}
-  \item{\code{year}}{Year in which the author(s) published this taxonomic name as found in ITIS, see Source}
+  \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
 }}
 \source{
 [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
--- a/man/microorganisms.old.Rd
+++ b/man/microorganisms.old.Rd
@@ -4,13 +4,12 @@
 \name{microorganisms.old}
 \alias{microorganisms.old}
 \title{Data set with old taxonomic data from ITIS}
-\format{A \code{\link{data.frame}} with 2,383 observations and 5 variables:
+\format{A \code{\link{data.frame}} with 2,383 observations and 4 variables:
 \describe{
  \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS}
  \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source}
  \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS}
-  \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source}
-  \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source}
+  \item{\code{ref}}{Author(s) and year of the publication concerned, as found in ITIS, see Source}
 }}
 \source{
 [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}.
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@@ -12,8 +12,7 @@
 \alias{mo_class}
 \alias{mo_phylum}
 \alias{mo_subkingdom}
-\alias{mo_authors}
-\alias{mo_year}
+\alias{mo_ref}
 \alias{mo_type}
 \alias{mo_TSN}
 \alias{mo_gramstain}
@@ -40,9 +39,7 @@ mo_phylum(x, ...)

 mo_subkingdom(x, ...)

-mo_authors(x, ...)
-
-mo_year(x, ...)
+mo_ref(x, ...)

 mo_type(x, language = NULL, ...)

@@ -103,8 +100,7 @@ mo_shortname("E. coli")       # "E. coli"
 mo_gramstain("E. coli")       # "Gram negative"
 mo_TSN("E. coli")             # 285
 mo_type("E. coli")            # "Bacteria"
-mo_authors("E. coli")         # "Castellani and Chalmers"
-mo_year("E. coli")            # 1919
+mo_ref("E. coli")             # "Castellani and Chalmers, 1919"


 # Abbreviations known in the field
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -112,5 +112,19 @@ test_that("frequency table works", {
  expect_error(septic_patients %>% freq(peni, oxac, clox, amox, amcl,
                                        ampi, pita, czol, cfep, cfur))

+  # run diff
+  expect_output(print(
+    diff(freq(septic_patients$amcl),
+         freq(septic_patients$amox))
+  ))
+  expect_output(print(
+    diff(freq(septic_patients$age),
+         freq(septic_patients$age)) # same
+  ))
+  expect_error(print(
+    diff(freq(septic_patients$amcl),
+         "Just a string") # not a freq tbl
+  ))
+
 })

--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@@ -158,4 +158,14 @@ test_that("as.mo works", {
  expect_equal(suppressWarnings(as.character(as.mo("esco extra_text", allow_uncertain = TRUE))), "B_ESCHR_COL")
  expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))

+  # predefined reference_df
+  expect_equal(as.character(as.mo("TestingOwnID",
+                                  reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))),
+               "B_ESCHR_COL")
+  expect_equal(as.character(as.mo(c("TestingOwnID", "E. coli"),
+                                  reference_df = data.frame(a = "TestingOwnID", b = "B_ESCHR_COL"))),
+               c("B_ESCHR_COL", "B_ESCHR_COL"))
+  expect_warning(as.character(as.mo("TestingOwnID",
+                                  reference_df = NULL)))
+
 })
--- a/tests/testthat/test-mo_property.R
+++ b/tests/testthat/test-mo_property.R
@@ -15,8 +15,8 @@ test_that("mo_property works", {
  expect_equal(class(mo_taxonomy("E. coli")), "list")
  expect_equal(names(mo_taxonomy("E. coli")), c("subkingdom", "phylum", "class", "order",
                                                "family", "genus", "species", "subspecies"))
-  expect_equal(mo_authors("E. coli"), "Castellani and Chalmers")
-  expect_equal(mo_year("E. coli"), 1919)
+
+  expect_equal(mo_ref("E. coli"), "Castellani and Chalmers, 1919")

  expect_equal(mo_shortname("MRSA"), "S. aureus")
  expect_equal(mo_shortname("MRSA", Becker = TRUE), "S. aureus")