# AMR/inst/tinytest/test-data.R

# ==================================================================== #
# TITLE:                                                               #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE CODE:                                                         #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# PLEASE CITE THIS SOFTWARE AS:                                        #
# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
# Journal of Statistical Software, 104(3), 1-31.                       #
# https://doi.org/10.18637/jss.v104.i03                                #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

# Identifier columns: one distinct ID per row, stored as the package's own
# S3 class layered on top of character.
mo_ids <- microorganisms[["mo"]]
expect_identical(nrow(microorganisms), length(unique(mo_ids)))
expect_identical(class(mo_ids), c("mo", "character"))

ab_ids <- antibiotics[["ab"]]
expect_identical(nrow(antibiotics), length(unique(ab_ids)))
expect_identical(class(ab_ids), c("ab", "character"))

# An ATC value may only occur more than once when it is missing (NA)
atc_codes <- antibiotics[["atc"]]
repeated_atc <- atc_codes[duplicated(atc_codes)]
expect_true(all(is.na(repeated_atc)))


# Cross-table references: codes used in one data set must exist in the
# data set that defines them.
known_mo <- microorganisms$mo
known_ab <- antibiotics$ab

# microorganism codes
expect_true(all(microorganisms.codes$mo %in% known_mo))
expect_true(all(example_isolates$mo %in% known_mo))
expect_true(all(microorganisms.groups$mo %in% known_mo))
expect_true(all(microorganisms.groups$mo_group %in% known_mo))
expect_true(all(clinical_breakpoints$mo %in% known_mo))
expect_true(all(intrinsic_resistant$mo %in% known_mo))

# antibiotic codes and names
expect_true(all(clinical_breakpoints$ab %in% known_ab))
expect_true(all(intrinsic_resistant$ab %in% known_ab))
expect_true(all(dosage$ab %in% known_ab))
expect_true(all(dosage$name %in% antibiotics$name))

# the code translation table may not contain missing values
expect_false(any(is.na(microorganisms.codes$code)))
expect_false(any(is.na(microorganisms.codes$mo)))
# Valid disks/MICs: outside the ECOFF reference table, no S or R breakpoint
# may turn into NA when coerced with as.mic() / as.disk().
not_ecoff <- clinical_breakpoints$ref_tbl != "ECOFF"
mic_rows <- which(clinical_breakpoints$method == "MIC" & not_ecoff)
disk_rows <- which(clinical_breakpoints$method == "DISK" & not_ecoff)

mic_S <- clinical_breakpoints[mic_rows, "breakpoint_S", drop = TRUE]
mic_R <- clinical_breakpoints[mic_rows, "breakpoint_R", drop = TRUE]
expect_false(any(is.na(as.mic(mic_S))))
expect_false(any(is.na(as.mic(mic_R))))

disk_S <- clinical_breakpoints[disk_rows, "breakpoint_S", drop = TRUE]
disk_R <- clinical_breakpoints[disk_rows, "breakpoint_R", drop = TRUE]
expect_false(any(is.na(as.disk(disk_S))))
expect_false(any(is.na(as.disk(disk_R))))

# Round trip: feeding every official antibiotic name to as.ab() must give
# back exactly the `ab` column, in the same order.
ab_from_names <- as.ab(antibiotics$name)
ab_expected <- antibiotics$ab
expect_identical(ab_from_names, ab_expected)

# NOTE(review): the reason this check is gated on 'tibble' being installed is
# not visible here -- presumably the data sets are stored as tibbles; confirm.
if (AMR:::pkg_is_available("tibble")) {
  # CRAN policy: the data sets may not contain diacritics, i.e. any
  # non-ASCII character. Converting a data set to ASCII must therefore
  # leave it unchanged.
  amr_namespace <- asNamespace("AMR")
  datasets <- data(package = "AMR", envir = amr_namespace)$results[, "Item", drop = TRUE]
  for (dataset_name in datasets) {
    dataset <- get(dataset_name, envir = amr_namespace)
    # `info` carries the data set name so a failure can be traced back
    expect_identical(AMR:::dataset_UTF8_to_ASCII(dataset), dataset, info = dataset_name)
  }
}

# The internal microorganism lookup table must contain at least the columns
# listed below (additional columns are allowed).
df <- AMR:::AMR_env$MO_lookup
required_columns <- c(
  # taxonomy
  "mo", "fullname", "status", "kingdom", "phylum", "class", "order",
  "family", "genus", "species", "subspecies", "rank", "ref", "source",
  # external identifiers
  "lpsn", "lpsn_parent", "lpsn_renamed_to",
  "gbif", "gbif_parent", "gbif_renamed_to",
  "prevalence", "snomed",
  # remaining lookup columns
  "kingdom_index", "fullname_lower", "full_first", "species_first"
)
expect_true(all(required_columns %in% colnames(df)))

# The CoNS constant must carry the "mo" class
expect_inherits(AMR:::MO_CONS, "mo")

# Every Staphylococcus record with a species other than "aureus" must be
# listed in either MO_CONS or MO_COPS; records without a species are skipped.
uncategorised <- subset(
  microorganisms,
  genus == "Staphylococcus" &
    !(species %in% c("", "aureus") | mo %in% c(AMR:::MO_CONS, AMR:::MO_COPS))
)
n_uncategorised <- NROW(uncategorised)

# Build the message shown with the test result: one line per offending species
if (n_uncategorised == 0) {
  staph_info <- "All staphylococcal species categorised as CoNS/CoPS."
} else {
  staph_info <- paste0(
    "Staphylococcal species not categorised as CoNS/CoPS: S. ",
    uncategorised$species, " (", uncategorised$mo, ")",
    collapse = "\n"
  )
}
expect_true(n_uncategorised == 0, info = staph_info)

# THE COMMENTED-OUT HELPER BELOW CHECKS FOR NON-ASCII STRINGS IN ALL FILES:

# check_non_ascii <- function() {
#   purrr::map_df(
#     .id = "file",
#     # list common text files
#     .x = fs::dir_ls(
#       recurse = TRUE,
#       type = "file",
#       # ignore images, compressed
#       regexp = "\\.(png|ico|rda|ai|tar.gz|zip|xlsx|csv|pdf|psd)$",
#       invert = TRUE
#     ),
#     .f = function(path) {
#       x <- readLines(path, warn = FALSE)
#       # from tools::showNonASCII()
#       asc <- iconv(x, "latin1", "ASCII")
#       ind <- is.na(asc) | asc != x
#       # make data frame
#       if (any(ind)) {
#         tibble::tibble(
#           row = which(ind),
#           line = iconv(x[ind], "latin1", "ASCII", sub = "byte")
#         )
#       } else {
#         tibble::tibble()
#       }
#     }
#   )
# }
# x <- check_non_ascii() %>%
#   filter(file %unlike% "^(data-raw|docs|git_)")
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`# ==================================================================== #`
new species groups, updated clinical breakpoints 2023-07-08 17:30:05 +02:00			`# TITLE: #`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`# AMR: An R Package for Working with Antimicrobial Resistance Data #`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`# #`
new species groups, updated clinical breakpoints 2023-07-08 17:30:05 +02:00			`# SOURCE CODE: #`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`# https://github.com/msberends/AMR #`
			`# #`
new species groups, updated clinical breakpoints 2023-07-08 17:30:05 +02:00			`# PLEASE CITE THIS SOFTWARE AS: #`
(v2.1.1.9064) update all microbial taxonomy, add mycobank, big documentation update 2024-07-16 14:51:57 +02:00			`# Berends MS, Luz CF, Friedrich AW, et al. (2022). #`
			`# AMR: An R Package for Working with Antimicrobial Resistance Data. #`
			`# Journal of Statistical Software, 104(3), 1-31. #`
documentation update 2023-05-27 10:39:22 +02:00			`# https://doi.org/10.18637/jss.v104.i03 #`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`# #`
support new mo codes 2022-12-27 15:16:15 +01:00			`# Developed at the University of Groningen and the University Medical #`
			`# Center Groningen in The Netherlands, in collaboration with many #`
			`# colleagues from around the world, see our website. #`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`# #`
			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
			`# #`
			`# Visit our website for the full manual and a complete tutorial about #`
			`# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #`
			`# ==================================================================== #`

			`# IDs should always be unique`
			`expect_identical(nrow(microorganisms), length(unique(microorganisms$mo)))`
			`expect_identical(class(microorganisms$mo), c("mo", "character"))`
			`expect_identical(nrow(antibiotics), length(unique(antibiotics$ab)))`
(v1.7.1.9007) Updated antibiotics dataset, fixes #41 2021-06-23 10:03:17 +02:00			`expect_true(all(is.na(antibiotics$atc[duplicated(antibiotics$atc)])))`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`expect_identical(class(antibiotics$ab), c("ab", "character"))`

(v1.7.1.9007) Updated antibiotics dataset, fixes #41 2021-06-23 10:03:17 +02:00
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`# check cross table reference`
			`expect_true(all(microorganisms.codes$mo %in% microorganisms$mo))`
			`expect_true(all(example_isolates$mo %in% microorganisms$mo))`
interpretation fixes 2023-07-11 09:50:45 +02:00			`expect_true(all(microorganisms.groups$mo %in% microorganisms$mo))`
			`expect_true(all(microorganisms.groups$mo_group %in% microorganisms$mo))`
Replace RSI with SIR 2023-01-21 23:47:20 +01:00			`expect_true(all(clinical_breakpoints$mo %in% microorganisms$mo))`
			`expect_true(all(clinical_breakpoints$ab %in% antibiotics$ab))`
(v1.7.1.9073) as.rsi() fix for UTIs 2021-12-14 21:47:14 +01:00			`expect_true(all(intrinsic_resistant$mo %in% microorganisms$mo))`
			`expect_true(all(intrinsic_resistant$ab %in% antibiotics$ab))`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`expect_false(any(is.na(microorganisms.codes$code)))`
			`expect_false(any(is.na(microorganisms.codes$mo)))`
			`expect_true(all(dosage$ab %in% antibiotics$ab))`
			`expect_true(all(dosage$name %in% antibiotics$name))`
(v1.7.1.9071) rsi disk fix 2021-12-13 11:57:34 +01:00			`# check valid disks/MICs`
Update clinical breakpoints and fix some `as.mo()` bugs (#117) * Updates clinical breakpoints EUCAST/CLSI 2023, fixes #102, fixes #112, fixes #113, fixes #114, fixes #115 * docs * implement ecoffs * unit tests 2023-06-22 15:10:59 +02:00			`expect_false(any(is.na(as.mic(clinical_breakpoints[which(clinical_breakpoints$method == "MIC" & clinical_breakpoints$ref_tbl != "ECOFF"), "breakpoint_S", drop = TRUE]))))`
			`expect_false(any(is.na(as.mic(clinical_breakpoints[which(clinical_breakpoints$method == "MIC" & clinical_breakpoints$ref_tbl != "ECOFF"), "breakpoint_R", drop = TRUE]))))`
			`expect_false(any(is.na(as.disk(clinical_breakpoints[which(clinical_breakpoints$method == "DISK" & clinical_breakpoints$ref_tbl != "ECOFF"), "breakpoint_S", drop = TRUE]))))`
			`expect_false(any(is.na(as.disk(clinical_breakpoints[which(clinical_breakpoints$method == "DISK" & clinical_breakpoints$ref_tbl != "ECOFF"), "breakpoint_R", drop = TRUE]))))`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00
			`# antibiotic names must always be coercible to their original AB code`
			`expect_identical(as.ab(antibiotics$name), antibiotics$ab)`

unit tests 2023-02-18 14:56:06 +01:00			`if (AMR:::pkg_is_available("tibble")) {`
fix for R <= 3.3 2022-08-28 19:34:04 +02:00			`# there should be no diacritics (i.e. non ASCII) characters in the datasets (CRAN policy)`
			`datasets <- data(package = "AMR", envir = asNamespace("AMR"))$results[, "Item", drop = TRUE]`
			`for (i in seq_len(length(datasets))) {`
			`dataset <- get(datasets[i], envir = asNamespace("AMR"))`
			`expect_identical(AMR:::dataset_UTF8_to_ASCII(dataset), dataset, info = datasets[i])`
			`}`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00			`}`

move object assignment to AMR_env 2022-10-14 13:02:50 +02:00			`df <- AMR:::AMR_env$MO_lookup`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`expect_true(all(c(`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`"mo", "fullname", "status", "kingdom", "phylum", "class", "order",`
			`"family", "genus", "species", "subspecies", "rank", "ref", "source",`
			`"lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence",`
			`"snomed", "kingdom_index", "fullname_lower", "full_first", "species_first"`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`) %in% colnames(df)))`
(v1.6.0.9031) tinytest unit tests 2021-05-15 21:36:22 +02:00
			`expect_inherits(AMR:::MO_CONS, "mo")`

styled, unit test fix 2022-08-28 10:31:50 +02:00			`uncategorised <- subset(`
			`microorganisms,`
			`genus == "Staphylococcus" &`
			`!species %in% c("", "aureus") &`
			`!mo %in% c(AMR:::MO_CONS, AMR:::MO_COPS)`
			`)`
			`expect_true(NROW(uncategorised) == 0,`
			`info = ifelse(NROW(uncategorised) == 0,`
			`"All staphylococcal species categorised as CoNS/CoPS.",`
			`paste0(`
			`"Staphylococcal species not categorised as CoNS/CoPS: S. ",`
New mo algorithm, prepare for 2.0 2022-10-05 09:12:22 +02:00			`uncategorised$species, " (", uncategorised$mo, ")",`
			`collapse = "\n"`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`)`
			`)`
			`)`
(v1.7.1.9074) as.mo() improvement, ASCII replacements for unit tests 2021-12-14 22:39:23 +01:00
			`# THIS WILL CHECK NON-ASCII STRINGS IN ALL FILES:`

			`# check_non_ascii <- function() {`
			`# purrr::map_df(`
			`# .id = "file",`
			`# # list common text files`
			`# .x = fs::dir_ls(`
			`# recurse = TRUE,`
			`# type = "file",`
			`# # ignore images, compressed`
			`# regexp = "\\.(png\|ico\|rda\|ai\|tar.gz\|zip\|xlsx\|csv\|pdf\|psd)$",`
			`# invert = TRUE`
			`# ),`
			`# .f = function(path) {`
			`# x <- readLines(path, warn = FALSE)`
			`# # from tools::showNonASCII()`
			`# asc <- iconv(x, "latin1", "ASCII")`
			`# ind <- is.na(asc) \| asc != x`
			`# # make data frame`
			`# if (any(ind)) {`
			`# tibble::tibble(`
			`# row = which(ind),`
			`# line = iconv(x[ind], "latin1", "ASCII", sub = "byte")`
			`# )`
			`# } else {`
			`# tibble::tibble()`
			`# }`
			`# }`
			`# )`
			`# }`
styled, unit test fix 2022-08-28 10:31:50 +02:00			`# x <- check_non_ascii() %>%`
(v1.7.1.9074) as.mo() improvement, ASCII replacements for unit tests 2021-12-14 22:39:23 +01:00			`# filter(file %unlike% "^(data-raw\|docs\|git_)")`