# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# SOURCE                                                               #
# https://gitlab.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
#                                                                      #
# This R package was created for academic research and was publicly    #
# released in the hope that it will be useful, but it comes WITHOUT    #
# ANY WARRANTY OR LIABILITY.                                           #
# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
# ==================================================================== #

context("mo.R")

# Unit tests for as.mo(): every expectation below feeds some user input to
# as.mo() and compares the resulting microorganism code (as character)
# with the expected code, or checks that a warning/error is signalled.
#
# dplyr supplies `%>%`, filter(), select(), pull() and mutate() used in
# several tests, so it is attached once for the whole file.
library(dplyr)

test_that("as.mo returns valid mo codes unchanged", {
  # all codes of the reference table with more than 3 characters must map
  # onto themselves
  MOs <- AMR::microorganisms %>% filter(!is.na(mo), nchar(mo) > 3)
  expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo)))
})

test_that("as.mo resolves names, abbreviations and lab codes", {
  expect_identical(
    as.character(as.mo(c("E. coli", "H. influenzae"))),
    c("B_ESCHR_COL", "B_HMPHL_INF"))

  # full names, doubled spaces, surrounding whitespace and partial names
  expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COL")
  expect_equal(as.character(as.mo("Escherichia  coli")), "B_ESCHR_COL")
  expect_equal(as.character(as.mo("Escherichia  species")), "B_ESCHR")
  expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
  expect_equal(as.character(as.mo(" B_ESCHR_COL ")), "B_ESCHR_COL")
  # not Campylobacter
  expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COL")
  expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNE")
  expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
  # K. pneumoniae subspp. rhinoscleromatis
  expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNE_RHI")
  expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
  expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DIF")
  expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNE")
  expect_equal(as.character(as.mo("Strepto")), "B_STRPTC")
  # not Peptostreptococcus
  expect_equal(as.character(as.mo("Streptococcus")), "B_STRPTC")

  expect_equal(
    as.character(as.mo(c("GAS", "GBS"))),
    c("B_STRPTC_GRA", "B_STRPTC_GRB"))

  # not Actinomyces pyogenes
  expect_equal(as.character(as.mo("S. pyo")), "B_STRPTC_PYO")

  # not Pasteurella aerogenes
  expect_equal(as.character(as.mo("P. aer")), "B_PDMNS_AER")

  # GLIMS
  expect_equal(as.character(as.mo("bctfgr")), "B_BCTRD_FRA")

  # resistance-phenotype abbreviations map to the underlying organism
  expect_equal(as.character(as.mo("MRSE")), "B_STPHY_EPI")
  expect_equal(as.character(as.mo("VRE")), "B_ENTRC")
  expect_equal(as.character(as.mo("MRPA")), "B_PDMNS_AER")
  expect_equal(as.character(as.mo("PISP")), "B_STRPTC_PNE")
  expect_equal(as.character(as.mo("PRSP")), "B_STRPTC_PNE")
  expect_equal(as.character(as.mo("VISP")), "B_STRPTC_PNE")
  expect_equal(as.character(as.mo("VRSP")), "B_STRPTC_PNE")

  expect_equal(as.character(as.mo("CNS")), "B_STPHY_CNS")
  expect_equal(as.character(as.mo("CoNS")), "B_STPHY_CNS")
  expect_equal(as.character(as.mo("CPS")), "B_STPHY_CPS")
  expect_equal(as.character(as.mo("CoPS")), "B_STPHY_CPS")

  expect_equal(
    as.character(as.mo(c("Gram negative", "Gram positive"))),
    c("B_GRAMN", "B_GRAMP"))
})

test_that("as.mo finds prevalent and unprevalent microorganisms", {
  # prevalent MO
  expect_identical(
    as.character(
      as.mo(c("stau",
              "STAU",
              "staaur",
              "S. aureus",
              "S aureus",
              "Staphylococcus aureus",
              "MRSA",
              "VISA"))),
    rep("B_STPHY_AUR", 8))
  # unprevalent MO
  expect_identical(
    as.character(
      as.mo(c("buno",
              "BUNO",
              "burnod",
              "B. nodosa",
              "B nodosa",
              "Burkholderia nodosa"))),
    rep("B_BRKHL_NOD", 6))
})

test_that("as.mo handles empty and too short input", {
  # empty values
  expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3))
  # too few characters
  expect_warning(as.mo("ab"))

  # a mix of unknown, empty and known values; warnings are expected here
  # and silenced so only the resulting codes are compared
  expect_equal(
    suppressWarnings(
      as.character(
        as.mo(c("Qq species", "", "CRS", "K. pneu rhino", "esco")))),
    c(NA_character_, NA_character_,
      "B_STNTR_MAL", "B_KLBSL_PNE_RHI", "B_ESCHR_COL"))
})

test_that("as.mo applies the Becker classification", {
  expect_identical(
    as.character(as.mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPI")
  expect_identical(
    as.character(as.mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CNS")
  expect_identical(
    as.character(as.mo("STAEPI", Becker = TRUE)), "B_STPHY_CNS")
  # Strep (!) intermedius
  expect_identical(
    as.character(as.mo("S. intermedius", Becker = FALSE)), "B_STRPTC_INT")
  expect_identical(
    as.character(as.mo("Sta intermedius", Becker = FALSE)), "B_STPHY_INT")
  expect_identical(
    as.character(as.mo("Sta intermedius", Becker = TRUE)), "B_STPHY_CPS")
  expect_identical(
    as.character(as.mo("STAINT", Becker = TRUE)), "B_STPHY_CPS")
  # aureus must only be influenced if Becker = "all"
  expect_identical(
    as.character(as.mo("STAAUR", Becker = FALSE)), "B_STPHY_AUR")
  expect_identical(
    as.character(as.mo("STAAUR", Becker = TRUE)), "B_STPHY_AUR")
  expect_identical(
    as.character(as.mo("STAAUR", Becker = "all")), "B_STPHY_CPS")
})

test_that("as.mo applies the Lancefield classification", {
  expect_identical(
    as.character(as.mo("S. pyogenes", Lancefield = FALSE)), "B_STRPTC_PYO")
  expect_identical(
    as.character(as.mo("S. pyogenes", Lancefield = TRUE)), "B_STRPTC_GRA")
  # group A
  expect_identical(
    as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPTC_GRA")
  expect_identical(
    as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPTC_AGA")
  # group B
  expect_identical(
    as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPTC_GRB")
  expect_identical(
    as.character(as.mo("S. equisimilis", Lancefield = FALSE)),
    "B_STRPTC_DYS_EQU")
  # group C
  expect_identical(
    as.character(as.mo("S. equisimilis", Lancefield = TRUE)), "B_STRPTC_GRC")
  # Enterococci must only be influenced if Lancefield = "all"
  expect_identical(
    as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_IUM")
  expect_identical(
    as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_IUM")
  # group D
  expect_identical(
    as.character(as.mo("E. faecium", Lancefield = "all")), "B_STRPTC_GRD")
  expect_identical(
    as.character(as.mo("S. anginosus", Lancefield = FALSE)), "B_STRPTC_ANG")
  # group F
  expect_identical(
    as.character(as.mo("S. anginosus", Lancefield = TRUE)), "B_STRPTC_GRF")
  expect_identical(
    as.character(as.mo("S. sanguinis", Lancefield = FALSE)), "B_STRPTC_SAN")
  # group H
  expect_identical(
    as.character(as.mo("S. sanguinis", Lancefield = TRUE)), "B_STRPTC_GRH")
  expect_identical(
    as.character(as.mo("S. salivarius", Lancefield = FALSE)), "B_STRPTC_SAL")
  # group K
  expect_identical(
    as.character(as.mo("S. salivarius", Lancefield = TRUE)), "B_STRPTC_GRK")
})

test_that("as.mo accepts data.frame input of one or two columns", {
  first_ten <- septic_patients[1:10, ]

  # select with one column
  expect_identical(
    first_ten %>%
      left_join_microorganisms() %>%
      select(genus) %>%
      as.mo() %>%
      as.character(),
    c("B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY",
      "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY"))

  # select with two columns: genus + species must reproduce the mo column
  expect_identical(
    first_ten %>%
      pull(mo),
    first_ten %>%
      left_join_microorganisms() %>%
      select(genus, species) %>%
      as.mo() %>%
      as.character())

  # unknown results
  expect_warning(as.mo(c("INVALID", "Yeah, unknown")))

  # too many columns
  expect_error(septic_patients %>% select(1:3) %>% as.mo())
})

test_that("as.mo output prints and fits in data structures", {
  # print
  expect_output(print(as.mo(c("B_ESCHR_COL", NA))))

  # NOTE(review): both sides are the very same call, so this only verifies
  # that as.mo() gives a stable result for a valid code. The original
  # comment said "helper function" -- presumably one side once called a
  # helper or alias; confirm and restore or drop.
  expect_identical(as.mo("B_ESCHR_COL"),
                   as.mo("B_ESCHR_COL"))

  # usable inside dplyr::mutate() without changing the number of rows
  expect_equal(nrow(septic_patients %>% mutate(mo = as.mo(mo))),
               2000)

  # test data.frame
  expect_equal(nrow(data.frame(test = as.mo("B_ESCHR_COL"))),
               1)

  # check empty values
  expect_equal(as.character(suppressWarnings(as.mo(""))),
               NA_character_)
})

test_that("as.mo finds less prevalent microorganisms", {
  expect_equal(
    as.character(as.mo("Gomphosphaeria aponina delicatula")),
    "B_GMPHS_APO_DEL")
  expect_equal(
    as.character(as.mo("Gomphosphaeria apo del")), "B_GMPHS_APO_DEL")
  expect_equal(as.character(as.mo("G apo deli")), "B_GMPHS_APO_DEL")
  expect_equal(as.character(as.mo("Gomphosphaeria  aponina")), "B_GMPHS_APO")
  expect_equal(as.character(as.mo("Gomphosphaeria  species")), "B_GMPHS")
  expect_equal(as.character(as.mo("Gomphosphaeria")), "B_GMPHS")
  expect_equal(as.character(as.mo(" B_GMPHS_APO ")), "B_GMPHS_APO")
  expect_equal(as.character(as.mo("g aponina")), "B_GMPHS_APO")
})

test_that("as.mo translates old taxonomic names", {
  expect_equal(
    suppressMessages(as.character(as.mo("Escherichia blattae"))),
    "B_SHMWL_BLA")
  # - Didymosphaeria spartinae (unprevalent)
  expect_warning(suppressMessages(as.mo("D spartin", allow_uncertain = TRUE)))
  # - was renamed to Leptosphaeria obiones
  expect_equal(
    suppressWarnings(
      suppressMessages(
        as.character(as.mo("D spartin", allow_uncertain = TRUE)))),
    "F_LPTSP_OBI")
})

test_that("as.mo only guesses uncertain names when allowed", {
  expect_equal(
    suppressWarnings(
      as.character(as.mo("esco extra_text", allow_uncertain = FALSE))),
    NA_character_)
  expect_equal(
    suppressWarnings(
      as.character(as.mo("esco extra_text", allow_uncertain = TRUE))),
    "B_ESCHR_COL")
  # an uncertain match must be reported with a warning
  expect_warning(as.mo("esco extra_text", allow_uncertain = TRUE))
})

test_that("as.mo uses a user-supplied reference_df", {
  # two-column lookup table: own code -> mo code
  own_reference <- data.frame(a = "TestingOwnID", b = "B_ESCHR_COL")

  expect_equal(
    as.character(as.mo("TestingOwnID", reference_df = own_reference)),
    "B_ESCHR_COL")
  # own codes and regular input can be mixed in one call
  expect_equal(
    as.character(as.mo(c("TestingOwnID", "E. coli"),
                       reference_df = own_reference)),
    c("B_ESCHR_COL", "B_ESCHR_COL"))
  # without a reference the own code is unknown
  expect_warning(as.mo("TestingOwnID", reference_df = NULL))
  # a one-column reference is rejected
  expect_error(as.mo("E. coli", reference_df = data.frame(a = "TestingOwnID")))
})

test_that("as.mo combines codes from different sources", {
  # combination of existing mo and certe
  expect_identical(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL"))),
                   c("B_ESCHR_COL", "B_ESCHR_COL"))

  # TSN of prevalent and non prevalent ones
  expect_equal(
    mo_TSN(c("Gomphosphaeria aponina delicatula", "Escherichia coli")),
    c(717, 285))

  expect_equal(mo_fullname(c("E. spp.",
                             "E. spp",
                             "E. species")),
               rep("Escherichia species", 3))

  # from different sources
  expect_equal(
    as.character(as.mo(c("PRTMIR", "bclcer", "B_ESCHR_COL"))),
    c("B_PROTS_MIR", "B_BCLLS_CER", "B_ESCHR_COL"))
})

test_that("as.mo resolves hard to find input", {
  # hard to find
  expect_equal(
    as.character(
      suppressWarnings(
        as.mo(c("Microbacterium paraoxidans",
                "Streptococcus suis (bovis gr)",
                "Raoultella (here some text) terrigena")))),
    c("B_MCRBC", "B_STRPTC_SUI", "B_RLTLL_TER"))

  # Salmonella (City) are all actually Salmonella enterica spp (City)
  expect_equal(as.character(suppressMessages(as.mo("Salmonella Goettingen"))),
               "B_SLMNL_ENT")
})