# initially only interfacing 4D data

require(assertthat, quietly = T)

source('4D_archive.R')
source('anonymize.R')
source('globalData.R')


# Read GLIMS archive data and shape it towards the RadaR (long) format.
#
# Arguments:
#   ymdStartDate, ymdEndDate  inclusive bounds compared against the fileDate
#                             column of tFiles; not used at all when
#                             readPreprocessed is TRUE
#   readPreprocessed          TRUE:  return the stored snapshot
#                                    'data/bk_2019.rds' as is
#                             FALSE: read and process the archive files
#   tFiles                    table of archive files with a fileDate column;
#                             must not be NULL when readPreprocessed is FALSE
#   tFileOvw                  handed to readArchive() unchanged
#
# Value:
#   readPreprocessed = TRUE   whatever read_rds() returns for the snapshot
#   readPreprocessed = FALSE  NULL when readArchive() returns NULL, otherwise a
#                             long table with one row per distinct
#                             (sampleid, date, IsolNr, mo, ab.amr), restricted
#                             to the highest IsolNr of each sample
#
# NOTE(review): readArchive(), tAntibiotics, is_icu(), is_clinical() and
# is_outward() are not defined in this file; presumably they come from the
# scripts sourced at the top - confirm.
readGlimsArchive = function(ymdStartDate, ymdEndDate, 
                            readPreprocessed = TRUE, 
                            tFiles = NULL, tFileOvw = NULL) {
  if (readPreprocessed) {
    # alternative snapshots, swap the active line to use one of them
    # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
    # return(read_rds('data/total_2022-09-30.rds'))
    # return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
    # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
    # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
    return(read_rds('data/bk_2019.rds'))
  }
  
  assert_that(!is.null(tFiles), 
              msg = 'readGlimsArchive(.., readPreprocessed = F, ..)')
  tArchive <- readArchive(
    # only the files dated within [ymdStartDate, ymdEndDate]
    tFileInfo = tFiles %>% 
      filter(
        fileDate >= ymdStartDate,
        fileDate <= ymdEndDate
      ),
    tFileOvw = tFileOvw,
    createDebugList = FALSE # save memory 
  )
  
  # nothing was read, nothing to process
  if (is.null(tArchive)) {
    return(NULL)
  }
  
  radarData <- tArchive %>%
    # work towards RadaR nomenclature
    rename(sampleid = Monsternummer,
           patientid = PIN.Lot,
           department = ligafdeling,
           specialism = B,
           mo = MO) %>% 
    # some processing towards RadaR format
    mutate(
      gender = Geslacht,
      # case_when(Geslacht == 'M' ~ 'Male',
      #                    Geslacht == 'V' ~ 'Female') %>% as.factor(),
      type_dept = specialism,
      birth_date = dmy(Geboortedatum),
      # whole years between birth_date and the date column
      age = trunc((birth_date %--% date) / years(1)),
      Materiaal = replace_na(Materiaal, ''),
      Materiaal = str_trim(Materiaal),
      # Rapport = replace_na(Rapport, ''),
      # Rapport = str_trim(Rapport),
      specimen_group = 'bk',
      specimen_type = '',
      is_icu = is_icu(department),
      is_clinical = is_clinical(department),
      is_outward = is_outward(department)
    ) %>%
    # get AMR package (WHONET, EARS-Net std) mnemonics and names
    left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
    rename(ab.amr = `EARS-Net.Mnemonic`) %>%
    # get the fields used in RadaR only
    select(
      sampleid, date, IsolNr, Materiaal,
      specimen_group, specimen_type, 
      department, type_dept, specialism,
      is_icu, is_clinical, is_outward,
      birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
    ) 
  
  # these sub steps are for debugging purposes
  z <- radarData %>% 
    mutate(IsolNr = as.numeric(IsolNr)) %>% 
    arrange(sampleid, department) %>%
    group_by(sampleid) %>% 
    # fill repeating data (tidyr default direction: downwards) per sample
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
    # keep only the rows of the highest isolate number of each sample
    filter(#!is.na(mo), 
           #!is.na(RIS), 
           IsolNr == max(IsolNr, na.rm = TRUE)) %>% 
    ungroup() %>%
    distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = TRUE)
  
  # Earlier de-duplication variant, kept for reference
  # (commented 2022-10-14 by GB in order to get the slicing done correctly):
  #   group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
  #   slice(n()) %>% 
  #   ungroup() %>%
  #   distinct(date, sampleid, IsolNr, department, type_dept, specialism, 
  #            birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T)  %>%
  #   # filter only reported rows, with a micro-organism and RIS value
  #   filter(
  #     !is.na(RAP)
  #     # !is.na(mo),
  #     # !is.na(RIS)
  #   )
  
  # Disabled: pivot to the wide format. This function returns the long table
  # `z`; enable the pivot only after confirming that callers expect the wide
  # layout.
  # bk <- z %>% 
  #   filter(!is.na(RIS)) %>%
  #   mutate(
  #     RIS = as.rsi(RIS),
  #     MIC = as.mic(MIC),
  #     mo = as.mo(mo)) %>%
  #   pivot_wider(names_from = ab.amr, values_from = RIS) %>%
  #   select(order(colnames(.))) %>%
  #   relocate(all_of(patchFirstColumns), .before = 1) 
  #
  # bk = bk %>%
  #   mutate(
  #     across( 
  #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)), 
  #       as.rsi) 
  #   )
  
  z
}


# Read and process the archive files of one single day.
#
# Calls readGlimsArchive() with start and end date both set to ymdDate and
# with the preprocessed snapshot switched off.
#
# Arguments:
#   tFiles, tFileOvw  passed on to readGlimsArchive() unchanged
#   ymdDate           the day to read, used as both start and end date
#
# Value: whatever readGlimsArchive() returns.
readArchiveOfDay = function(tFiles, tFileOvw, ymdDate) {
  readGlimsArchive(ymdStartDate = ymdDate,
                   ymdEndDate   = ymdDate, 
                   # spelled out: `F` is an ordinary variable and can be rebound
                   readPreprocessed = FALSE, 
                   tFiles = tFiles, 
                   tFileOvw = tFileOvw)
}

# Read and process the archive files of the current day.
#
# Arguments:
#   tFiles, tFileOvw  passed on unchanged
#
# Value: whatever readArchiveOfDay() returns for today's date.
readArchiveToday = function(tFiles, tFileOvw) {
  # take the date once, so start and end date cannot differ when the call
  # happens around midnight
  ymdToday <- today()
  readArchiveOfDay(tFiles = tFiles, 
                   tFileOvw = tFileOvw, 
                   ymdDate = ymdToday)
}

# anonymize the dataset
# Disabled scratch code: the guard is a literal FALSE, so nothing in here runs
# when the file is sourced.
# NOTE(review): the block cannot run as written - readArchiveToday() declares
# tFiles and tFileOvw without defaults, and ymdStartDate / ymdEndDate are not
# assigned in this block. Supply them before enabling it.
if (FALSE) {
  nopd = readArchiveToday()
  # nopd = nopd %>%
  # filter(str_length(patientid) == 7) %>%
  # rowwise() %>%
  # mutate(patientid = getUupin(patientid)) %>%
  # ungroup() %>%
  # mutate(birth_date = ymd('1970-01-01'))
  
  
  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
  # never overwrite an existing export
  if (!file.exists(filename)) {
    write_rds(nopd, file = filename)
  }
}

# stitch together processed parts of the archive
# Disabled scratch code; so far it only lists the files in 'data/' whose name
# matches the bk_<..>_<..>.rds pattern.
if (FALSE) {
  tibble(name = list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds'))
}

# testing the slicing
# Disabled scratch code for interactive use. It reads an object named
# radarData, which is not created at top level in this file, so it has to
# exist in the session before any of these pipelines is run by hand.
if (FALSE) {
  ymdStartDate = ymd('2022-09-01')
  
  # find interesting cases: key combinations that occur more than once
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    # slice(n()) %>% 
    ungroup() %>%
    select(1:3, 13:ncol(.)) %>% 
    filter(ab.amr %in% c('CAZ', 'CTX')) %>% 
    filter(!is.na(mo)) %>% 
    count(date, sampleid, IsolNr, mo, ab.amr) %>% 
    filter(n > 1)
  
  # show without slicing last row
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    # slice(n()) %>% 
    select(1:3, 13:ncol(.)) %>% 
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
  
  # show with slicing last row (keeps the last row of every group)
  radarData %>%
    arrange(sampleid, department) %>% 
    group_by(sampleid) %>% 
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>% 
    ungroup() %>% 
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>% 
    slice(n()) %>% 
    select(1:3, 13:ncol(.)) %>% 
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))
  
}