# radar2/interface_4D.R

# initially only interfacing 4D data

require(assertthat, quietly = T)

source('4D_archive.R')
source('anonymize.R')
source('globalData.R')


# Read blood culture ('bk') data from the GLIMS / 4D archive.
#
# Arguments:
#   ymdStartDate, ymdEndDate  inclusive bounds on the fileDate column of
#                             tFiles; only used when readPreprocessed is
#                             FALSE (they are never evaluated otherwise)
#   readPreprocessed          TRUE (default): return the saved snapshot
#                             'data/bk_2019.rds' and ignore everything else
#   tFiles                    table of archive files with a fileDate column;
#                             required when readPreprocessed is FALSE
#   tFileOvw                  handed unchanged to readArchive()
#
# Value:
#   - readPreprocessed = TRUE: the content of the snapshot file
#   - otherwise NULL when readArchive() returns NULL, else a long-format
#     table with one row per distinct sampleid/date/IsolNr/mo/ab.amr,
#     holding only the rows of the highest IsolNr within each sample
readGlimsArchive = function(ymdStartDate, ymdEndDate,
                            readPreprocessed = TRUE,
                            tFiles = NULL, tFileOvw = NULL) {
  if (readPreprocessed) {
    # other snapshots that have been used here:
    # return(read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds'))
    # return(read_rds('data/total_2022-09-30.rds'))
    # return(read_rds('data/bk_2022.09.01_2022.10.13.rds'))
    # return(read_rds('data/archive_2022-01-01_2022-10-19_v2022-10-20_1530.rds')) # read_2_2022-10-20_1530
    # return(read_rds('data/read_10_2022-01-01_2022-10-25_x2022-10-25.rds'))
    return(read_rds('data/bk_2019.rds'))
  }

  assert_that(
    !is.null(tFiles),
    msg = 'readGlimsArchive(.., readPreprocessed = F, ..) requires tFiles'
  )

  tArchive <- readArchive(
    tFileInfo = tFiles %>%
      filter(
        fileDate >= ymdStartDate,
        fileDate <= ymdEndDate
      ),
    tFileOvw = tFileOvw,
    createDebugList = FALSE # save memory
  )

  # nothing was read for the requested period
  if (is.null(tArchive)) {
    return(NULL)
  }

  radarData <- tArchive %>%
    # work towards RadaR nomenclature
    rename(sampleid = Monsternummer,
           patientid = PIN.Lot,
           department = ligafdeling,
           specialism = B,
           mo = MO) %>%
    # some processing towards RadaR format
    mutate(
      gender = Geslacht,
      # case_when(Geslacht == 'M' ~ 'Male',
      #                    Geslacht == 'V' ~ 'Female') %>% as.factor(),
      type_dept = specialism,
      birth_date = dmy(Geboortedatum),
      # age in whole years on the sample date
      age = trunc((birth_date %--% date) / years(1)),
      Materiaal = replace_na(Materiaal, ''),
      Materiaal = str_trim(Materiaal),
      # Rapport = replace_na(Rapport, ''),
      # Rapport = str_trim(Rapport),
      specimen_group = 'bk',
      specimen_type = '',
      is_icu = is_icu(department),
      is_clinical = is_clinical(department),
      is_outward = is_outward(department)
    ) %>%
    # get AMR package (WHONET, EARS-Net std) mnemonics and names
    left_join(tAntibiotics, by = c('AB' = 'Mnemonic')) %>%
    rename(ab.amr = `EARS-Net.Mnemonic`) %>%
    # get the fields used in RadaR only
    select(
      sampleid, date, IsolNr, Materiaal,
      specimen_group, specimen_type,
      department, type_dept, specialism,
      is_icu, is_clinical, is_outward,
      birth_date, age, gender, patientid, mo, RIS, ab.amr, MIC, RAP
    )

  # return(radarData)

  # these sub steps are for debugging purposes
  radarLong <- radarData %>%
    mutate(IsolNr = as.numeric(IsolNr)) %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    # fill repeating data
    fill(patientid, age, gender, birth_date,
         department, type_dept, specialism) %>%
    # keep only the rows of the highest isolate number of each sample
    filter(#!is.na(mo),
           #!is.na(RIS),
           IsolNr == max(IsolNr, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(sampleid, date, IsolNr, mo, ab.amr, .keep_all = TRUE)
    # # get distinctive rows, arrange not strictly necessary
    # # arrange(sampleid, date, department, type_dept, specialism,
    # #         birth_date, age, gender, patientid, mo, ab.amr, RIS) %>%
    # # commented 2022-10-14 by GB in order to get the slicing done correctly
    # # group_by(date, sampleid, IsolNr, department, type_dept, specialism,
    # #           birth_date, age, gender, patientid, mo, ab.amr) %>%
    # group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    # slice(n()) %>%
    # ungroup() %>%
    # distinct(date, sampleid, IsolNr, department, type_dept, specialism,
    #          birth_date, age, gender, patientid, mo, ab.amr, .keep_all = T)  %>%
    # # filter only reported rows, with a micro-organism and RIS value
    # filter(
    #   !is.na(RAP)
    #   # !is.na(mo),
    #   # !is.na(RIS)
    # )

  # Disabled: pivot to wider format. This step is not executed because the
  # function returns the long-format table; it is kept for reference only.
  # bk <- radarLong %>%
  #   filter(!is.na(RIS)) %>%
  #   mutate(
  #     RIS = as.rsi(RIS),
  #     MIC = as.mic(MIC),
  #     mo = as.mo(mo)) %>%
  #   pivot_wider(names_from = ab.amr, values_from = RIS) %>%
  #   select(order(colnames(.))) %>%
  #   relocate(all_of(patchFirstColumns), .before = 1)
  #
  # bk = bk %>%
  #   mutate(
  #     across(
  #       (which(colnames(.) == 'mo') + 1) : length(colnames(.)),
  #       as.rsi)
  #   )

  radarLong
}


# Read the raw (not preprocessed) archive of one single day.
#
#   tFiles, tFileOvw  handed unchanged to readGlimsArchive()
#   ymdDate           the day to read; used as both start and end date
#
# Returns whatever readGlimsArchive() returns for that one-day range.
readArchiveOfDay <- function(tFiles, tFileOvw, ymdDate) {
  # a single day is a date range that starts and ends on ymdDate
  readGlimsArchive(
    ymdDate, ymdDate,
    tFiles = tFiles, tFileOvw = tFileOvw,
    readPreprocessed = FALSE
  )
}

# Read the raw (not preprocessed) archive of the current day.
#
#   tFiles, tFileOvw  handed unchanged to readGlimsArchive()
#
# Returns whatever readGlimsArchive() returns for a range whose start and
# end date are both today.
readArchiveToday = function(tFiles, tFileOvw) {
  # take the date once: two separate today() calls could straddle midnight
  # and produce a range whose start lies after its end
  ymdToday <- today()
  readGlimsArchive(ymdStartDate = ymdToday,
                   ymdEndDate   = ymdToday,
                   readPreprocessed = FALSE,
                   tFiles = tFiles,
                   tFileOvw = tFileOvw)
}

# anonymize the dataset
# Disabled scratch block: `if (F)` keeps it from running when the file is
# sourced. It reads today's archive and saves it as an .rds file under data/,
# unless a file with that name already exists.
if (F) {
  # NOTE(review): readArchiveToday() declares tFiles and tFileOvw without
  # defaults, so this call has to be given both before it can work.
  nopd = readArchiveToday()
  # the anonymization itself is currently commented out:
  # nopd = nopd %>%
  # filter(str_length(patientid) == 7) %>%
  # rowwise() %>%
  # mutate(patientid = getUupin(patientid)) %>%
  # ungroup() %>%
  # mutate(birth_date = ymd('1970-01-01'))


  # NOTE(review): ymdStartDate and ymdEndDate are not assigned in this block;
  # presumably they are expected to exist in the session - confirm.
  filename = paste0('data/bk_', ymdStartDate, '_', ymdEndDate, '.rds')
  # never overwrite an existing file
  if (!file.exists(filename)) {
    write_rds(nopd, file = filename)
  }
}

# stitch together processed parts of the archive
# (disabled; so far it only lists the matching files found under data/)
if (FALSE) {
  processedParts <- list.files(path = 'data/', pattern = 'bk_.*_.*\\.rds')
  tibble(name = processedParts)
}

# testing the slicing
# Disabled scratch block (`if (F)`): three variations of one pipeline that
# compare the result with and without `slice(n())`, i.e. keeping only the
# last row of every date/sampleid/IsolNr/mo/ab.amr group.
# NOTE(review): radarData is only assigned as a local inside
# readGlimsArchive() in this file; presumably it is captured by hand (e.g.
# through the commented `# return(radarData)`) before running this - confirm.
if (F) {
  # NOTE(review): assigned here but not used anywhere in this block
  ymdStartDate = ymd('2022-09-01')

  # find interesting cases
  # i.e. CAZ/CTX rows with a micro-organism whose
  # date/sampleid/IsolNr/mo/ab.amr combination occurs more than once
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    # slice(n()) %>%
    ungroup() %>%
    # positional selection: columns 1-3 plus column 13 up to the last one
    select(1:3, 13:ncol(.)) %>%
    filter(ab.amr %in% c('CAZ', 'CTX')) %>% filter(!is.na(mo)) %>% count(date, sampleid, IsolNr, mo, ab.amr) %>% filter(n>1)

  # show without slicing last row
  # (the grouping is still active when select() and filter() run)
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    # slice(n()) %>%
    select(1:3, 13:ncol(.)) %>%
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))

  # show with slicing last row
  # same pipeline as above, but slice(n()) keeps only the last row per group
  radarData %>%
    arrange(sampleid, department) %>%
    group_by(sampleid) %>%
    fill(patientid, age, gender, birth_date, department, type_dept, specialism) %>%
    ungroup() %>%
    group_by(date, sampleid, IsolNr, mo, ab.amr) %>%
    slice(n()) %>%
    select(1:3, 13:ncol(.)) %>%
    filter(sampleid == '22360599331', ab.amr %in% c('CAZ', 'CTX'))

}