# radar2/interface_4D_final_results.R

# Attach the packages this script relies on. library() is used rather than
# require() so that a missing package stops the script right here with a clear
# error, instead of returning FALSE and failing later at the first call into
# that package. Start-up messages are silenced to keep the console readable.
suppressMessages({
  library(tidyverse)
  library(lubridate)
  library(vroom)
  library(writexl)
  library(AMR)
})


# When TRUE, intermediate tables are removed with rm() as soon as the script
# no longer needs them.
minMemoryAsap = FALSE


# Notes on the report flags in the source files
#
# Rapport (AB):
#   always keep these rows,
#   '#' = not reported
#
# Rap (ST):
#   '#' = not reported; do not include rows that were not reported
# A 'not reported' flag can be wrong.

# Directory that holds the export files named below
dataPath = 'interfaces/4D_final_results/data'
# Antibiotics data
fnAB = 'AB-2021-22.csv'
# Microorganism data
fnST = 'ST-Alles.csv'
# Sample data
fnMS = 'MSv2-2022-tm10.csv'
# Isolate test data
fnTS = 'TS-Alles-221130-092256.csv'
# Tests data
fnTM = 'TM-2022-tm10.csv'

# Project-wide data and helpers.
# NOTE(review): presumably these files define is_icu(), is_clinical() and
# is_outward(), which are called further down — confirm.
source('globalData.R')
source('interfaces.R')

# temporarily put here for dev
# Mapping from the local antibiotic mnemonic to the EARS-Net mnemonic and name.
tAntibiotics = readxl::read_xlsx('data/ab_mapping.xlsx') %>%
  select(Mnemonic, Naam, `EARS-Net.Mnemonic`, `EARS-Net.Name`)


# Read the export files. The sample-number column is read as text explicitly:
# when it is type-guessed as a number, a later as.character() may print it in
# scientific notation (e.g. 22010000000 -> "2.201e+10"), which breaks both the
# '22' prefix filter and the join keys. All other columns are still guessed.
tAB = vroom(
  file = file.path(dataPath, fnAB),
  col_types = vroom::cols(Monsternummer = vroom::col_character()),
  show_col_types = FALSE
)
tST = vroom(
  file = file.path(dataPath, fnST),
  col_types = vroom::cols(Monsternummer = vroom::col_character()),
  show_col_types = FALSE
)
# The MS file carries 5 leading lines before the header row.
tMS = vroom(
  file = file.path(dataPath, fnMS),
  skip = 5,
  col_types = vroom::cols(Monsternr = vroom::col_character()),
  show_col_types = FALSE
)
tTS = vroom(
  file = file.path(dataPath, fnTS),
  col_types = vroom::cols(Monsternummer = vroom::col_character()),
  show_col_types = FALSE
)
# NOTE(review): tTM is loaded but not referenced anywhere else in this script.
tTM = vroom(file = file.path(dataPath, fnTM), show_col_types = FALSE)

# Antibiotic results: keep only sample numbers that start with '22' and store
# the sample number as text so it can serve as a join key.
tAB = tAB %>%
  filter(str_sub(Monsternummer, 1, 2) == '22') %>%
  mutate(across(Monsternummer, as.character))

# Reported isolates only.
# In the ST file '#' in Rap means "not reported"; those rows are excluded, so
# only rows where Rap is NA (= reported) are kept. Sample numbers must start
# with '22' and are stored as text.
tST = tST %>%
  select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  filter(str_sub(Monsternummer, 1, 2) == '22') %>%
  filter(is.na(Rap)) %>%
  mutate(across(Monsternummer, as.character))

# Sample table: keep samples whose number starts with '22' and whose ObjType
# is not 'L', align the key/patient column names with the other tables, and
# build one timestamp column `date` from the receipt date and time columns.
tMS = tMS %>%
  filter(
    str_sub(Monsternr, 1, 2) == '22',
    ObjType != 'L'
    # Materiaal == 'MP_BLOED' # 118922, 2023-02-07
  ) %>%
  rename(
    Monsternummer = Monsternr,
    patientid = `PIN-lot`
  ) %>%
  # NOTE(review): only the first term is negated, so this drops just
  # `Materiaal Variabelen`; ObjType, Lot and Kamer stay in the table.
  # Confirm that this is what was intended.
  select(-`Materiaal Variabelen`, ObjType, Lot, Kamer) %>%
  mutate(
    # day-month-year date plus time of day, parsed into a single date-time
    date = dmy_hms(paste(OntvOLabDt, as.character(Tijd))),
    Monsternummer = as.character(Monsternummer)
  ) %>%
  select(-OntvOLabDt, -Tijd)

# Isolate tests: store the sample number as text so it matches the join key
# type used by the other tables.
tTS = mutate(tTS, Monsternummer = as.character(Monsternummer))

# Samples in MS that have no isolate are negative. To keep them, MS is the
# left-hand table and the isolates (ST), antibiotic results (AB) and isolate
# tests (TS) are left-joined onto it.
# TODO: only the culture tests from the MS file should be included.

x = tMS %>%
  mutate(Monsternummer = str_trim(Monsternummer)) %>%
  left_join(
    tST,
    by = 'Monsternummer'
  ) %>%
  left_join(
    tAB,
    by = c(Monsternummer = 'Monsternummer', Isolnr = 'IsolNr')
  ) %>%
  left_join(
    tTS,
    by = c(Monsternummer = 'Monsternummer', Isolnr = 'Isolaatnr'),
    # columns present on both sides are told apart by these suffixes
    suffix = c('.MSxSTxAB', '.TS_IsolaatTest')
  )

# Optionally free the source tables now that they are merged into x.
if (minMemoryAsap) {
  rm(tAB, tST, tMS)
}

# Derive two numeric fields from the sample number: characters 1-2 become
# `year` and characters 3-4 become `weeknr`. Only year 22 is kept, a smaller
# subset that keeps the first explorations workable.
x = x %>%
  # select(-`UMCG 4D Uniekmaker Specimen - IsolNr`) %>%
  mutate(
    year = as.numeric(str_sub(Monsternummer, 1, 2)),
    weeknr = as.numeric(str_sub(Monsternummer, 3, 4))
  ) %>%
  filter(year == 22)
         # weeknr >= 41)

# Shape the joined table towards the column layout RadaR expects.
#
# fields expected by RadaR:
#
# sampleid <chr>, date <date>, specimen_group <chr>, specimen_type <chr>,
# department <chr>, type_dept <fct>, specialism <chr>,
# is_icu <lgl>, is_clinical <lgl>, is_outward <lgl>,
# age <int>, gender <fct>, patientid <chr>, mo <mo>,
s = x %>%
  # filter(Materiaal == 'MP_BLOED') %>% # 118922, 2023-02-07
  # align column names (new name = old name); `mo` is derived later with
  # as.mo(MO)
  rename(all_of(c(
    sampleid = 'Monsternummer',
    rap.st = 'Rap',
    rap.ab = 'Rapport.MSxSTxAB',
    department = 'LigAfd',
    specialism = 'Spec',
    age = 'Leeftijd',
    gender = 'Geslacht'
  ))) %>%
  # fields that are not available yet get a constant placeholder
  add_column(specimen_group = 'bk', specimen_type = '', type_dept = '') %>%
  # department flags, each computed from the department code
  mutate(
    is_icu = is_icu(department),
    is_clinical = is_clinical(department),
    is_outward = is_outward(department)
  ) %>%
  # put the two report flags next to each other and the time fields right
  # after the sample id
  relocate(rap.ab, .after = rap.st) %>%
  relocate(year, weeknr, date, .after = sampleid) %>%
  # look up the EARS-Net mnemonic and name for the local antibiotic code,
  # then continue with the EARS-Net mnemonic instead of the local code
  left_join(tAntibiotics, by = c(AB = 'Mnemonic')) %>%
  rename(AB_WHO = `EARS-Net.Mnemonic`) %>%
  select(-AB)

# Optionally free the joined table; everything needed now lives in s.
if (minMemoryAsap) {
  rm(x)
}

# TODO: find the difference between multiple rows per sampleid that make
# pivot_wider() yield <list> instead of <rsi> columns.
# Scratch code for interactive use; the FALSE guard keeps it from running when
# the script is sourced (FALSE rather than F, because F can be reassigned).
if (FALSE) {
  s %>% filter(sampleid == '22010009431') %>%
    select(sampleid, AB_WHO, RIS, MIC, MO) %>%
    mutate(
      RIS = as.rsi(RIS),
      MIC = as.mic(MIC),
      mo = as.mo(MO)) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS)
  # columns that differ: rap.ab, Etst, Naam, AB_WHO, `EARS-Net.Name`
}

# Reduce s to at most one row per sample / isolate / antibiotic / result so
# that the wide pivot further down does not produce list columns.
x = s %>%
  select(-`EARS-Net.Name`, -Naam,
         # the following values cause duplicates in radarData, so they are
         # filtered out for now; they will be needed later on
         -Rapport.TS_IsolaatTest, -Waarde.TS_IsolaatTest, -isolaattest,
         -MIC, -Etst, -Diff, -rap.ab) %>%
  # RIS values of 'V' are a pain in the neck: treat them as missing
  mutate(RIS = replace(RIS, RIS == 'V', NA)) %>%
  group_by(sampleid, Isolnr, MO, AB_WHO) %>%
  # Mark a row for deletion when its RIS is missing AND the same
  # sample/isolate/organism/antibiotic group also holds a real RIS value.
  # Groups in which every RIS is missing are left alone, so an isolate that
  # has several rows but no interpreted result is not wiped out entirely; the
  # distinct() below collapses such a group to a single row.
  mutate(deleteRow = is.na(RIS) & !is.na(MO) & any(!is.na(RIS))) %>%
  ungroup() %>%
  # delete the rows that are marked with deleteRow
  filter(!deleteRow) %>%
  # remove the helper column
  select(-deleteRow) %>%
  # collapse the remaining exact repeats of the same result
  distinct(sampleid, Isolnr, AB_WHO, RIS, .keep_all = TRUE)

# Wide table of RIS values: one row per sampleid / Isolnr / MO and one column
# per antibiotic code found in AB_WHO.
y = pivot_wider(
  x,
  id_cols = c(sampleid, Isolnr, MO),
  names_from = AB_WHO,
  values_from = RIS
)

# Build the wide RadaR table: RIS is converted with as.rsi(), the organism
# code with as.mo(), and the antibiotics are spread into one column each.
# NOTE(review): newer AMR releases renamed as.rsi() to as.sir() — verify
# against the installed AMR version.
radarData = x %>%
  # filter(!is.na(AB_WHO)) %>%
  mutate(
    RIS = as.rsi(RIS),
    # MIC = as.mic(MIC),
    mo = as.mo(MO)) %>%
  # no id_cols given: every column other than AB_WHO and RIS identifies a row
  pivot_wider(names_from = AB_WHO, values_from = RIS) %>%
  # pivot_wider(names_from = Rapport.TS_IsolaatTest,
  #             values_from = Waarde.TS_IsolaatTest)
  # sort all columns alphabetically, then move the descriptive columns to the
  # front so that the antibiotic columns follow in alphabetical order
  select(order(colnames(.))) %>%
  relocate(
    all_of(
      c(
        'sampleid', 'Isolnr', 'patientid', 'year', 'weeknr', 'date', 'MMBProc',
        'department', 'type_dept', 'specialism', 'is_clinical', 'is_icu', 'is_outward',
        'specimen_type', 'specimen_group',
        'rap.st',
        # 'rap.ab',
        'age', 'gender',
        # 'Diff', 'Etst',
        'Waarde.MSxSTxAB',
        # 'Waarde.TS_IsolaatTest',
        # 'MIC',
        'MO', 'mo', 'Pos'
      )
    ),
    .before = 1
  ) %>%
  # Rows whose AB_WHO is NA are pivoted into a column literally named 'NA'.
  # Drop it when present; any_of() keeps this from failing when every row had
  # an antibiotic code and no such column was created.
  select(-any_of('NA'))

# Optionally free s; radarData was built from x and no longer needs it.
if (minMemoryAsap) {
  rm(s)
}

# Keep samples received from 2022-01-01 up to and including 2022-09-30.
# The comparison is done on calendar dates: comparing the date-time column
# with the string '2022-09-30' would cut off at midnight at the START of that
# day (and interpret the string in the session time zone), silently dropping
# the last day of the window.
radarData = radarData %>%
  filter(
    as_date(date) >= ymd('2022-01-01'),
    as_date(date) <= ymd('2022-09-30')
  )


# Scratch code for interactive exploration. The FALSE guard keeps it from
# running when the script is sourced (FALSE rather than F, because F can be
# reassigned).
if (FALSE) {
  # TODO one would expect more negatives than positives
  # DONE for this, tMS is left-joined with tST instead of the other way round
  radarData %>% filter(!is.na(Pos), MO == 'mrsa') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'esccol') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'pseaer') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'klepne') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'canalb') %>% count(patientid)
  radarData %>% filter(!is.na(Pos), MO == 'enbcco') %>% count(patientid)

  # NOTE(review): isolaattest and Waarde.TS_IsolaatTest are dropped when x is
  # built, so these three statements look like they fail as written.
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'enbcco') %>% count(patientid)
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'esccol') %>% count(patientid)
  radarData %>% filter(isolaattest == 'ESBL', Waarde.TS_IsolaatTest == '+', MO == 'klepne') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', GEN == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', CIP == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'klepne', MEM == 'R') %>% count(patientid)
  radarData %>% filter(MO == 'staaur', CLI == 'R') %>% count(patientid)

  radarData %>% filter(is.na(MO), is.na(rap.st))

  # rows for patient 8000005 in the MS file: 36
  tMS %>% filter(patientid == '8000005') %>% count(Monsternummer)
  # rows for patient 8000005 in the ST file: 23
  tST %>%
    filter(Monsternummer %in% (tMS %>% filter(patientid == '8000005') %>% pull(Monsternummer) %>% unique())) %>%
    group_by(Monsternummer) %>%
    mutate(n = n()) %>%
    print(n=25)
  # 22 * 36 = 792


  # NOTE(review): several of the columns excluded here (MIC, Diff,
  # isolaattest, ...) are already dropped when x is built — verify before
  # running.
  x %>%
    filter(sampleid == '22070019631') %>%
    select(-year, -weeknr, -date, -patientid, -age, -ObjType, -Lot, -gender,
           -PC, -department, -Type, -Kamer, -Isolnr, MO, -Waarde.MSxSTxAB,
           -Pos, -rap.st, -MIC, -Diff, -isolaattest, -Waarde.TS_IsolaatTest,
           -Rapport.TS_IsolaatTest, -Afname, -is_icu, -is_clinical,
           -is_outward) %>%
    print(n=38)

  x %>% filter(sampleid == '22070019631', Isolnr == 3) %>%
    # these values cause duplicates in radarData, so they are filtered out
    # for now; they will be needed later on
    # select(-MIC, -Etst, -Diff, -rap.ab) %>%
    pivot_wider(names_from = AB_WHO, values_from = RIS) %>% view()
    # pivot_wider(names_from = Rapport.TS_IsolaatTest,
    #             values_from = Waarde.TS_IsolaatTest) %>% view()

  # Return the columns of x that hold more or fewer than one distinct value,
  # together with sampleid, patientid and RIS.
  # The distinct values are counted over all rows of x, not per
  # (sampleid, Isolnr) group, so x should be filtered to one isolate first.
  listMultiValCols = function(x) {
    # one count per column; vapply() also copes with a table without columns
    nDistinct = vapply(x, function(col) length(unique(col)), integer(1))
    multValCols = names(nDistinct)[nDistinct != 1]
    # all_of() makes explicit that multValCols is a vector of column names
    x %>% select(sampleid, patientid, all_of(multValCols), RIS)
  }

  # this antibiotic still yields a list column
  x %>% filter(AB_WHO == 'CAZ') %>% count(sampleid, Isolnr, AB_WHO) %>% filter(n>1)

  x %>%
    filter(
      sampleid == '22070547731' %>% str_trim(),
      AB_WHO == 'CAZ',
      RIS != 'V'
    ) %>%
    select(sampleid, Isolnr, AB_WHO, RIS)

  key = c('sampleid', 'Isolnr')
  y %>% filter(!is.na(FOX)) %>% select(all_of(key), FOX)

  x %>%
    select(sampleid, Isolnr, AB_WHO, RIS) %>%
    pivot_wider(id_cols = c(sampleid, Isolnr),
                names_from = AB_WHO,
                values_from = RIS) %>%
    select(1:5)


  # Select the key columns of x plus any extra columns named in s.
  selectRelCols = function(x, s = c()) {
    x %>% select(sampleid, Isolnr, MO, AB_WHO, RIS, all_of(s))
  }
}


# vroom_write(x = x,
#             file = file.path(getwd(), dataPath, 'ST x AB_2022.csv'),
#             delim = ';')