144 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			144 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
require(tidyverse)
 | 
						||
 | 
						||
source('globalData.R')
 | 
						||
reqColumns = abAbbrevs
 | 
						||
 | 
						||
patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 
 | 
						||
                      'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group', 
 | 
						||
                      'patientid', 'birth_date', 'department', 'type_dept', 
 | 
						||
                      'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
 | 
						||
 | 
						||
readPatch = function(filename, reqColumns) {
 | 
						||
  patch = read_rds(filename)
 | 
						||
  for (c in 1:length(reqColumns)) {
 | 
						||
    if (!reqColumns[c] %in% colnames(patch)) { 
 | 
						||
      patch = patch %>% 
 | 
						||
        add_column(!!reqColumns[c] := as.rsi(NA)) %>%
 | 
						||
        select(order(colnames(.))) %>%
 | 
						||
        relocate(all_of(patchFirstColumns), .before = 1)
 | 
						||
    }
 | 
						||
  }
 | 
						||
  return(patch)
 | 
						||
}
 | 
						||
 | 
						||
readAllPatches = function() {
 | 
						||
  tPatches = tibble(name = list.files(path = 'data', pattern = 'bk_.*\\.rds'))
 | 
						||
  
 | 
						||
  tPatches = tPatches %>% 
 | 
						||
    separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>%
 | 
						||
    separate(tail, into = c('to', 'ext'), sep = '\\.') %>%
 | 
						||
    filter(!is.na(to), !str_detect(to, '[:alpha:]')) %>%
 | 
						||
    mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>%
 | 
						||
    arrange(from, to)
 | 
						||
  
 | 
						||
  return(tPatches)
 | 
						||
}
 | 
						||
 | 
						||
stitchPatches = function(tPatches, dataPath = 'data', reqColumns) {
 | 
						||
  for (i in 1:nrow(tPatches)) {
 | 
						||
    # patch = read_rds(file.path(dataPath, tPatches[i,]$name))
 | 
						||
    # for (c in 1:length(reqColumns)) {
 | 
						||
    #   if (!reqColumns[c] %in% colnames(patch)) { patch = patch %>% add_column(!!reqColumns[c] := is.rsi(NA)) }
 | 
						||
    # }
 | 
						||
    cat(paste0('  patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n'))
 | 
						||
    patch = readPatch(filename = file.path(dataPath, tPatches$name[i]), 
 | 
						||
                      reqColumns = reqColumns)
 | 
						||
    # print(colnames(patch))
 | 
						||
    if (i == 1) {
 | 
						||
      tStitched = patch
 | 
						||
    } else {
 | 
						||
      tStitched = tStitched %>% add_row(patch)
 | 
						||
    }
 | 
						||
  }
 | 
						||
  tStitched = tStitched %>% arrange(date, sampleid)
 | 
						||
  
 | 
						||
  return(tStitched)
 | 
						||
}
 | 
						||
 | 
						||
appendDailyData = function(fnTotal, fnTotalSofar, fnDay, reqColumns) {
 | 
						||
  assert_that(file.exists(fnTotalSofar))#, 
 | 
						||
              # msg = paste0("File ", fnTotalSofar, " doesn't exist"))
 | 
						||
  assert_that(file.exists(fnDay))
 | 
						||
  
 | 
						||
  tTotalSofar = readPatch(fnTotalSofar, reqColumns = reqColumns)
 | 
						||
  tDay        = readPatch(fnDay, reqColumns = reqColumns)
 | 
						||
  
 | 
						||
  tTotal = tTotalSofar %>% add_row(tDay)
 | 
						||
  
 | 
						||
  write_rds(x = tTotal, file = fnTotal, compress = 'gz')
 | 
						||
  
 | 
						||
  return(tTotal)
 | 
						||
}
 | 
						||
 | 
						||
manualStitch_2022_09_30 = function() {
 | 
						||
  a = 
 | 
						||
    mutate(RAP = as.character('#')) %>% # correct for accidental inclusion in as.rsi conversion
 | 
						||
    select(order(colnames(.))) %>%
 | 
						||
    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
 | 
						||
               Materiaal, specimen_type, specimen_group, uuid, patientid,
 | 
						||
               old_patientid, birth_date, old_birth_date, department,
 | 
						||
               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
 | 
						||
  # a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before
 | 
						||
  
 | 
						||
  b = readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>%
 | 
						||
    select(order(colnames(.))) %>%
 | 
						||
    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
 | 
						||
               Materiaal, specimen_type, specimen_group, patientid,
 | 
						||
               birth_date, department,
 | 
						||
               type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
 | 
						||
  b = b %>% mutate(across(AMC:VOR, as.rsi))
 | 
						||
  
 | 
						||
  tTotal = a %>% 
 | 
						||
    add_row(b) %>%
 | 
						||
    mutate(MIC = as.mic(MIC),
 | 
						||
           mo = as.mo(mo))
 | 
						||
  # Warning: Problem with `mutate()` column `mo`.
 | 
						||
  # ℹ `mo = as.mo(mo)`.
 | 
						||
  # ℹ 
 | 
						||
  # in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and were considered 'unknown': "}hemstre", "}Primair",
 | 
						||
  # "acgsch", "anrhyd", "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne".
 | 
						||
  # Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if needed (see ?as.mo).
 | 
						||
  # You can also use your own reference data with set_mo_source() or directly, e.g.:
 | 
						||
  #   as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
 | 
						||
  # mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
 | 
						||
  # Warning message:
 | 
						||
  #   Problem with `mutate()` column `MIC`.
 | 
						||
  # ℹ `MIC = as.mic(MIC)`.
 | 
						||
  # ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13" and "17090355431" 
 | 
						||
  
 | 
						||
  write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz')
 | 
						||
}
 | 
						||
 | 
						||
lastLargePatchesStitching = function() manualStitch_2022_09_30()
 | 
						||
 | 
						||
# > mo_uncertainties()
 | 
						||
# Matching scores (in blue) are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name.
 | 
						||
# See `?mo_matching_score`.
 | 
						||
# 
 | 
						||
# "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316)
 | 
						||
# Also matched: Brevibacterium luteolum (0.304)
 | 
						||
# 
 | 
						||
# "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284)
 | 
						||
 | 
						||
# read 1
 | 
						||
if (F) {
 | 
						||
  tPatches = readAllPatches()
 | 
						||
  tStitched = stitchPatches(tPatches)
 | 
						||
  tStitched %>% write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds',
 | 
						||
                          compress = 'gz')
 | 
						||
 | 
						||
    x = read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds')
 | 
						||
  x = x %>% relocate(all_of(cns), .after = 'date') %>% 
 | 
						||
    relocate(specimen_group, .after = specimen_type) %>%
 | 
						||
    relocate(MIC, .after = mo)
 | 
						||
  y = x %>% mutate(across(AMB:DAP, as.rsi))
 | 
						||
  z = y %>% mutate(mo = as.mo(mo))
 | 
						||
  mo_failures = c('}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg', 'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle', 'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp', 'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA')
 | 
						||
  
 | 
						||
  write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds', 
 | 
						||
            compress = 'gz')
 | 
						||
  write_rds(z %>% filter(date >= '2021-07-01'), 'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds', 
 | 
						||
            compress = 'gz')
 | 
						||
}
 | 
						||
 |