library(tidyverse)
source('globalData.R')

# NOTE(review): as.rsi(), as.mic(), as.mo() and assert_that() are used below but
# are not attached in this file; presumably globalData.R loads the packages that
# provide them -- confirm.

# Columns that every patch must contain. abAbbrevs is not defined in this file
# (presumably it comes from globalData.R).
reqColumns <- abAbbrevs

# Columns that are moved to the front of a patch, in this order.
patchFirstColumns <- c(
  'sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward', 'IsolNr',
  'Materiaal', 'specimen_type', 'specimen_group', 'patientid', 'birth_date',
  'department', 'type_dept', 'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo'
)

# Read one patch from an .rds file and make sure it has all required columns.
#
# filename:   path of the .rds file to read.
# reqColumns: character vector of column names that must be present.
#
# Every column of reqColumns that is absent is added and filled with as.rsi(NA).
# If at least one column had to be added, all columns are sorted alphabetically
# and the patchFirstColumns are then moved to the front. A patch that already
# has every required column is returned exactly as read.
#
# Returns the (possibly extended and reordered) table.
readPatch <- function(filename, reqColumns) {
  patch <- read_rds(filename)

  missingColumns <- setdiff(reqColumns, colnames(patch))
  if (length(missingColumns) == 0) {
    return(patch)
  }

  for (column in missingColumns) {
    patch <- patch %>% add_column(!!column := as.rsi(NA))
  }

  # Reordering once after all additions yields the same column order as
  # reordering after each single addition.
  patch %>%
    select(order(colnames(.))) %>%
    relocate(all_of(patchFirstColumns), .before = 1)
}

# List the patch files in a directory.
#
# dataPath: directory that is searched for files matching 'bk_.*\\.rds'.
#
# File names are expected to look like <prefix>_<from>_<to>.<ext>. Names that do
# not split into these four parts, or whose <to> part contains letters, are
# dropped.
#
# Returns a tibble with columns prefix, from, to, ext and name, sorted by from
# and to.
readAllPatches <- function(dataPath = 'data') {
  tibble(name = list.files(path = dataPath, pattern = 'bk_.*\\.rds')) %>%
    separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>%
    separate(tail, into = c('to', 'ext'), sep = '\\.') %>%
    # A missing ext means the name had more than three '_'-separated parts, so
    # the name rebuilt below would not refer to an existing file.
    filter(!is.na(to), !is.na(ext), !str_detect(to, '[:alpha:]')) %>%
    mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>%
    arrange(from, to)
}

# Read all patches listed in tPatches and append them into one table.
#
# tPatches:   table with columns name, from and to (as returned by
#             readAllPatches()); must have at least one row.
# dataPath:   directory that contains the patch files.
# reqColumns: required columns, passed on to readPatch().
#
# Prints one progress line per patch. Patches are appended with add_row() in
# the order in which they are listed.
#
# Returns the combined table sorted by date and sampleid.
stitchPatches <- function(tPatches, dataPath = 'data', reqColumns) {
  if (nrow(tPatches) == 0) {
    stop('stitchPatches(): tPatches contains no patches to stitch',
         call. = FALSE)
  }

  patches <- lapply(seq_len(nrow(tPatches)), function(i) {
    cat(paste0(' patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n'))
    readPatch(filename = file.path(dataPath, tPatches$name[i]),
              reqColumns = reqColumns)
  })

  reduce(patches, add_row) %>%
    arrange(date, sampleid)
}

# Append the data of one day to the running total and save the result.
#
# fnTotal:      path of the .rds file the new total is written to (gz).
# fnTotalSofar: path of the existing total; must exist.
# fnDay:        path of the file with the day's data; must exist.
# reqColumns:   required columns, passed on to readPatch().
#
# Returns the new total table.
appendDailyData <- function(fnTotal, fnTotalSofar, fnDay, reqColumns) {
  assert_that(file.exists(fnTotalSofar))
  assert_that(file.exists(fnDay))

  tTotalSofar <- readPatch(fnTotalSofar, reqColumns = reqColumns)
  tDay <- readPatch(fnDay, reqColumns = reqColumns)

  tTotal <- tTotalSofar %>% add_row(tDay)
  write_rds(x = tTotal, file = fnTotal, compress = 'gz')
  tTotal
}

# One-off stitch of an archive table with the patch
# 'bk_2022-06-17_2022-09-29.rds'; writes 'data/total_2022-09-29.rds'.
#
# tArchive: the archive table to which the patch is appended.
#
# NOTE(review): the original code called mutate() without any input table, so
# it could never run; the table it was meant to operate on is not identifiable
# from this file. It therefore has to be supplied by the caller. Calling the
# function without it still fails, but with an explicit message.
#
# Returns the result of write_rds().
manualStitch_2022_09_30 <- function(tArchive = NULL) {
  if (is.null(tArchive)) {
    stop('manualStitch_2022_09_30(): supply the archive table as `tArchive`',
         call. = FALSE)
  }

  a <- tArchive %>%
    # correct for accidental inclusion in as.rsi conversion
    mutate(RAP = as.character('#')) %>%
    select(order(colnames(.))) %>%
    relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
               Materiaal, specimen_type, specimen_group, uuid, patientid,
               old_patientid, birth_date, old_birth_date, department,
               type_dept, specialism, age, gender, MIC, RAP, mo),
             .before = 1)
  # a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before

  # NOTE(review): this path has no 'data/' prefix, unlike the other patch
  # files -- confirm that this is intended.
  b <- readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>%
    select(order(colnames(.))) %>%
    relocate(all_of(patchFirstColumns), .before = 1) %>%
    mutate(across(AMC:VOR, as.rsi))

  tTotal <- a %>%
    add_row(b) %>%
    mutate(MIC = as.mic(MIC), mo = as.mo(mo))
  # Warning: Problem with `mutate()` column `mo`.
  # ℹ `mo = as.mo(mo)`.
  # ℹ in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and
  #   were considered 'unknown': "}hemstre", "}Primair", "acgsch", "anrhyd",
  #   "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne".
  # Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if
  # needed (see ?as.mo).
  # You can also use your own reference data with set_mo_source() or directly,
  # e.g.:
  #   as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
  #   mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
  # Warning message:
  # Problem with `mutate()` column `MIC`.
  # ℹ `MIC = as.mic(MIC)`.
  # ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13"
  #   and "17090355431"

  write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz')
}

# Entry point for the most recent large stitching run; forwards to
# manualStitch_2022_09_30().
lastLargePatchesStitching <- function(tArchive = NULL) {
  manualStitch_2022_09_30(tArchive)
}

# > mo_uncertainties()
# Matching scores (in blue) are based on pathogenicity in humans and the
# resemblance between the input and the full taxonomic name.
# See `?mo_matching_score`.
#
# "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316)
#   Also matched: Brevibacterium luteolum (0.304)
#
# "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284)

# read 1
# Record of a manual run; never executed when the file is sourced.
if (FALSE) {
  tPatches <- readAllPatches()
  tStitched <- stitchPatches(tPatches, reqColumns = reqColumns)
  tStitched %>%
    write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds',
              compress = 'gz')

  x <- read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds')
  # NOTE(review): cns is not defined in this file -- presumably from
  # globalData.R.
  x <- x %>%
    relocate(all_of(cns), .after = 'date') %>%
    relocate(specimen_group, .after = specimen_type) %>%
    relocate(MIC, .after = mo)
  y <- x %>% mutate(across(AMB:DAP, as.rsi))
  z <- y %>% mutate(mo = as.mo(mo))

  # Codes recorded during this run (not used further below).
  mo_failures <- c(
    '}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg',
    'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle',
    'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp',
    'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA'
  )

  write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds',
            compress = 'gz')
  write_rds(z %>% filter(date >= '2021-07-01'),
            'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds',
            compress = 'gz')
}