144 lines
5.7 KiB
R
144 lines
5.7 KiB
R
require(tidyverse)
|
||
|
||
source('globalData.R')
|
||
reqColumns = abAbbrevs
|
||
|
||
patchFirstColumns = c('sampleid', 'date', 'is_clinical', 'is_icu', 'is_outward',
|
||
'IsolNr', 'Materiaal', 'specimen_type', 'specimen_group',
|
||
'patientid', 'birth_date', 'department', 'type_dept',
|
||
'specialism', 'age', 'gender', 'MIC', 'RAP', 'mo')
|
||
|
||
readPatch = function(filename, reqColumns) {
|
||
patch = read_rds(filename)
|
||
for (c in 1:length(reqColumns)) {
|
||
if (!reqColumns[c] %in% colnames(patch)) {
|
||
patch = patch %>%
|
||
add_column(!!reqColumns[c] := as.rsi(NA)) %>%
|
||
select(order(colnames(.))) %>%
|
||
relocate(all_of(patchFirstColumns), .before = 1)
|
||
}
|
||
}
|
||
return(patch)
|
||
}
|
||
|
||
readAllPatches = function() {
|
||
tPatches = tibble(name = list.files(path = 'data', pattern = 'bk_.*\\.rds'))
|
||
|
||
tPatches = tPatches %>%
|
||
separate(name, into = c('prefix', 'from', 'tail'), sep = '_') %>%
|
||
separate(tail, into = c('to', 'ext'), sep = '\\.') %>%
|
||
filter(!is.na(to), !str_detect(to, '[:alpha:]')) %>%
|
||
mutate(name = paste0(prefix, '_', from, '_', to, '.', ext)) %>%
|
||
arrange(from, to)
|
||
|
||
return(tPatches)
|
||
}
|
||
|
||
stitchPatches = function(tPatches, dataPath = 'data', reqColumns) {
|
||
for (i in 1:nrow(tPatches)) {
|
||
# patch = read_rds(file.path(dataPath, tPatches[i,]$name))
|
||
# for (c in 1:length(reqColumns)) {
|
||
# if (!reqColumns[c] %in% colnames(patch)) { patch = patch %>% add_column(!!reqColumns[c] := is.rsi(NA)) }
|
||
# }
|
||
cat(paste0(' patch ', i, ': ', tPatches$from[i], ' ', tPatches$to[i], '\n'))
|
||
patch = readPatch(filename = file.path(dataPath, tPatches$name[i]),
|
||
reqColumns = reqColumns)
|
||
# print(colnames(patch))
|
||
if (i == 1) {
|
||
tStitched = patch
|
||
} else {
|
||
tStitched = tStitched %>% add_row(patch)
|
||
}
|
||
}
|
||
tStitched = tStitched %>% arrange(date, sampleid)
|
||
|
||
return(tStitched)
|
||
}
|
||
|
||
appendDailyData = function(fnTotal, fnTotalSofar, fnDay, reqColumns) {
|
||
assert_that(file.exists(fnTotalSofar))#,
|
||
# msg = paste0("File ", fnTotalSofar, " doesn't exist"))
|
||
assert_that(file.exists(fnDay))
|
||
|
||
tTotalSofar = readPatch(fnTotalSofar, reqColumns = reqColumns)
|
||
tDay = readPatch(fnDay, reqColumns = reqColumns)
|
||
|
||
tTotal = tTotalSofar %>% add_row(tDay)
|
||
|
||
write_rds(x = tTotal, file = fnTotal, compress = 'gz')
|
||
|
||
return(tTotal)
|
||
}
|
||
|
||
manualStitch_2022_09_30 = function() {
|
||
a =
|
||
mutate(RAP = as.character('#')) %>% # correct for accidental inclusion in as.rsi conversion
|
||
select(order(colnames(.))) %>%
|
||
relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
|
||
Materiaal, specimen_type, specimen_group, uuid, patientid,
|
||
old_patientid, birth_date, old_birth_date, department,
|
||
type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
|
||
# a = a %>% mutate(across(AMC:VOR, as.rsi)) was already done before
|
||
|
||
b = readPatch('bk_2022-06-17_2022-09-29.rds', reqColumns) %>%
|
||
select(order(colnames(.))) %>%
|
||
relocate(c(sampleid, date, is_clinical, is_icu, is_outward, IsolNr,
|
||
Materiaal, specimen_type, specimen_group, patientid,
|
||
birth_date, department,
|
||
type_dept, specialism, age, gender, MIC, RAP, mo), .before = 1)
|
||
b = b %>% mutate(across(AMC:VOR, as.rsi))
|
||
|
||
tTotal = a %>%
|
||
add_row(b) %>%
|
||
mutate(MIC = as.mic(MIC),
|
||
mo = as.mo(mo))
|
||
# Warning: Problem with `mutate()` column `mo`.
|
||
# ℹ `mo = as.mo(mo)`.
|
||
# ℹ
|
||
# in `as.mo()`: nine unique values (covering 0.0%) could not be coerced and were considered 'unknown': "}hemstre", "}Primair",
|
||
# "acgsch", "anrhyd", "bclmeg", "BK_Isolaat_AEF", "corjimi", "strsalg" and "vvStrpne".
|
||
# Use `mo_failures()` to review them. Edit the `allow_uncertain` argument if needed (see ?as.mo).
|
||
# You can also use your own reference data with set_mo_source() or directly, e.g.:
|
||
# as.mo("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
|
||
# mo_name("mycode", reference_df = data.frame(own = "mycode", mo = "B_ESCHR_COLI"))
|
||
# Warning message:
|
||
# Problem with `mutate()` column `MIC`.
|
||
# ℹ `MIC = as.mic(MIC)`.
|
||
# ℹ in `as.mic()`: 3 results truncated (0%) that were invalid MICs: "?", "13" and "17090355431"
|
||
|
||
write_rds(x = tTotal, file = 'data/total_2022-09-29.rds', compress = 'gz')
|
||
}
|
||
|
||
lastLargePatchesStitching = function() manualStitch_2022_09_30()
|
||
|
||
# > mo_uncertainties()
|
||
# Matching scores (in blue) are based on pathogenicity in humans and the resemblance between the input and the full taxonomic name.
|
||
# See `?mo_matching_score`.
|
||
#
|
||
# "brelut" -> Brevundimonas lutea (B_BRVND_LUTE, 0.316)
|
||
# Also matched: Brevibacterium luteolum (0.304)
|
||
#
|
||
# "vvEncspp" -> Brevibacterium ravenspurgense (B_BCTRM_RVNS, 0.284)
|
||
|
||
# read 1
|
||
if (F) {
|
||
tPatches = readAllPatches()
|
||
tStitched = stitchPatches(tPatches)
|
||
tStitched %>% write_rds(file = 'data/bk_archive_2014-08_2022-06_v2022-09-15.rds',
|
||
compress = 'gz')
|
||
|
||
x = read_rds('data/bk_archive_2014-08_2022-06_v2022-09-15.rds')
|
||
x = x %>% relocate(all_of(cns), .after = 'date') %>%
|
||
relocate(specimen_group, .after = specimen_type) %>%
|
||
relocate(MIC, .after = mo)
|
||
y = x %>% mutate(across(AMB:DAP, as.rsi))
|
||
z = y %>% mutate(mo = as.mo(mo))
|
||
mo_failures = c('}AeNeSt', '}hemstre', '}nietfe', '}Primair', 'acgsan', 'acgsch', 'bclmeg', 'bctfae', 'BK_Isolaat_AEF', 'BK_Isolaat_ANF', 'BK_Isolaat_PEF', 'chcgle', 'corjimi', 'enbbug', 'encfcmva', 'Meng', 'psspsy', 'staaurar', 'stainp', 'strsalg', 'surwad', 'vvAeNeSt', 'vvAePoSt', 'vvMRSA')
|
||
|
||
write_rds(z, 'data/bk_archive_2014-08_2022-06_v2022-09-15_3.rds',
|
||
compress = 'gz')
|
||
write_rds(z %>% filter(date >= '2021-07-01'), 'data/bk_archive_2021-07_2022-06_v2022-09-23_3.rds',
|
||
compress = 'gz')
|
||
}
|
||
|