1
0
mirror of https://github.com/msberends/AMR.git synced 2025-08-24 16:22:10 +02:00

(v1.5.0.9041) SNOMED update

This commit is contained in:
2021-03-11 21:42:30 +01:00
parent 8d6ceb6a15
commit 4e0a9533ad
65 changed files with 86943 additions and 67626 deletions

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -219,11 +219,11 @@ changed_md5 <- function(object) {
compared
}, error = function(e) TRUE)
}
usethis::ui_done(paste0("Saving raw data to {usethis::ui_value('/data-raw/')}"))
# give official names to ABs and MOs
rsi <- dplyr::mutate(rsi_translation, ab = ab_name(ab), mo = mo_name(mo))
if (changed_md5(rsi)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('rsi_translation')} to {usethis::ui_value('/data-raw/')}"))
write_md5(rsi)
try(saveRDS(rsi, "data-raw/rsi_translation.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(rsi, "data-raw/rsi_translation.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
@@ -235,16 +235,18 @@ if (changed_md5(rsi)) {
mo <- dplyr::mutate_if(microorganisms, ~!is.numeric(.), as.character)
if (changed_md5(mo)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms')} to {usethis::ui_value('/data-raw/')}"))
write_md5(mo)
try(saveRDS(mo, "data-raw/microorganisms.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(mo, "data-raw/microorganisms.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
try(haven::write_sas(dplyr::select(mo, -snomed), "data-raw/microorganisms.sas"), silent = TRUE)
try(haven::write_sav(dplyr::select(mo, -snomed), "data-raw/microorganisms.sav"), silent = TRUE)
try(haven::write_dta(dplyr::select(mo, -snomed), "data-raw/microorganisms.dta"), silent = TRUE)
try(openxlsx::write.xlsx(mo, "data-raw/microorganisms.xlsx"), silent = TRUE)
}
if (changed_md5(microorganisms.old)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('microorganisms.old')} to {usethis::ui_value('/data-raw/')}"))
write_md5(microorganisms.old)
try(saveRDS(microorganisms.old, "data-raw/microorganisms.old.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(microorganisms.old, "data-raw/microorganisms.old.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
@@ -256,6 +258,7 @@ if (changed_md5(microorganisms.old)) {
ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
if (changed_md5(ab)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('/data-raw/')}"))
write_md5(ab)
try(saveRDS(ab, "data-raw/antibiotics.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(ab, "data-raw/antibiotics.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
@@ -267,6 +270,7 @@ if (changed_md5(ab)) {
av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
if (changed_md5(av)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('/data-raw/')}"))
write_md5(av)
try(saveRDS(av, "data-raw/antivirals.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(av, "data-raw/antivirals.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
@@ -277,6 +281,7 @@ if (changed_md5(av)) {
}
if (changed_md5(intrinsic_resistant)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('intrinsic_resistant')} to {usethis::ui_value('/data-raw/')}"))
write_md5(intrinsic_resistant)
try(saveRDS(intrinsic_resistant, "data-raw/intrinsic_resistant.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(intrinsic_resistant, "data-raw/intrinsic_resistant.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)
@@ -287,6 +292,7 @@ if (changed_md5(intrinsic_resistant)) {
}
if (changed_md5(dosage)) {
usethis::ui_info(paste0("Saving {usethis::ui_value('dosage')} to {usethis::ui_value('/data-raw/')}"))
write_md5(dosage)
try(saveRDS(dosage, "data-raw/dosage.rds", version = 2, compress = "xz"), silent = TRUE)
try(write.table(dosage, "data-raw/dosage.txt", sep = "\t", na = "", row.names = FALSE), silent = TRUE)

View File

@@ -1 +1 @@
fa68ab044001078f290218a7de6cc5c4
77f6cca42687a0e3b1b1045a2d70b226

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -108,7 +108,7 @@
"CPT" "J01DI02" 56841980 "Ceftaroline" "Cephalosporins (5th gen.)" "c(\"\", \"cfro\")" "c(\"teflaro\", \"zinforo\")" 1.2 "character(0)"
"CPA" "Ceftaroline/avibactam" "Cephalosporins (5th gen.)" "" "" ""
"CAZ" "J01DD02" 5481173 "Ceftazidime" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"caz\", \"cefta\", \"cfta\", \"cftz\", \"taz\", \"tz\", \"xtz\")" "c(\"ceftazidim\", \"ceftazidima\", \"ceftazidime\", \"ceftazidimum\", \"ceptaz\", \"fortaz\", \"fortum\", \"pentacef\", \"tazicef\", \"tazidime\")" 4 "g" "c(\"21151-6\", \"3449-6\", \"80960-8\")"
"CZA" "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "c(\"\", \"cfav\")" "" ""
"CZA" "J01DD52" 90643431 "Ceftazidime/avibactam" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"\", \"cfav\")" "c(\"avycaz\", \"zavicefta\")" 6 "g" ""
"CCV" "J01DD52" 9575352 "Ceftazidime/clavulanic acid" "Cephalosporins (3rd gen.)" "Other beta-lactam antibacterials" "Third-generation cephalosporins" "c(\"czcl\", \"xtzl\")" "" 6 ""
"CEM" 6537431 "Cefteram" "Cephalosporins (3rd gen.)" "" "c(\"cefteram\", \"cefterame\", \"cefteramum\", \"ceftetrame\")" "character(0)"
"CPL" 5362114 "Cefteram pivoxil" "Cephalosporins (3rd gen.)" "" "c(\"cefteram pivoxil\", \"tomiron\")" "character(0)"

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1 +1 @@
8c6d0e8e487d19d9a429abd64fce9290
82bd6236cf159569f6f5c99f48f92d86

View File

@@ -166,7 +166,7 @@ abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixe
# vector with official names, returns vector with CIDs
get_CID <- function(ab) {
CID <- rep(NA_integer_, length(ab))
p <- progress_estimated(n = length(ab), min_time = 0)
p <- progress_ticker(n = length(ab), min_time = 0)
for (i in 1:length(ab)) {
p$tick()$print()
@@ -208,10 +208,10 @@ abx2[is.na(CIDs),] %>% View()
# returns list with synonyms (brand names), with CIDs as names
get_synonyms <- function(CID, clean = TRUE) {
synonyms <- rep(NA_character_, length(CID))
p <- progress_estimated(n = length(CID), min_time = 0)
#p <- progress_ticker(n = length(CID), min_time = 0)
for (i in 1:length(CID)) {
p$tick()$print()
#p$tick()$print()
synonyms_txt <- ""
@@ -564,6 +564,14 @@ antibiotics[which(antibiotics$ab == "CPT"), "atc"] <- "J01DI02"
antibiotics[which(antibiotics$ab == "FAR"), "atc"] <- "J01DI03"
# ceftobiprole
antibiotics[which(antibiotics$ab == "BPR"), "atc"] <- "J01DI01"
# ceftazidime / avibactam
antibiotics[which(antibiotics$ab == "CZA"), "atc"] <- "J01DD52"
antibiotics[which(antibiotics$ab == "CZA"), "cid"] <- 90643431
antibiotics[which(antibiotics$ab == "CZA"), "atc_group1"] <- "Other beta-lactam antibacterials"
antibiotics[which(antibiotics$ab == "CZA"), "atc_group2"] <- "Third-generation cephalosporins"
antibiotics[which(antibiotics$ab == "CZA"), "iv_ddd"] <- 6
antibiotics[which(antibiotics$ab == "CZA"), "iv_units"] <- "g"
antibiotics[which(antibiotics$ab == "CZA"), "synonyms"] <- list(c("Avycaz", "Zavicefta"))
# typo
antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin"

View File

@@ -26,89 +26,44 @@
library(AMR)
library(tidyverse)
# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ (Ctrl/Cmd + A in table)
# read the table from clipboard
snomed <- clipr::read_clip_tbl(skip = 2)
snomed <- snomed %>%
dplyr::filter(gsub("(^genus |^familie |^stam |ss.? |subsp.? |subspecies )", "",
Omschrijving.,
ignore.case = TRUE) %in% c(microorganisms$fullname,
microorganisms.old$fullname)) %>%
dplyr::transmute(fullname = mo_name(Omschrijving.),
snomed = as.integer(Id)) %>%
dplyr::filter(!fullname %like% "unknown")
snomed_trans <- snomed %>%
group_by(fullname) %>%
mutate(snomed_list = list(snomed)) %>%
ungroup() %>%
select(fullname, snomed = snomed_list) %>%
distinct(fullname, .keep_all = TRUE)
# we will use Public Health Information Network Vocabulary Access and Distribution System (PHIN VADS)
# as a source, which copies directly from the latest US SNOMED CT version
# - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
# - check that current online version is higher than SNOMED_VERSION$current_version
# - if so, click on 'Download Value Set', choose 'TXT'
snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
select(1:2) %>%
set_names(c("snomed", "mo"))
microorganisms <- AMR::microorganisms %>%
left_join(snomed_trans)
# remove the NULLs, set to NA
microorganisms$snomed <- lapply(microorganisms$snomed, function(x) if (length(x) == 0) NA else x)
# save all valid genera, species and subspecies
vctr <- unique(unlist(strsplit(c(microorganisms$fullname, microorganisms.old$fullname), " ")))
vctr <- tolower(vctr[vctr %like% "^[a-z]+$"])
microorganisms <- dataset_UTF8_to_ASCII(microorganisms)
# remove all parts of the name that are no valid values in genera, species or subspecies
snomed <- snomed %>%
mutate(fullname = vapply(FUN.VALUE = character(1),
# split on space and/or comma
strsplit(tolower(mo), "[ ,]"),
function(x) trimws(paste0(x[x %in% vctr], collapse = " "))),
# remove " group"
fullname = gsub(" group", "", fullname, fixed = TRUE))
usethis::use_data(microorganisms, overwrite = TRUE)
rm(microorganisms)
snomed_keep <- snomed %>%
filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
group_by(fullname_lower = fullname) %>%
summarise(snomed = list(snomed))
# OLD ---------------------------------------------------------------------
# save to microorganisms data set
microorganisms <- microorganisms %>%
# remove old snomed
select(-snomed) %>%
# create dummy var for joining
mutate(fullname_lower = tolower(fullname)) %>%
# join new snomed
left_join(snomed_keep) %>%
# remove dummy var
select(-fullname_lower) %>%
AMR:::dataset_UTF8_to_ASCII()
usethis::use_data(microorganisms, overwrite = TRUE, compress = "xz")
# baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
# edition <- 'MAIN'
# version <- '2019-07-31'
#
# microorganisms.snomed <- data.frame(conceptid = character(0),
# mo = character(0),
# stringsAsFactors = FALSE)
# microorganisms$snomed <- ""
#
# # for (i in 1:50) {
# for (i in 1:1000) {
#
# if (i %% 10 == 0) {
# cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
# }
#
# mo_data <- microorganisms %>%
# filter(mo == microorganisms$mo[i]) %>%
# as.list()
#
# if (!mo_data$rank %in% c("genus", "species")) {
# next
# }
#
# searchTerm <- paste0(
# ifelse(mo_data$rank == "genus", "Genus ", ""),
# mo_data$fullname,
# " (organism)")
#
# url <- paste0(baseUrl, '/browser/',
# edition, '/',
# version,
# '/descriptions?term=', curl::curl_escape(searchTerm),
# '&mode=fullText&activeFilter=true&limit=', 250)
# results <- url %>%
# httr::GET() %>%
# httr::content(type = "text", encoding = "UTF-8") %>%
# jsonlite::fromJSON(flatten = TRUE) %>%
# .$items
# if (NROW(results) == 0) {
# next
# } else {
# message("Adding ", crayon::italic(mo_data$fullname))
# }
#
# tryCatch(
# microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
# error = function(e) invisible()
# )
#
# if (nrow(results) > 1) {
# microorganisms.snomed <- microorganisms.snomed %>%
# bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
# mo = as.character(mo_data$mo)))
# }
# }
# don't forget to update the version number in SNOMED_VERSION in ./R/globals.R!