# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Data Analysis for R #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# LICENCE #
# (c) 2018-2021 Berends MS, Luz CF et al. #
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
library(AMR)
library(tidyverse)

# Builds `snomed`: one row per (full name, SNOMED code) pair, taken from the
# Nictiz reference list of micro-organisms.
#
# How to get the input:
# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ (Ctrl/Cmd + A in table)
# and copy the selection, so the table is on the clipboard.

# read the table from clipboard, skipping its first two lines
snomed <- clipr::read_clip_tbl(skip = 2)

# Fail early with a readable message when the copied table does not contain
# the two columns used below (e.g. because the wrong region was copied).
required_cols <- c("Omschrijving.", "Id")
missing_cols <- setdiff(required_cols, colnames(snomed))
if (length(missing_cols) > 0) {
  stop("The table read from the clipboard lacks the expected column(s): ",
       paste(missing_cols, collapse = ", "),
       call. = FALSE)
}

snomed <- snomed %>%
  # Keep only rows whose description ("Omschrijving" is Dutch for
  # "description"), after stripping rank prefixes and subspecies markers,
  # equals a full name in `microorganisms` or `microorganisms.old`.
  dplyr::filter(gsub("(^genus |^familie |^stam |ss.? |subsp.? |subspecies )", "",
                     Omschrijving.,
                     ignore.case = TRUE) %in% c(microorganisms$fullname,
                                                microorganisms.old$fullname)) %>%
  # The name is derived from the unstripped description via mo_name().
  # NOTE(review): as.integer() returns NA (with a warning) for values above
  # .Machine$integer.max (2147483647) -- confirm that no Id in the source
  # table exceeds that, otherwise those codes are silently lost here.
  dplyr::transmute(fullname = mo_name(Omschrijving.),
                   snomed = as.integer(Id)) %>%
  # drop rows whose resulting name matches "unknown"
  dplyr::filter(!fullname %like% "unknown")
# Collapse `snomed` to one row per full name; all SNOMED codes belonging to
# that name are gathered into a single list-column element.
snomed_trans <- snomed %>%
  group_by(fullname) %>%
  mutate(snomed_list = list(snomed)) %>%
  ungroup() %>%
  select(fullname, snomed = snomed_list) %>%
  distinct(fullname, .keep_all = TRUE)

# Start from the packaged data set. If it already carries a `snomed` column
# (from an earlier run of this script), remove it first: otherwise the join
# would also key on that list-column instead of simply replacing it.
microorganisms <- AMR::microorganisms
microorganisms$snomed <- NULL

# Join explicitly on the full name only; organisms without a code in
# `snomed_trans` get an empty (NULL) list element.
microorganisms <- microorganisms %>%
  left_join(snomed_trans, by = "fullname")

# remove the NULLs, set to NA
microorganisms$snomed <- lapply(microorganisms$snomed, function(x) if (length(x) == 0) NA else x)

microorganisms <- dataset_UTF8_to_ASCII(microorganisms)

# overwrite the stored data set of the package with the updated one
usethis::use_data(microorganisms, overwrite = TRUE)

# remove the local copy from the workspace again
rm(microorganisms)
# OLD ---------------------------------------------------------------------
# The commented-out code below is the previous approach, kept for reference:
# it looked up each organism through the SNOMED CT browser (Snowstorm) API.
# baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
# edition <- 'MAIN'
# version <- '2019-07-31'
#
# microorganisms.snomed <- data.frame(conceptid = character(0),
# mo = character(0),
# stringsAsFactors = FALSE)
# microorganisms$snomed <- ""
#
# # for (i in 1:50) {
# for (i in 1:1000) {
#
# if (i %% 10 == 0) {
# cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
# }
#
# mo_data <- microorganisms %>%
# filter(mo == microorganisms$mo[i]) %>%
# as.list()
#
# if (!mo_data$rank %in% c("genus", "species")) {
# next
# }
#
# searchTerm <- paste0(
# ifelse(mo_data$rank == "genus", "Genus ", ""),
# mo_data$fullname,
# " (organism)")
#
# url <- paste0(baseUrl, '/browser/',
# edition, '/',
# version,
# '/descriptions?term=', curl::curl_escape(searchTerm),
# '&mode=fullText&activeFilter=true&limit=', 250)
# results <- url %>%
# httr::GET() %>%
# httr::content(type = "text", encoding = "UTF-8") %>%
# jsonlite::fromJSON(flatten = TRUE) %>%
# .$items
# if (NROW(results) == 0) {
# next
# } else {
# message("Adding ", crayon::italic(mo_data$fullname))
# }
#
# tryCatch(
# microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
# error = function(e) invisible()
# )
#
# if (nrow(results) > 1) {
# microorganisms.snomed <- microorganisms.snomed %>%
# bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
# mo = as.character(mo_data$mo)))
# }
# }