# WORK IN PROGRESS -------------------------------------------------------- # vector with official names, return vector with CIDs get_CID <- function(ab) { CID <- rep(NA_integer_, length(ab)) p <- progress_estimated(n = length(ab), min_time = 0) for (i in 1:length(ab)) { p$tick()$print() CID[i] <- tryCatch( data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/", ab[i], "/cids/TXT?name_type=complete"), showProgress = FALSE)[[1]][1], error = function(e) NA_integer_) Sys.sleep(0.2) } CID } # returns vector with synonyms (brand names) for a single CID get_synonyms <- function(CID, clean = TRUE) { p <- progress_estimated(n = length(CID), min_time = 0) p$tick()$print() synonyms_txt <- tryCatch( data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/", CID, "/synonyms/TXT"), sep = "\n", showProgress = FALSE)[[1]], error = function(e) NA_character_) if (clean == TRUE) { # remove txt between brackets synonyms_txt <- trimws(gsub("[(].*[)]", "", gsub("[[].*[]]", "", synonyms_txt))) # only length 6 to 20 and no txt with reading marks or numbers synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) & !synonyms_txt %like% "[-&{},_0-9]"] synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE)) } synonyms_txt <- synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))] sort(synonyms_txt) } CIDs <- get_CID(antibiotics$official) synonyms <- character(length(CIDs)) p <- progress_estimated(n = length(synonyms), min_time = 0) for (i in 365:length(synonyms)) { #p$tick()$print() if (!is.na(CIDs[i])) { synonyms[i] <- paste(get_synonyms(CIDs[i]), collapse = "|") } } antibiotics$cid <- CIDs antibiotics$trade_name <- synonyms