AMR/reproduction_of_antibiotics.R


# WORK IN PROGRESS --------------------------------------------------------

# vector with official names, return vector with CIDs
get_CID <- function(ab) {
  CID <- rep(NA_integer_, length(ab))
  p <- progress_estimated(n = length(ab), min_time = 0)
  for (i in 1:length(ab)) {
    p$tick()$print()

    CID[i] <- tryCatch(
      data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
                               ab[i],
                               "/cids/TXT?name_type=complete"),
                        showProgress = FALSE)[[1]][1],
      error = function(e) NA_integer_)
    Sys.sleep(0.2)
  }
  CID
}

# returns vector with synonyms (brand names) for a single CID
get_synonyms <- function(CID, clean = TRUE) {
  p <- progress_estimated(n = length(CID), min_time = 0)
  p$tick()$print()

  synonyms_txt <- tryCatch(
    data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
                                           CID,
                                           "/synonyms/TXT"),
                                    sep = "\n",
                                    showProgress = FALSE)[[1]],
    error = function(e) NA_character_)

  if (clean == TRUE) {
    # remove txt between brackets
    synonyms_txt <- trimws(gsub("[(].*[)]", "", gsub("[[].*[]]", "", synonyms_txt)))
    # only length 6 to 20 and no txt with reading marks or numbers
    synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)
                                 & !synonyms_txt %like% "[-&{},_0-9]"]
    synonyms_txt <- unlist(strsplit(synonyms_txt,  ";", fixed = TRUE))
  }
  synonyms_txt <- synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]
  sort(synonyms_txt)
}

CIDs <- get_CID(antibiotics$official)
synonyms <- character(length(CIDs))
p <- progress_estimated(n = length(synonyms), min_time = 0)
for (i in 365:length(synonyms)) {
  #p$tick()$print()
  if (!is.na(CIDs[i])) {
    synonyms[i] <- paste(get_synonyms(CIDs[i]), collapse = "|")
  }
}

antibiotics$cid <- CIDs
antibiotics$trade_name <- synonyms
v0.6.1 2019-03-28 21:33:28 +01:00
			`# WORK IN PROGRESS --------------------------------------------------------`

			`# vector with official names, return vector with CIDs`
			`get_CID <- function(ab) {`
			`CID <- rep(NA_integer_, length(ab))`
			`p <- progress_estimated(n = length(ab), min_time = 0)`
			`for (i in 1:length(ab)) {`
			`p$tick()$print()`

			`CID[i] <- tryCatch(`
			`data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",`
			`ab[i],`
			`"/cids/TXT?name_type=complete"),`
			`showProgress = FALSE)[[1]][1],`
			`error = function(e) NA_integer_)`
			`Sys.sleep(0.2)`
			`}`
			`CID`
			`}`

			`# returns vector with synonyms (brand names) for a single CID`
			`get_synonyms <- function(CID, clean = TRUE) {`
			`p <- progress_estimated(n = length(CID), min_time = 0)`
			`p$tick()$print()`

			`synonyms_txt <- tryCatch(`
			`data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",`
			`CID,`
			`"/synonyms/TXT"),`
			`sep = "\n",`
			`showProgress = FALSE)[[1]],`
			`error = function(e) NA_character_)`

			`if (clean == TRUE) {`
			`# remove txt between brackets`
			`synonyms_txt <- trimws(gsub("[(].[)]", "", gsub("[[].[]]", "", synonyms_txt)))`
			`# only length 6 to 20 and no txt with reading marks or numbers`
			`synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)`
			`& !synonyms_txt %like% "[-&{},_0-9]"]`
			`synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))`
			`}`
			`synonyms_txt <- synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]`
			`sort(synonyms_txt)`
			`}`

			`CIDs <- get_CID(antibiotics$official)`
			`synonyms <- character(length(CIDs))`
			`p <- progress_estimated(n = length(synonyms), min_time = 0)`
			`for (i in 365:length(synonyms)) {`
			`#p$tick()$print()`
			`if (!is.na(CIDs[i])) {`
			`synonyms[i] <- paste(get_synonyms(CIDs[i]), collapse = "\|")`
			`}`
			`}`

			`antibiotics$cid <- CIDs`
			`antibiotics$trade_name <- synonyms`