diff --git a/DESCRIPTION b/DESCRIPTION
index 3f44bf9a..f32292ca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
Package: AMR
-Version: 2.1.1.9089
+Version: 2.1.1.9090
Date: 2024-10-04
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)
diff --git a/NEWS.md b/NEWS.md
index b06cee4e..a0c9916e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 2.1.1.9089
+# AMR 2.1.1.9090
*(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 09569450..7f846153 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -59,7 +59,6 @@ home:
sidebar:
structure: [gpthelp, toc, links, authors]
components:
-
gpthelp: ''
navbar:
diff --git a/data-raw/reproduction_of_antibiotics.R b/data-raw/reproduction_of_antibiotics.R
index 3022e7b4..99b3705c 100644
--- a/data-raw/reproduction_of_antibiotics.R
+++ b/data-raw/reproduction_of_antibiotics.R
@@ -262,43 +262,66 @@ get_synonyms <- function(CID, clean = TRUE) {
if (is.na(CID[i])) {
next
}
-
- synonyms_txt <- tryCatch(
+
+ # we will now get the closest compounds with a 96% threshold
+ similar_cids <- tryCatch(
data.table::fread(
paste0(
- "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
+ "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastsimilarity_2d/cid/",
CID[i],
- "/synonyms/TXT"
+ "/cids/TXT?Threshold=96&MaxRecords=5"
),
sep = "\n",
showProgress = FALSE
)[[1]],
error = function(e) NA_character_
)
-
- Sys.sleep(0.1)
-
- if (clean == TRUE) {
- # remove text between brackets
- synonyms_txt <- trimws(gsub(
- "[(].*[)]", "",
- gsub(
- "[[].*[]]", "",
+ all_cids <- unique(c(CID[i], similar_cids))
+ # for each one, we are getting the synonyms
+ current_syns <- character(0)
+ for (j in seq_len(length(all_cids))) {
+ synonyms_txt <- tryCatch(
+ data.table::fread(
+ paste0(
+ "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
+ all_cids[j],
+ "/synonyms/TXT"
+ ),
+ sep = "\n",
+ showProgress = FALSE
+ )[[1]],
+ error = function(e) NA_character_
+ )
+
+ Sys.sleep(0.05)
+
+ if (clean == TRUE) {
+ # remove text between brackets
+ synonyms_txt <- trimws(gsub(
+ "[(].*[)]", "",
gsub(
- "[(].*[]]", "",
- gsub("[[].*[)]", "", synonyms_txt)
+ "[[].*[]]", "",
+ gsub(
+ "[(].*[]]", "",
+ gsub("[[].*[)]", "", synonyms_txt)
+ )
)
- )
- ))
- synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
- # only length 6 to 20 and no txt with reading marks or numbers and must start with capital letter (= brand)
- synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) &
- !grepl("[-&{},_0-9/:]", synonyms_txt) &
- grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
- synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
+ ))
+ synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
+ synonyms_txt <- gsub(" ?(mono)?sodium ?", "", ignore.case = TRUE, synonyms_txt)
+ synonyms_txt <- gsub(" ?injection ?", "", ignore.case = TRUE, synonyms_txt)
+ # only length 5 to 20, no text with punctuation marks or numbers, and must start with a capital letter (= brand)
+ synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(5:20) &
+ !grepl("[-&{},_0-9/:]", synonyms_txt) &
+ grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
+ synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
+ }
+
+ current_syns <- c(current_syns, synonyms_txt)
}
- synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]))
- synonyms[i] <- list(sort(synonyms_txt))
+
+ current_syns <- unique(trimws(current_syns[tolower(current_syns) %in% unique(tolower(current_syns))]))
+ synonyms[i] <- list(sort(current_syns))
}
names(synonyms) <- CID
synonyms
@@ -319,7 +342,7 @@ for (i in seq_len(length(synonyms))) {
antibiotics$synonyms <- synonyms
-stop("remember to remove co-trimoxazole as synonyms from SXT (Sulfamethoxazole), so it only exists in SXT!")
+stop("remember to remove co-trimoxazole as synonyms from SMX (Sulfamethoxazole), so it only exists in SXT!")
sulfa <- antibiotics[which(antibiotics$ab == "SMX"), "synonyms", drop = TRUE][[1]]
cotrim <- antibiotics[which(antibiotics$ab == "SXT"), "synonyms", drop = TRUE][[1]]
sulfa <- sulfa[!sulfa %in% cotrim]