(v1.7.0.9000) package size

2025-11-25 06:11:06 +01:00 · 2021-05-30 22:14:38 +02:00
parent f1d9b489c5
commit f406319503
43 changed files with 305 additions and 283 deletions
--- a/data-raw/AMR_latest.tar.gz
+++ b/data-raw/AMR_latest.tar.gz
--- a/data-raw/_internals.R
+++ b/data-raw/_internals.R
@@ -31,109 +31,6 @@ devtools::load_all(quiet = TRUE)

 old_globalenv <- ls(envir = globalenv())

-# Helper functions --------------------------------------------------------
-
-create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
-  # Determination of which staphylococcal species are CoNS/CoPS according to:
-  # - Becker et al. 2014, PMID 25278577
-  # - Becker et al. 2019, PMID 30872103
-  # - Becker et al. 2020, PMID 32056452
-  # this function returns class <mo>
-  MO_staph <- AMR::microorganisms
-  MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
-  if (type == "CoNS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
-                                           "auricularis", "borealis", "caeli", "capitis", "caprae", 
-                                           "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
-                                           "croceilyticus",
-                                           "debuckii", "devriesei", "edaphicus", "epidermidis",
-                                           "equorum", "felis", "fleurettii", "gallinarum",
-                                           "haemolyticus", "hominis", "jettensis", "kloosii",
-                                           "lentus", "lugdunensis", "massiliensis", "microti",
-                                           "muscae", "nepalensis", "pasteuri", "petrasii",
-                                           "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
-                                           "pulvereri", "rostri", "saccharolyticus", "saprophyticus",
-                                           "sciuri", "simulans", "stepanovicii", "succinus",
-                                           "ureilyticus",
-                                           "vitulinus", "vitulus", "warneri", "xylosus")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
-             "mo", drop = TRUE]
-  } else if (type == "CoPS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
-                                           "agnetis", "argenteus",
-                                           "cornubiensis",
-                                           "delphini", "lutrae",
-                                           "hyicus", "intermedius",
-                                           "pseudintermedius", "pseudointermedius",
-                                           "schweitzeri", "simiae")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
-             "mo", drop = TRUE]
-  }
-}
-
-create_AB_lookup <- function() {
-  AB_lookup <- AMR::antibiotics
-  AB_lookup$generalised_name <- generalise_antibiotic_name(AB_lookup$name)
-  AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
-  AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
-  AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
-  AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[, 
-                                                                               c("ab", "atc", "cid", "name",
-                                                                                 colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
-                                                                               drop = FALSE]),
-                                                                   stringsAsFactors = FALSE)),
-                                             function(x) {
-                                               x <- generalise_antibiotic_name(unname(unlist(x)))
-                                               x[x != ""]
-                                             }))
-  AB_lookup
-}
-
-create_MO_lookup <- function() {
-  MO_lookup <- AMR::microorganisms
-  
-  MO_lookup$kingdom_index <- NA_real_
-  MO_lookup[which(MO_lookup$kingdom == "Bacteria" | MO_lookup$mo == "UNKNOWN"), "kingdom_index"] <- 1
-  MO_lookup[which(MO_lookup$kingdom == "Fungi"), "kingdom_index"] <- 2
-  MO_lookup[which(MO_lookup$kingdom == "Protozoa"), "kingdom_index"] <- 3
-  MO_lookup[which(MO_lookup$kingdom == "Archaea"), "kingdom_index"] <- 4
-  # all the rest
-  MO_lookup[which(is.na(MO_lookup$kingdom_index)), "kingdom_index"] <- 5
-  
-  # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
-  MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus, 
-                                                   MO_lookup$species,
-                                                   MO_lookup$subspecies)))
-  ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname)
-  MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname"])
-  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
-  
-  # add a column with only "e coli" like combinations
-  MO_lookup$g_species <- gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO_lookup$fullname_lower, perl = TRUE)
-  
-  # so arrange data on prevalence first, then kingdom, then full name
-  MO_lookup[order(MO_lookup$prevalence, MO_lookup$kingdom_index, MO_lookup$fullname_lower), ]
-}
-
-create_MO.old_lookup <- function() {
-  MO.old_lookup <- AMR::microorganisms.old
-  MO.old_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", tolower(trimws(MO.old_lookup$fullname))))
-  
-  # add a column with only "e coli"-like combinations
-  MO.old_lookup$g_species <- trimws(gsub("^([a-z])[a-z]+ ([a-z]+) ?.*", "\\1 \\2", MO.old_lookup$fullname_lower))
-  
-  # so arrange data on prevalence first, then full name
-  MO.old_lookup[order(MO.old_lookup$prevalence, MO.old_lookup$fullname_lower), ]
-}
-
-create_intr_resistance <- function() {
-  # for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
-  paste(AMR::microorganisms[match(AMR::intrinsic_resistant$microorganism, AMR::microorganisms$fullname), "mo", drop = TRUE],
-        AMR::antibiotics[match(AMR::intrinsic_resistant$antibiotic, AMR::antibiotics$name), "ab", drop = TRUE])
-}
-
-
-
 # Save internal data to R/sysdata.rda -------------------------------------

 # See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
@@ -170,24 +67,50 @@ translations_file <- utils::read.delim(file = "data-raw/translations.tsv",
                                       allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
                                       quote = "")

-# Old microorganism codes
-microorganisms.translation <- readRDS("data-raw/microorganisms.translation.rds")
-
-# for mo_is_intrinsic_resistant() - saves a lot of time when executed on this vector
-INTRINSIC_R <- create_intr_resistance()
-
 # for checking input in `language` argument in e.g. mo_*() and ab_*() functions
 LANGUAGES_SUPPORTED <- sort(c("en", colnames(translations_file)[nchar(colnames(translations_file)) == 2]))

 # vectors of CoNS and CoPS, improves speed in as.mo()
+create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
+  # Determination of which staphylococcal species are CoNS/CoPS according to:
+  # - Becker et al. 2014, PMID 25278577
+  # - Becker et al. 2019, PMID 30872103
+  # - Becker et al. 2020, PMID 32056452
+  # this function returns class <mo>
+  MO_staph <- AMR::microorganisms
+  MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
+  if (type == "CoNS") {
+    MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
+                                           "auricularis", "borealis", "caeli", "capitis", "caprae", 
+                                           "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
+                                           "croceilyticus",
+                                           "debuckii", "devriesei", "edaphicus", "epidermidis",
+                                           "equorum", "felis", "fleurettii", "gallinarum",
+                                           "haemolyticus", "hominis", "jettensis", "kloosii",
+                                           "lentus", "lugdunensis", "massiliensis", "microti",
+                                           "muscae", "nepalensis", "pasteuri", "petrasii",
+                                           "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
+                                           "pulvereri", "rostri", "saccharolyticus", "saprophyticus",
+                                           "sciuri", "simulans", "stepanovicii", "succinus",
+                                           "ureilyticus",
+                                           "vitulinus", "vitulus", "warneri", "xylosus")
+                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
+             "mo", drop = TRUE]
+  } else if (type == "CoPS") {
+    MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
+                                           "agnetis", "argenteus",
+                                           "cornubiensis",
+                                           "delphini", "lutrae",
+                                           "hyicus", "intermedius",
+                                           "pseudintermedius", "pseudointermedius",
+                                           "schweitzeri", "simiae")
+                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
+             "mo", drop = TRUE]
+  }
+}
 MO_CONS <- create_species_cons_cops("CoNS")
 MO_COPS <- create_species_cons_cops("CoPS")

-# reference data - they have additional columns compared to `antibiotics` and `microorganisms` to improve speed
-AB_lookup <- create_AB_lookup()
-MO_lookup <- create_MO_lookup()
-MO.old_lookup <- create_MO.old_lookup()
-
 # antibiotic groups
 # (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv)
 globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab")
@@ -220,14 +143,10 @@ DEFINED_AB_GROUPS <- DEFINED_AB_GROUPS[!DEFINED_AB_GROUPS %in% globalenv_before_
 # Export to package as internal data ----
 usethis::use_data(eucast_rules_file, 
                  translations_file,
-                  microorganisms.translation,
-                  INTRINSIC_R,
                  LANGUAGES_SUPPORTED,
                  MO_CONS,
                  MO_COPS,
                  AB_lookup,
-                  MO_lookup,
-                  MO.old_lookup,
                  AMINOGLYCOSIDES,
                  AMINOPENICILLINS,
                  CARBAPENEMS,
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
@@ -872,12 +872,12 @@ View(old_new)
 # set new MO codes as names to existing data sets
 rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL)
 microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL)
-microorganisms.translation <- AMR:::microorganisms.translation %>%
-  bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
-  filter(!mo_old %in% MOs$mo) %>% 
-  mutate(mo_new = mo_name(mo_new, language = NULL)) %>% 
-  bind_rows(old_new %>% select(mo_old, mo_new)) %>% 
-  distinct(mo_old, .keep_all = TRUE)
+# microorganisms.translation <- AMR:::microorganisms.translation %>%
+#   bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
+#   filter(!mo_old %in% MOs$mo) %>% 
+#   mutate(mo_new = mo_name(mo_new, language = NULL)) %>% 
+#   bind_rows(old_new %>% select(mo_old, mo_new)) %>% 
+#   distinct(mo_old, .keep_all = TRUE)

 # arrange the data sets to save
 MOs <- MOs %>% arrange(fullname)
@@ -911,23 +911,23 @@ devtools::load_all(".")
 rsi_translation$mo <- as.mo(rsi_translation$mo)
 microorganisms.codes$mo <- as.mo(microorganisms.codes$mo)
 class(microorganisms.codes$mo) <- c("mo", "character")
-microorganisms.translation <- microorganisms.translation %>%
-  # (to do: add last package version to column pkg_version)
-  left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
-            by = c("mo_new" = "fullname")) %>%
-  mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>% 
-  left_join(microorganisms[, c("fullname", "mo")],               # as is microorganisms
-            by = c("name" = "fullname")) %>% 
-  select(mo_old, mo_new = mo) %>% 
-  filter(!is.na(mo_old), !is.na(mo_new))
-class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
-class(microorganisms.translation$mo_new) <- c("mo", "character")
+# microorganisms.translation <- microorganisms.translation %>%
+#   # (to do: add last package version to column pkg_version)
+#   left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
+#             by = c("mo_new" = "fullname")) %>%
+#   mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>% 
+#   left_join(microorganisms[, c("fullname", "mo")],               # as is microorganisms
+#             by = c("name" = "fullname")) %>% 
+#   select(mo_old, mo_new = mo) %>% 
+#   filter(!is.na(mo_old), !is.na(mo_new))
+# class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
+# class(microorganisms.translation$mo_new) <- c("mo", "character")
 # save those to the package
 usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
 usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
-saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
+# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
 # to save microorganisms.translation internally to the package
-source("data-raw/_internals.R")
+# source("data-raw/_internals.R")

 # load new data sets again
 devtools::load_all(".")
@@ -935,7 +935,7 @@ devtools::load_all(".")
 # and check: these codes should not be missing (will otherwise throw a unit test error):
 AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
 AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
-AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
+# AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)

 # update the example_isolates data set
 example_isolates$mo <- as.mo(example_isolates$mo)
--- a/data-raw/reproduction_of_microorganisms_update.R
+++ b/data-raw/reproduction_of_microorganisms_update.R
@@ -380,37 +380,37 @@ MOs.old <- microorganisms.old %>%
 # Keep old codes for translation ------------------------------------------

 # add removed microbial IDs to the internal translation table so old package versions keep working
-MOs.translation <- microorganisms %>% 
-  filter(!mo %in% MOs$mo) %>%
-  select(mo, fullname) %>%
-  left_join(new_synonyms) %>% 
-  left_join(MOs %>% transmute(fullname_new = fullname, mo2 = as.character(mo))) %>% 
-  select(mo_old = mo, mo_new = mo2) %>% 
-  distinct()
-MOs.translation <- AMR:::microorganisms.translation %>% 
-  left_join(MOs.translation %>% select(mo_new_update = mo_new, mo_new = mo_old)) %>% 
-  mutate(mo_new = as.character(ifelse(!is.na(mo_new_update), mo_new_update, mo_new))) %>% 
-  select(-mo_new_update) %>% 
-  bind_rows(
-    # old IDs used in microorganisms.codes must put in here as well
-    microorganisms.codes %>%
-      filter(!mo %in% MOs$mo) %>%
-      transmute(mo_old = mo, fullname = mo_name(mo)) %>%
-      left_join(MOs.old %>%
-                  select(fullname, fullname_new)) %>%
-      left_join(MOs %>%
-                  select(mo_new = mo, fullname_new = fullname)) %>%
-      transmute(mo_old = as.character(mo_old), mo_new)) %>% 
-  arrange(mo_old) %>% 
-  filter(mo_old != mo_new,
-         !mo_old %in% MOs$mo) %>%
-  left_join(., ., 
-            by = c("mo_new" = "mo_old"), 
-            suffix = c("", ".2")) %>%
-  mutate(mo_new = ifelse(!is.na(mo_new.2), mo_new.2, mo_new)) %>% 
-  distinct(mo_old, mo_new) %>% 
-  # clean up
-  df_remove_nonASCII()
+# MOs.translation <- microorganisms %>% 
+#   filter(!mo %in% MOs$mo) %>%
+#   select(mo, fullname) %>%
+#   left_join(new_synonyms) %>% 
+#   left_join(MOs %>% transmute(fullname_new = fullname, mo2 = as.character(mo))) %>% 
+#   select(mo_old = mo, mo_new = mo2) %>% 
+#   distinct()
+# MOs.translation <- AMR:::microorganisms.translation %>% 
+#   left_join(MOs.translation %>% select(mo_new_update = mo_new, mo_new = mo_old)) %>% 
+#   mutate(mo_new = as.character(ifelse(!is.na(mo_new_update), mo_new_update, mo_new))) %>% 
+#   select(-mo_new_update) %>% 
+#   bind_rows(
+#     # old IDs used in microorganisms.codes must put in here as well
+#     microorganisms.codes %>%
+#       filter(!mo %in% MOs$mo) %>%
+#       transmute(mo_old = mo, fullname = mo_name(mo)) %>%
+#       left_join(MOs.old %>%
+#                   select(fullname, fullname_new)) %>%
+#       left_join(MOs %>%
+#                   select(mo_new = mo, fullname_new = fullname)) %>%
+#       transmute(mo_old = as.character(mo_old), mo_new)) %>% 
+#   arrange(mo_old) %>% 
+#   filter(mo_old != mo_new,
+#          !mo_old %in% MOs$mo) %>%
+#   left_join(., ., 
+#             by = c("mo_new" = "mo_old"), 
+#             suffix = c("", ".2")) %>%
+#   mutate(mo_new = ifelse(!is.na(mo_new.2), mo_new.2, mo_new)) %>% 
+#   distinct(mo_old, mo_new) %>% 
+#   # clean up
+#   df_remove_nonASCII()

 message("microorganisms new:     ", sum(!MOs$fullname %in% c(microorganisms$fullname, MOs.old$fullname)))
 message("microorganisms renamed: ", sum(!MOs.old$fullname %in% microorganisms.old$fullname))
@@ -424,12 +424,12 @@ class(MOs.translation$mo_new) <- c("mo", "character")

 microorganisms <- MOs
 microorganisms.old <- MOs.old
-microorganisms.translation <- MOs.translation
+# microorganisms.translation <- MOs.translation

 # on the server, do:
 usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")
 usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
-saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
+# saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
 rm(microorganisms)
 rm(microorganisms.old)
 rm(microorganisms.translation)