styled, unit test fix

2025-07-09 19:01:51 +02:00 · 2022-08-28 10:31:50 +02:00
parent 4cb1db4554
commit 4d050aef7c
147 changed files with 10897 additions and 8169 deletions
--- a/data-raw/_install_deps.R
+++ b/data-raw/_install_deps.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -28,10 +28,14 @@ install.packages("data-raw/tinytest_1.3.1.tar.gz", dependencies = c("Depends", "
 install.packages(getwd(), repos = NULL, type = "source")
 # install.packages("data-raw/AMR_latest.tar.gz", dependencies = FALSE)

-pkg_suggests <- gsub("[^a-zA-Z0-9]+", "",
-                     unlist(strsplit(unlist(packageDescription("AMR",
-                                                               fields = c("Suggests", "Enhances", "LinkingTo"))),
-                                     split = ", ?")))
+pkg_suggests <- gsub(
+  "[^a-zA-Z0-9]+", "",
+  unlist(strsplit(unlist(packageDescription("AMR",
+    fields = c("Suggests", "Enhances", "LinkingTo")
+  )),
+  split = ", ?"
+  ))
+)
 pkg_suggests <- unname(pkg_suggests[!is.na(pkg_suggests)])
 cat("################################################\n")
 cat("Packages listed in Suggests/Enhances:", paste(pkg_suggests, collapse = ", "), "\n")
@ -49,22 +53,26 @@ if (length(to_install) == 0) {
 for (i in seq_len(length(to_install))) {
  cat("Installing package", to_install[i], "\n")
  tryCatch(install.packages(to_install[i],
-                            type = "source",
-                            repos = "https://cran.rstudio.com/",
-                            dependencies = c("Depends", "Imports", "LinkingTo"),
-                            quiet = FALSE),
-           # message = function(m) invisible(),
-           warning = function(w) message(w$message),
-           error = function(e) message(e$message))
+    type = "source",
+    repos = "https://cran.rstudio.com/",
+    dependencies = c("Depends", "Imports", "LinkingTo"),
+    quiet = FALSE
+  ),
+  # message = function(m) invisible(),
+  warning = function(w) message(w$message),
+  error = function(e) message(e$message)
+  )
  if (.Platform$OS.type != "unix" && !to_install[i] %in% rownames(utils::installed.packages())) {
    tryCatch(install.packages(to_install[i],
-                              type = "binary",
-                              repos = "https://cran.rstudio.com/",
-                              dependencies = c("Depends", "Imports", "LinkingTo"),
-                              quiet = FALSE),
-             # message = function(m) invisible(),
-             warning = function(w) message(w$message),
-             error = function(e) message(e$message))
+      type = "binary",
+      repos = "https://cran.rstudio.com/",
+      dependencies = c("Depends", "Imports", "LinkingTo"),
+      quiet = FALSE
+    ),
+    # message = function(m) invisible(),
+    warning = function(w) message(w$message),
+    error = function(e) message(e$message)
+    )
  }
 }

@ -76,8 +84,9 @@ if (length(to_update) == 0) {
 for (i in seq_len(length(to_update))) {
  cat("Updating package '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
  tryCatch(update.packages(to_update[i], repos = "https://cran.rstudio.com/", ask = FALSE),
-           # message = function(m) invisible(),
-           warning = function(w) message(w$message),
-           error = function(e) message(e$message))
+    # message = function(m) invisible(),
+    warning = function(w) message(w$message),
+    error = function(e) message(e$message)
+  )
  cat("Updated to '", to_update[i], "' v", as.character(packageVersion(to_update[i])), "\n", sep = "")
 }
--- a/data-raw/_language_update.R
+++ b/data-raw/_language_update.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -28,9 +28,10 @@

 if (!file.exists("DESCRIPTION") || !"Package: AMR" %in% readLines("DESCRIPTION")) {
  stop("Be sure to run this script in the root location of the AMR package folder.\n",
-       "Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
-       "Current working directory: ", getwd(),
-       call. = FALSE)
+    "Working directory expected to contain the DESCRIPTION file of the AMR package.\n",
+    "Current working directory: ", getwd(),
+    call. = FALSE
+  )
 }

 # save old global env to restore later
@ -41,34 +42,42 @@ load("R/sysdata.rda", envir = lang_env)

 # replace language objects with updates
 message("Reading translation file...")
-lang_env$TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
-                                           sep = "\t",
-                                           stringsAsFactors = FALSE,
-                                           header = TRUE,
-                                           blank.lines.skip = TRUE,
-                                           fill = TRUE,
-                                           strip.white = TRUE,
-                                           encoding = "UTF-8",
-                                           fileEncoding = "UTF-8",
-                                           na.strings = c(NA, "", NULL),
-                                           allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
-                                           quote = "")
+lang_env$TRANSLATIONS <- utils::read.delim(
+  file = "data-raw/translations.tsv",
+  sep = "\t",
+  stringsAsFactors = FALSE,
+  header = TRUE,
+  blank.lines.skip = TRUE,
+  fill = TRUE,
+  strip.white = TRUE,
+  encoding = "UTF-8",
+  fileEncoding = "UTF-8",
+  na.strings = c(NA, "", NULL),
+  allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
+  quote = ""
+)

-lang_env$LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
-                                        lapply(lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
-                                               function(x) list(exonym = x[1], endonym = x[2])))
+lang_env$LANGUAGES_SUPPORTED_NAMES <- c(
+  list(en = list(exonym = "English", endonym = "English")),
+  lapply(
+    lang_env$TRANSLATIONS[, which(nchar(colnames(lang_env$TRANSLATIONS)) == 2), drop = FALSE],
+    function(x) list(exonym = x[1], endonym = x[2])
+  )
+)

 lang_env$LANGUAGES_SUPPORTED <- names(lang_env$LANGUAGES_SUPPORTED_NAMES)

 # save env to internal package data
 # usethis::use_data() does not allow to save a list :(
 message("Saving to internal data...")
-save(list = names(lang_env),
-     file = "R/sysdata.rda",
-     ascii = FALSE,
-     version = 2,
-     compress = "xz",
-     envir = lang_env)
+save(
+  list = names(lang_env),
+  file = "R/sysdata.rda",
+  ascii = FALSE,
+  version = 2,
+  compress = "xz",
+  envir = lang_env
+)

 rm(lang_env)

--- a/data-raw/_pre_commit_hook.R
+++ b/data-raw/_pre_commit_hook.R
@ -24,7 +24,7 @@
 # ==================================================================== #

 # Run this file to update the package using:
-# source("data-raw/pre-commit-hook.R")
+# source("data-raw/_pre_commit_hook.R")

 library(dplyr, warn.conflicts = FALSE)
 devtools::load_all(quiet = TRUE)
@ -36,41 +36,54 @@ old_globalenv <- ls(envir = globalenv())
 # Save internal data to R/sysdata.rda -------------------------------------

 # See 'data-raw/eucast_rules.tsv' for the EUCAST reference file
-EUCAST_RULES_DF <- utils::read.delim(file = "data-raw/eucast_rules.tsv",
-                                     skip = 10,
-                                     sep = "\t",
-                                     stringsAsFactors = FALSE,
-                                     header = TRUE,
-                                     strip.white = TRUE,
-                                     na = c(NA, "", NULL)) %>%
+EUCAST_RULES_DF <- utils::read.delim(
+  file = "data-raw/eucast_rules.tsv",
+  skip = 10,
+  sep = "\t",
+  stringsAsFactors = FALSE,
+  header = TRUE,
+  strip.white = TRUE,
+  na = c(NA, "", NULL)
+) %>%
  # take the order of the reference.rule_group column in the original data file
-  mutate(reference.rule_group = factor(reference.rule_group,
-                                       levels = unique(reference.rule_group),
-                                       ordered = TRUE),
-         sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)) %>%
-  arrange(reference.rule_group,
-          reference.version,
-          sorting_rule,
-          reference.rule) %>%
+  mutate(
+    reference.rule_group = factor(reference.rule_group,
+      levels = unique(reference.rule_group),
+      ordered = TRUE
+    ),
+    sorting_rule = ifelse(grepl("^Table", reference.rule, ignore.case = TRUE), 1, 2)
+  ) %>%
+  arrange(
+    reference.rule_group,
+    reference.version,
+    sorting_rule,
+    reference.rule
+  ) %>%
  mutate(reference.rule_group = as.character(reference.rule_group)) %>%
  select(-sorting_rule)

-TRANSLATIONS <- utils::read.delim(file = "data-raw/translations.tsv",
-                                  sep = "\t",
-                                  stringsAsFactors = FALSE,
-                                  header = TRUE,
-                                  blank.lines.skip = TRUE,
-                                  fill = TRUE,
-                                  strip.white = TRUE,
-                                  encoding = "UTF-8",
-                                  fileEncoding = "UTF-8",
-                                  na.strings = c(NA, "", NULL),
-                                  allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
-                                  quote = "")
+TRANSLATIONS <- utils::read.delim(
+  file = "data-raw/translations.tsv",
+  sep = "\t",
+  stringsAsFactors = FALSE,
+  header = TRUE,
+  blank.lines.skip = TRUE,
+  fill = TRUE,
+  strip.white = TRUE,
+  encoding = "UTF-8",
+  fileEncoding = "UTF-8",
+  na.strings = c(NA, "", NULL),
+  allowEscapes = TRUE, # else "\\1" will be imported as "\\\\1"
+  quote = ""
+)

-LANGUAGES_SUPPORTED_NAMES <- c(list(en = list(exonym = "English", endonym = "English")),
-                               lapply(TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
-                                      function(x) list(exonym = x[1], endonym = x[2])))
+LANGUAGES_SUPPORTED_NAMES <- c(
+  list(en = list(exonym = "English", endonym = "English")),
+  lapply(
+    TRANSLATIONS[, which(nchar(colnames(TRANSLATIONS)) == 2), drop = FALSE],
+    function(x) list(exonym = x[1], endonym = x[2])
+  )
+)

 LANGUAGES_SUPPORTED <- names(LANGUAGES_SUPPORTED_NAMES)

@ -84,43 +97,53 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
  MO_staph <- AMR::microorganisms
  MO_staph <- MO_staph[which(MO_staph$genus == "Staphylococcus"), , drop = FALSE]
  if (type == "CoNS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-negative", "argensis", "arlettae",
-                                           "auricularis", "borealis", "caeli", "capitis", "caprae",
-                                           "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
-                                           "croceilyticus",
-                                           "debuckii", "devriesei", "edaphicus", "epidermidis",
-                                           "equorum", "felis", "fleurettii", "gallinarum",
-                                           "haemolyticus", "hominis", "jettensis", "kloosii",
-                                           "lentus", "lugdunensis", "massiliensis", "microti",
-                                           "muscae", "nepalensis", "pasteuri", "petrasii",
-                                           "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
-                                           "pulvereri", "rostri", "saccharolyticus", "saprophyticus",
-                                           "sciuri", "simulans", "stepanovicii", "succinus",
-                                           "ureilyticus",
-                                           "vitulinus", "vitulus", "warneri", "xylosus",
-                                           "caledonicus", "canis",
-                                           "durrellii", "lloydii")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
-             "mo", drop = TRUE]
+    MO_staph[which(MO_staph$species %in% c(
+      "coagulase-negative", "argensis", "arlettae",
+      "auricularis", "borealis", "caeli", "capitis", "caprae",
+      "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
+      "croceilyticus",
+      "debuckii", "devriesei", "edaphicus", "epidermidis",
+      "equorum", "felis", "fleurettii", "gallinarum",
+      "haemolyticus", "hominis", "jettensis", "kloosii",
+      "lentus", "lugdunensis", "massiliensis", "microti",
+      "muscae", "nepalensis", "pasteuri", "petrasii",
+      "pettenkoferi", "piscifermentans", "pragensis", "pseudoxylosus",
+      "pulvereri", "rostri", "saccharolyticus", "saprophyticus",
+      "sciuri", "simulans", "stepanovicii", "succinus",
+      "ureilyticus",
+      "vitulinus", "vitulus", "warneri", "xylosus",
+      "caledonicus", "canis",
+      "durrellii", "lloydii"
+    ) |
+      (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
+    "mo",
+    drop = TRUE
+    ]
  } else if (type == "CoPS") {
-    MO_staph[which(MO_staph$species %in% c("coagulase-positive", "coagulans",
-                                           "agnetis", "argenteus",
-                                           "cornubiensis",
-                                           "delphini", "lutrae",
-                                           "hyicus", "intermedius",
-                                           "pseudintermedius", "pseudointermedius",
-                                           "schweitzeri", "simiae",
-                                           "roterodami")
-                   | (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
-             "mo", drop = TRUE]
+    MO_staph[which(MO_staph$species %in% c(
+      "coagulase-positive", "coagulans",
+      "agnetis", "argenteus",
+      "cornubiensis",
+      "delphini", "lutrae",
+      "hyicus", "intermedius",
+      "pseudintermedius", "pseudointermedius",
+      "schweitzeri", "simiae",
+      "roterodami"
+    ) |
+      (MO_staph$species == "schleiferi" & MO_staph$subspecies == "coagulans")),
+    "mo",
+    drop = TRUE
+    ]
  }
 }
 create_MO_fullname_lower <- function() {
  MO_lookup <- AMR::microorganisms
  # use this paste instead of `fullname` to work with Viridans Group Streptococci, etc.
-  MO_lookup$fullname_lower <- tolower(trimws(paste(MO_lookup$genus,
-                                                   MO_lookup$species,
-                                                   MO_lookup$subspecies)))
+  MO_lookup$fullname_lower <- tolower(trimws(paste(
+    MO_lookup$genus,
+    MO_lookup$species,
+    MO_lookup$subspecies
+  )))
  ind <- MO_lookup$genus == "" | grepl("^[(]unknown ", MO_lookup$fullname, perl = TRUE)
  MO_lookup[ind, "fullname_lower"] <- tolower(MO_lookup[ind, "fullname", drop = TRUE])
  MO_lookup$fullname_lower <- trimws(gsub("[^.a-z0-9/ \\-]+", "", MO_lookup$fullname_lower, perl = TRUE))
@ -130,59 +153,103 @@ MO_CONS <- create_species_cons_cops("CoNS")
 MO_COPS <- create_species_cons_cops("CoPS")
 MO_STREP_ABCG <- as.mo(MO_lookup[which(MO_lookup$genus == "Streptococcus"), "mo", drop = TRUE], Lancefield = TRUE) %in% c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_GRPC", "B_STRPT_GRPG")
 MO_FULLNAME_LOWER <- create_MO_fullname_lower()
-MO_PREVALENT_GENERA <- c("Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
-                         "Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
-                         "Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
-                         "Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
-                         "Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
-                         "Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
-                         "Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
-                         "Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
-                         "Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
-                         "Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
-                         "Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
-                         "Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
-                         "Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
-                         "Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
-                         "Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
-                         "Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
-                         "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
-                         "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
-                         "Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
-                         "Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
-                         "Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
-                         "Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
-                         "Weeksella", "Wuchereria")
+MO_PREVALENT_GENERA <- c(
+  "Absidia", "Acholeplasma", "Acremonium", "Actinotignum", "Aedes", "Alistipes", "Alloprevotella",
+  "Alternaria", "Anaerosalibacter", "Ancylostoma", "Angiostrongylus", "Anisakis", "Anopheles",
+  "Apophysomyces", "Arachnia", "Aspergillus", "Aureobasidium", "Bacteroides", "Basidiobolus",
+  "Beauveria", "Bergeyella", "Blastocystis", "Blastomyces", "Borrelia", "Brachyspira", "Branhamella",
+  "Butyricimonas", "Candida", "Capillaria", "Capnocytophaga", "Catabacter", "Cetobacterium", "Chaetomium",
+  "Chlamydia", "Chlamydophila", "Chryseobacterium", "Chrysonilia", "Cladophialophora", "Cladosporium",
+  "Conidiobolus", "Contracaecum", "Cordylobia", "Cryptococcus", "Curvularia", "Deinococcus", "Demodex",
+  "Dermatobia", "Diphyllobothrium", "Dirofilaria", "Dysgonomonas", "Echinostoma", "Elizabethkingia",
+  "Empedobacter", "Enterobius", "Exophiala", "Exserohilum", "Fasciola", "Flavobacterium", "Fonsecaea",
+  "Fusarium", "Fusobacterium", "Giardia", "Haloarcula", "Halobacterium", "Halococcus", "Hendersonula",
+  "Heterophyes", "Histoplasma", "Hymenolepis", "Hypomyces", "Hysterothylacium", "Lelliottia",
+  "Leptosphaeria", "Leptotrichia", "Lucilia", "Lumbricus", "Malassezia", "Malbranchea", "Metagonimus",
+  "Microsporum", "Mortierella", "Mucor", "Mycocentrospora", "Mycoplasma", "Myroides", "Necator",
+  "Nectria", "Ochroconis", "Odoribacter", "Oesophagostomum", "Oidiodendron", "Opisthorchis",
+  "Ornithobacterium", "Parabacteroides", "Pediculus", "Pedobacter", "Phlebotomus", "Phocaeicola",
+  "Phocanema", "Phoma", "Piedraia", "Pithomyces", "Pityrosporum", "Porphyromonas", "Prevotella",
+  "Pseudallescheria", "Pseudoterranova", "Pulex", "Rhizomucor", "Rhizopus", "Rhodotorula", "Riemerella",
+  "Saccharomyces", "Sarcoptes", "Scolecobasidium", "Scopulariopsis", "Scytalidium", "Sphingobacterium",
+  "Spirometra", "Spiroplasma", "Sporobolomyces", "Stachybotrys", "Streptobacillus", "Strongyloides",
+  "Syngamus", "Taenia", "Tannerella", "Tenacibaculum", "Terrimonas", "Toxocara", "Treponema", "Trichinella",
+  "Trichobilharzia", "Trichoderma", "Trichomonas", "Trichophyton", "Trichosporon", "Trichostrongylus",
+  "Trichuris", "Tritirachium", "Trombicula", "Tunga", "Ureaplasma", "Victivallis", "Wautersiella",
+  "Weeksella", "Wuchereria"
+)

 # antibiotic groups
 # (these will also be used for eucast_rules() and understanding data-raw/eucast_rules.tsv)
 globalenv_before_ab <- c(ls(envir = globalenv()), "globalenv_before_ab")
-AB_AMINOGLYCOSIDES <- antibiotics %>% filter(group %like% "aminoglycoside") %>% pull(ab)
+AB_AMINOGLYCOSIDES <- antibiotics %>%
+  filter(group %like% "aminoglycoside") %>%
+  pull(ab)
 AB_AMINOPENICILLINS <- as.ab(c("AMP", "AMX"))
-AB_ANTIFUNGALS <- AB_lookup %>% filter(group %like% "antifungal") %>% pull(ab)
-AB_ANTIMYCOBACTERIALS <- AB_lookup %>% filter(group %like% "antimycobacterial") %>% pull(ab)
-AB_CARBAPENEMS <- antibiotics %>% filter(group %like% "carbapenem") %>% pull(ab)
-AB_CEPHALOSPORINS <- antibiotics %>% filter(group %like% "cephalosporin") %>% pull(ab)
-AB_CEPHALOSPORINS_1ST <- antibiotics %>% filter(group %like% "cephalosporin.*1") %>% pull(ab)
-AB_CEPHALOSPORINS_2ND <- antibiotics %>% filter(group %like% "cephalosporin.*2") %>% pull(ab)
-AB_CEPHALOSPORINS_3RD <- antibiotics %>% filter(group %like% "cephalosporin.*3") %>% pull(ab)
-AB_CEPHALOSPORINS_4TH <- antibiotics %>% filter(group %like% "cephalosporin.*4") %>% pull(ab)
-AB_CEPHALOSPORINS_5TH <- antibiotics %>% filter(group %like% "cephalosporin.*5") %>% pull(ab)
+AB_ANTIFUNGALS <- AB_lookup %>%
+  filter(group %like% "antifungal") %>%
+  pull(ab)
+AB_ANTIMYCOBACTERIALS <- AB_lookup %>%
+  filter(group %like% "antimycobacterial") %>%
+  pull(ab)
+AB_CARBAPENEMS <- antibiotics %>%
+  filter(group %like% "carbapenem") %>%
+  pull(ab)
+AB_CEPHALOSPORINS <- antibiotics %>%
+  filter(group %like% "cephalosporin") %>%
+  pull(ab)
+AB_CEPHALOSPORINS_1ST <- antibiotics %>%
+  filter(group %like% "cephalosporin.*1") %>%
+  pull(ab)
+AB_CEPHALOSPORINS_2ND <- antibiotics %>%
+  filter(group %like% "cephalosporin.*2") %>%
+  pull(ab)
+AB_CEPHALOSPORINS_3RD <- antibiotics %>%
+  filter(group %like% "cephalosporin.*3") %>%
+  pull(ab)
+AB_CEPHALOSPORINS_4TH <- antibiotics %>%
+  filter(group %like% "cephalosporin.*4") %>%
+  pull(ab)
+AB_CEPHALOSPORINS_5TH <- antibiotics %>%
+  filter(group %like% "cephalosporin.*5") %>%
+  pull(ab)
 AB_CEPHALOSPORINS_EXCEPT_CAZ <- AB_CEPHALOSPORINS[AB_CEPHALOSPORINS != "CAZ"]
-AB_FLUOROQUINOLONES <- antibiotics %>% filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>% pull(ab)
-AB_GLYCOPEPTIDES <- antibiotics %>% filter(group %like% "glycopeptide") %>% pull(ab)
+AB_FLUOROQUINOLONES <- antibiotics %>%
+  filter(atc_group2 %like% "fluoroquinolone" | (group %like% "quinolone" & is.na(atc_group2))) %>%
+  pull(ab)
+AB_GLYCOPEPTIDES <- antibiotics %>%
+  filter(group %like% "glycopeptide") %>%
+  pull(ab)
 AB_LIPOGLYCOPEPTIDES <- as.ab(c("DAL", "ORI", "TLV")) # dalba/orita/tela
 AB_GLYCOPEPTIDES_EXCEPT_LIPO <- AB_GLYCOPEPTIDES[!AB_GLYCOPEPTIDES %in% AB_LIPOGLYCOPEPTIDES]
-AB_LINCOSAMIDES <- antibiotics %>% filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>% pull(ab)
-AB_MACROLIDES <- antibiotics %>% filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>% pull(ab)
-AB_OXAZOLIDINONES <- antibiotics %>% filter(group %like% "oxazolidinone") %>% pull(ab)
-AB_PENICILLINS <- antibiotics %>% filter(group %like% "penicillin") %>% pull(ab)
-AB_POLYMYXINS <- antibiotics %>% filter(group %like% "polymyxin") %>% pull(ab)
-AB_QUINOLONES <- antibiotics %>% filter(group %like% "quinolone") %>% pull(ab)
-AB_STREPTOGRAMINS <- antibiotics %>% filter(atc_group2 %like% "streptogramin") %>% pull(ab)
-AB_TETRACYCLINES <- antibiotics %>% filter(group %like% "tetracycline") %>% pull(ab)
+AB_LINCOSAMIDES <- antibiotics %>%
+  filter(atc_group2 %like% "lincosamide" | (group %like% "lincosamide" & is.na(atc_group2))) %>%
+  pull(ab)
+AB_MACROLIDES <- antibiotics %>%
+  filter(atc_group2 %like% "macrolide" | (group %like% "macrolide" & is.na(atc_group2))) %>%
+  pull(ab)
+AB_OXAZOLIDINONES <- antibiotics %>%
+  filter(group %like% "oxazolidinone") %>%
+  pull(ab)
+AB_PENICILLINS <- antibiotics %>%
+  filter(group %like% "penicillin") %>%
+  pull(ab)
+AB_POLYMYXINS <- antibiotics %>%
+  filter(group %like% "polymyxin") %>%
+  pull(ab)
+AB_QUINOLONES <- antibiotics %>%
+  filter(group %like% "quinolone") %>%
+  pull(ab)
+AB_STREPTOGRAMINS <- antibiotics %>%
+  filter(atc_group2 %like% "streptogramin") %>%
+  pull(ab)
+AB_TETRACYCLINES <- antibiotics %>%
+  filter(group %like% "tetracycline") %>%
+  pull(ab)
 AB_TETRACYCLINES_EXCEPT_TGC <- AB_TETRACYCLINES[AB_TETRACYCLINES != "TGC"]
-AB_TRIMETHOPRIMS <- antibiotics %>% filter(group %like% "trimethoprim") %>% pull(ab)
+AB_TRIMETHOPRIMS <- antibiotics %>%
+  filter(group %like% "trimethoprim") %>%
+  pull(ab)
 AB_UREIDOPENICILLINS <- as.ab(c("PIP", "TZP", "AZL", "MEZ"))
 AB_BETALACTAMS <- c(AB_PENICILLINS, AB_CEPHALOSPORINS, AB_CARBAPENEMS)
 # this will be used for documentation:
@ -194,15 +261,21 @@ create_AB_lookup <- function() {
  AB_lookup$generalised_synonyms <- lapply(AB_lookup$synonyms, generalise_antibiotic_name)
  AB_lookup$generalised_abbreviations <- lapply(AB_lookup$abbreviations, generalise_antibiotic_name)
  AB_lookup$generalised_loinc <- lapply(AB_lookup$loinc, generalise_antibiotic_name)
-  AB_lookup$generalised_all <- unname(lapply(as.list(as.data.frame(t(AB_lookup[,
-                                                                               c("ab", "atc", "cid", "name",
-                                                                                 colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]),
-                                                                               drop = FALSE]),
-                                                                   stringsAsFactors = FALSE)),
-                                             function(x) {
-                                               x <- generalise_antibiotic_name(unname(unlist(x)))
-                                               x[x != ""]
-                                             }))
+  AB_lookup$generalised_all <- unname(lapply(
+    as.list(as.data.frame(t(AB_lookup[,
+      c(
+        "ab", "atc", "cid", "name",
+        colnames(AB_lookup)[colnames(AB_lookup) %like% "generalised"]
+      ),
+      drop = FALSE
+    ]),
+    stringsAsFactors = FALSE
+    )),
+    function(x) {
+      x <- generalise_antibiotic_name(unname(unlist(x)))
+      x[x != ""]
+    }
+  ))
  AB_lookup[, colnames(AB_lookup)[colnames(AB_lookup) %like% "^generalised"]]
 }
 AB_LOOKUP <- create_AB_lookup()
@ -210,48 +283,49 @@ AB_LOOKUP <- create_AB_lookup()
 # Export to package as internal data ----
 usethis::ui_info(paste0("Saving {usethis::ui_value('sysdata.rda')} to {usethis::ui_value('R/')}"))
 suppressMessages(usethis::use_data(EUCAST_RULES_DF,
-                                   TRANSLATIONS,
-                                   LANGUAGES_SUPPORTED_NAMES,
-                                   LANGUAGES_SUPPORTED,
-                                   MO_CONS,
-                                   MO_COPS,
-                                   MO_STREP_ABCG,
-                                   MO_FULLNAME_LOWER,
-                                   MO_PREVALENT_GENERA,
-                                   AB_LOOKUP,
-                                   AB_AMINOGLYCOSIDES,
-                                   AB_AMINOPENICILLINS,
-                                   AB_ANTIFUNGALS,
-                                   AB_ANTIMYCOBACTERIALS,
-                                   AB_CARBAPENEMS,
-                                   AB_CEPHALOSPORINS,
-                                   AB_CEPHALOSPORINS_1ST,
-                                   AB_CEPHALOSPORINS_2ND,
-                                   AB_CEPHALOSPORINS_3RD,
-                                   AB_CEPHALOSPORINS_4TH,
-                                   AB_CEPHALOSPORINS_5TH,
-                                   AB_CEPHALOSPORINS_EXCEPT_CAZ,
-                                   AB_FLUOROQUINOLONES,
-                                   AB_LIPOGLYCOPEPTIDES,
-                                   AB_GLYCOPEPTIDES,
-                                   AB_GLYCOPEPTIDES_EXCEPT_LIPO,
-                                   AB_LINCOSAMIDES,
-                                   AB_MACROLIDES,
-                                   AB_OXAZOLIDINONES,
-                                   AB_PENICILLINS,
-                                   AB_POLYMYXINS,
-                                   AB_QUINOLONES,
-                                   AB_STREPTOGRAMINS,
-                                   AB_TETRACYCLINES,
-                                   AB_TETRACYCLINES_EXCEPT_TGC,
-                                   AB_TRIMETHOPRIMS,
-                                   AB_UREIDOPENICILLINS,
-                                   AB_BETALACTAMS,
-                                   DEFINED_AB_GROUPS,
-                                   internal = TRUE,
-                                   overwrite = TRUE,
-                                   version = 2,
-                                   compress = "xz"))
+  TRANSLATIONS,
+  LANGUAGES_SUPPORTED_NAMES,
+  LANGUAGES_SUPPORTED,
+  MO_CONS,
+  MO_COPS,
+  MO_STREP_ABCG,
+  MO_FULLNAME_LOWER,
+  MO_PREVALENT_GENERA,
+  AB_LOOKUP,
+  AB_AMINOGLYCOSIDES,
+  AB_AMINOPENICILLINS,
+  AB_ANTIFUNGALS,
+  AB_ANTIMYCOBACTERIALS,
+  AB_CARBAPENEMS,
+  AB_CEPHALOSPORINS,
+  AB_CEPHALOSPORINS_1ST,
+  AB_CEPHALOSPORINS_2ND,
+  AB_CEPHALOSPORINS_3RD,
+  AB_CEPHALOSPORINS_4TH,
+  AB_CEPHALOSPORINS_5TH,
+  AB_CEPHALOSPORINS_EXCEPT_CAZ,
+  AB_FLUOROQUINOLONES,
+  AB_LIPOGLYCOPEPTIDES,
+  AB_GLYCOPEPTIDES,
+  AB_GLYCOPEPTIDES_EXCEPT_LIPO,
+  AB_LINCOSAMIDES,
+  AB_MACROLIDES,
+  AB_OXAZOLIDINONES,
+  AB_PENICILLINS,
+  AB_POLYMYXINS,
+  AB_QUINOLONES,
+  AB_STREPTOGRAMINS,
+  AB_TETRACYCLINES,
+  AB_TETRACYCLINES_EXCEPT_TGC,
+  AB_TRIMETHOPRIMS,
+  AB_UREIDOPENICILLINS,
+  AB_BETALACTAMS,
+  DEFINED_AB_GROUPS,
+  internal = TRUE,
+  overwrite = TRUE,
+  version = 2,
+  compress = "xz"
+))

 # Export data sets to the repository in different formats -----------------

@ -273,12 +347,15 @@ write_md5 <- function(object) {
  close(conn)
 }
 changed_md5 <- function(object) {
-  tryCatch({
-    conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
-    compared <- md5(object) != readLines(con = conn)
-    close(conn)
-    compared
-  }, error = function(e) TRUE)
+  tryCatch(
+    {
+      conn <- file(paste0("data-raw/", deparse(substitute(object)), ".md5"))
+      compared <- md5(object) != readLines(con = conn)
+      close(conn)
+      compared
+    },
+    error = function(e) TRUE
+  )
 }

 # give official names to ABs and MOs
@ -306,7 +383,7 @@ if (changed_md5(microorganisms)) {
  max_50_snomed <- sapply(microorganisms$snomed, function(x) paste(x[seq_len(min(50, length(x), na.rm = TRUE))], collapse = " "))
  mo <- microorganisms
  mo$snomed <- max_50_snomed
-  mo <- dplyr::mutate_if(mo, ~!is.numeric(.), as.character)
+  mo <- dplyr::mutate_if(mo, ~ !is.numeric(.), as.character)
  try(haven::write_sas(mo, "data-raw/microorganisms.sas"), silent = TRUE)
  try(haven::write_sav(mo, "data-raw/microorganisms.sav"), silent = TRUE)
  try(haven::write_dta(mo, "data-raw/microorganisms.dta"), silent = TRUE)
@ -328,7 +405,7 @@ if (changed_md5(microorganisms.old)) {
  try(arrow::write_parquet(microorganisms.old, "data-raw/microorganisms.old.parquet"), silent = TRUE)
 }

-ab <- dplyr::mutate_if(antibiotics, ~!is.numeric(.), as.character)
+ab <- dplyr::mutate_if(antibiotics, ~ !is.numeric(.), as.character)
 if (changed_md5(ab)) {
  usethis::ui_info(paste0("Saving {usethis::ui_value('antibiotics')} to {usethis::ui_value('data-raw/')}"))
  write_md5(ab)
@ -342,7 +419,7 @@ if (changed_md5(ab)) {
  try(arrow::write_parquet(antibiotics, "data-raw/antibiotics.parquet"), silent = TRUE)
 }

-av <- dplyr::mutate_if(antivirals, ~!is.numeric(.), as.character)
+av <- dplyr::mutate_if(antivirals, ~ !is.numeric(.), as.character)
 if (changed_md5(av)) {
  usethis::ui_info(paste0("Saving {usethis::ui_value('antivirals')} to {usethis::ui_value('data-raw/')}"))
  write_md5(av)
@ -357,9 +434,11 @@ if (changed_md5(av)) {
 }

 # give official names to ABs and MOs
-intrinsicR <- data.frame(microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
-                         antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
-                         stringsAsFactors = FALSE)
+intrinsicR <- data.frame(
+  microorganism = mo_name(intrinsic_resistant$mo, language = NULL),
+  antibiotic = ab_name(intrinsic_resistant$ab, language = NULL),
+  stringsAsFactors = FALSE
+)
 if (changed_md5(intrinsicR)) {
  usethis::ui_info(paste0("Saving {usethis::ui_value('intrinsic_resistant')} to {usethis::ui_value('data-raw/')}"))
  write_md5(intrinsicR)
@ -394,4 +473,25 @@ rm(list = current_globalenv[!current_globalenv %in% old_globalenv])
 rm(current_globalenv)

 devtools::load_all(quiet = TRUE)
-devtools::document()
+
+
+# Document pkg ------------------------------------------------------------
+usethis::ui_info("Documenting package")
+suppressMessages(devtools::document(quiet = TRUE))
+
+
+# Style pkg ---------------------------------------------------------------
+usethis::ui_info("Styling package")
+invisible(capture.output(styler::style_pkg(
+  style = styler::tidyverse_style,
+  filetype = c("R", "Rmd")
+)))
+invisible(capture.output(styler::style_dir(
+  path = "inst", # unit tests
+  style = styler::tidyverse_style,
+  filetype = c("R", "Rmd")
+)))
+
+
+# Finished ----------------------------------------------------------------
+usethis::ui_info("All done")
--- a/data-raw/loinc.R
+++ b/data-raw/loinc.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -30,8 +30,9 @@
 # 2. Download the CSV from https://loinc.org/download/loinc-table-file-csv/ (Loinc_2.67_Text_2.67.zip)
 # 3. Read Loinc.csv that's in this zip file
 loinc_df <- read.csv("data-raw/Loinc.csv",
-                     row.names = NULL,
-                     stringsAsFactors = FALSE)
+  row.names = NULL,
+  stringsAsFactors = FALSE
+)

 # 4. Clean and add
 library(dplyr)
@ -39,7 +40,10 @@ library(cleaner)
 library(AMR)
 loinc_df %>% freq(CLASS) # to find the drugs
 loinc_df <- loinc_df %>% filter(CLASS == "DRUG/TOX")
-ab_names <- antibiotics %>% pull(name) %>% paste0(collapse = "|") %>% paste0("(", ., ")")
+ab_names <- antibiotics %>%
+  pull(name) %>%
+  paste0(collapse = "|") %>%
+  paste0("(", ., ")")

 antibiotics$loinc <- as.list(rep(NA_character_, nrow(antibiotics)))
 for (i in seq_len(nrow(antibiotics))) {
--- a/data-raw/poorman_prepend.R
+++ b/data-raw/poorman_prepend.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -34,10 +34,10 @@
 #
 # All functions are prefixed with 'pm_' to make it obvious that they are dplyr substitutes.
 #
-# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a 
+# All code below was released under MIT license, that permits 'free of charge, to any person obtaining a
 # copy of the software and associated documentation files (the "Software"), to deal in the Software
 # without restriction, including without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
 # is furnished to do so', given that a copyright notice is given in the software.
 #
 # Copyright notice on {date}, the day this code was downloaded, as found on
--- a/data-raw/read_EUCAST.R
+++ b/data-raw/read_EUCAST.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -31,11 +31,10 @@ library(AMR)

 # USE THIS FUNCTION TO READ THE EUCAST EXCEL FILE THAT CONTAINS THE BREAKPOINT TABLES

-read_EUCAST <- function(sheet, file, guideline_name) { 
-  
+read_EUCAST <- function(sheet, file, guideline_name) {
  message("\nGetting sheet: ", sheet)
  sheet.bak <- sheet
-  
+
  uncertainties <- NULL
  add_uncertainties <- function(old, new) {
    if (is.null(old)) {
@ -44,55 +43,64 @@ read_EUCAST <- function(sheet, file, guideline_name) {
      bind_rows(old, new)
    }
  }
-  
-  raw_data <- read.xlsx(xlsxFile = file,
-                        sheet = sheet,
-                        colNames = FALSE,
-                        skipEmptyRows = FALSE,
-                        skipEmptyCols = FALSE, 
-                        fillMergedCells = TRUE,
-                        na.strings = c("", "-", "NA", "IE", "IP"))
-  probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~sum(!is.na(.))) %>% unlist() %>% max())
+
+  raw_data <- read.xlsx(
+    xlsxFile = file,
+    sheet = sheet,
+    colNames = FALSE,
+    skipEmptyRows = FALSE,
+    skipEmptyCols = FALSE,
+    fillMergedCells = TRUE,
+    na.strings = c("", "-", "NA", "IE", "IP")
+  )
+  probable_rows <- suppressWarnings(raw_data %>% mutate_all(as.double) %>% summarise_all(~ sum(!is.na(.))) %>% unlist() %>% max())
  if (probable_rows == 0) {
    message("NO ROWS FOUND")
    message("------------------------")
    return(NULL)
  }
-  
+
  # in the info header in the Excel file, EUCAST mentions which genera are targeted
  if (sheet %like% "anaerob.*Gram.*posi") {
-    sheet <- paste0(c("Actinomyces", "Bifidobacterium", "Clostridioides", 
-                      "Clostridium", "Cutibacterium", "Eggerthella", 
-                      "Eubacterium", "Lactobacillus", "Propionibacterium", 
-                      "Staphylococcus saccharolyticus"), 
-                    collapse = "_")
+    sheet <- paste0(c(
+      "Actinomyces", "Bifidobacterium", "Clostridioides",
+      "Clostridium", "Cutibacterium", "Eggerthella",
+      "Eubacterium", "Lactobacillus", "Propionibacterium",
+      "Staphylococcus saccharolyticus"
+    ),
+    collapse = "_"
+    )
  } else if (sheet %like% "anaerob.*Gram.*nega") {
-    sheet <- paste0(c("Bacteroides",
-                      "Bilophila",
-                      "Fusobacterium",
-                      "Mobiluncus",
-                      "Parabacteroides",
-                      "Porphyromonas",
-                      "Prevotella"), 
-                    collapse = "_")
+    sheet <- paste0(c(
+      "Bacteroides",
+      "Bilophila",
+      "Fusobacterium",
+      "Mobiluncus",
+      "Parabacteroides",
+      "Porphyromonas",
+      "Prevotella"
+    ),
+    collapse = "_"
+    )
  } else if (sheet == "Streptococcus A,B,C,G") {
    sheet <- paste0(microorganisms %>%
-                      filter(genus == "Streptococcus") %>%
-                      mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
-                      filter(lancefield %like% "^Streptococcus group") %>% 
-                      pull(fullname), 
-                    collapse = "_")
+      filter(genus == "Streptococcus") %>%
+      mutate(lancefield = mo_name(mo, Lancefield = TRUE)) %>%
+      filter(lancefield %like% "^Streptococcus group") %>%
+      pull(fullname),
+    collapse = "_"
+    )
  } else if (sheet %like% "PK.*PD") {
    sheet <- "UNKNOWN"
  }
  mo_sheet <- paste0(suppressMessages(as.mo(unlist(strsplit(sheet, "_")))), collapse = "|")
  if (!is.null(mo_uncertainties())) uncertainties <- add_uncertainties(uncertainties, mo_uncertainties())
-  
+
  set_columns_names <- function(x, cols) {
    colnames(x) <- cols[1:length(colnames(x))]
    x
  }
-  
+
  get_mo <- function(x) {
    for (i in seq_len(length(x))) {
      y <- trimws(unlist(strsplit(x[i], "(,|and)")))
@ -104,76 +112,91 @@ read_EUCAST <- function(sheet, file, guideline_name) {
    }
    x
  }
-  
-  MICs_with_trailing_superscript <- c(seq(from = 0.0011, to = 0.0019, by = 0.0001),
-                                      seq(from = 0.031, to = 0.039, by = 0.001),
-                                      seq(from = 0.061, to = 0.069, by = 0.001),
-                                      seq(from = 0.1251, to = 0.1259, by = 0.0001),
-                                      seq(from = 0.251, to = 0.259, by = 0.001),
-                                      seq(from = 0.51, to = 0.59, by = 0.01),
-                                      seq(from = 11, to = 19, by = 1),
-                                      seq(from = 161, to = 169, by = 01),
-                                      seq(from = 21, to = 29, by = 1),
-                                      seq(from = 321, to = 329, by = 1),
-                                      seq(from = 41, to = 49, by = 1),
-                                      seq(from = 81, to = 89, by = 1))
+
+  MICs_with_trailing_superscript <- c(
+    seq(from = 0.0011, to = 0.0019, by = 0.0001),
+    seq(from = 0.031, to = 0.039, by = 0.001),
+    seq(from = 0.061, to = 0.069, by = 0.001),
+    seq(from = 0.1251, to = 0.1259, by = 0.0001),
+    seq(from = 0.251, to = 0.259, by = 0.001),
+    seq(from = 0.51, to = 0.59, by = 0.01),
+    seq(from = 11, to = 19, by = 1),
+    seq(from = 161, to = 169, by = 01),
+    seq(from = 21, to = 29, by = 1),
+    seq(from = 321, to = 329, by = 1),
+    seq(from = 41, to = 49, by = 1),
+    seq(from = 81, to = 89, by = 1)
+  )
  has_superscript <- function(x) {
-    # because due to floating point error, 0.1252 is not in: 
+    # because due to floating point error, 0.1252 is not in:
    # seq(from = 0.1251, to = 0.1259, by = 0.0001)
    sapply(x, function(x) any(near(x, MICs_with_trailing_superscript)))
  }
-  
+
  has_zone_diameters <- rep(any(unlist(raw_data) %like% "zone diameter"), nrow(raw_data))

-  cleaned <- raw_data %>% 
-    as_tibble() %>% 
-    set_columns_names(LETTERS) %>% 
-    transmute(drug = A,
-              MIC_S = B,
-              MIC_R = C,
-              disk_dose = ifelse(has_zone_diameters, E, NA_character_),
-              disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
-              disk_R = ifelse(has_zone_diameters, G, NA_character_)) %>% 
-    filter(!is.na(drug),
-           !(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
-           MIC_S %unlike% "(MIC|S ≤|note)",
-           MIC_S %unlike% "^[-]",
-           drug != MIC_S,) %>% 
-    mutate(administration = case_when(drug %like% "[( ]oral" ~ "oral",
-                                      drug %like% "[( ]iv"   ~ "iv",
-                                      TRUE ~ NA_character_),
-           uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
-           systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
-           mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)) %>% 
+  cleaned <- raw_data %>%
+    as_tibble() %>%
+    set_columns_names(LETTERS) %>%
+    transmute(
+      drug = A,
+      MIC_S = B,
+      MIC_R = C,
+      disk_dose = ifelse(has_zone_diameters, E, NA_character_),
+      disk_S = ifelse(has_zone_diameters, `F`, NA_character_),
+      disk_R = ifelse(has_zone_diameters, G, NA_character_)
+    ) %>%
+    filter(
+      !is.na(drug),
+      !(is.na(MIC_S) & is.na(MIC_R) & is.na(disk_S) & is.na(disk_R)),
+      MIC_S %unlike% "(MIC|S ≤|note)",
+      MIC_S %unlike% "^[-]",
+      drug != MIC_S,
+    ) %>%
+    mutate(
+      administration = case_when(
+        drug %like% "[( ]oral" ~ "oral",
+        drug %like% "[( ]iv" ~ "iv",
+        TRUE ~ NA_character_
+      ),
+      uti = ifelse(drug %like% "(UTI|urinary|urine)", TRUE, FALSE),
+      systemic = ifelse(drug %like% "(systemic|septic)", TRUE, FALSE),
+      mo = ifelse(drug %like% "([.]|spp)", get_mo(drug), mo_sheet)
+    ) %>%
    # clean disk doses
-    mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>% 
+    mutate(disk_dose = clean_character(disk_dose, remove = "[^0-9.-]")) %>%
    # clean MIC and disk values
-    mutate(MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
-           MIC_R = gsub(".,.", "", MIC_R),
-           disk_S = gsub(".,.", "", disk_S),
-           disk_R = gsub(".,.", "", disk_R),
-           MIC_S = clean_double(MIC_S), # make them valid numeric values
-           MIC_R = clean_double(MIC_R),
-           disk_S = clean_integer(disk_S),
-           disk_R = clean_integer(disk_R),
-           # invalid MIC values have a superscript text, delete those
-           MIC_S = ifelse(has_superscript(MIC_S),
-                          substr(MIC_S, 1, nchar(MIC_S) - 1),
-                          MIC_S),
-           MIC_R = ifelse(has_superscript(MIC_R),
-                          substr(MIC_R, 1, nchar(MIC_R) - 1),
-                          MIC_R),
-           # and some are just awful
-           MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
-           MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
-    ) %>% 
+    mutate(
+      MIC_S = gsub(".,.", "", MIC_S), # remove superscript notes with comma, like 0.5^2,3
+      MIC_R = gsub(".,.", "", MIC_R),
+      disk_S = gsub(".,.", "", disk_S),
+      disk_R = gsub(".,.", "", disk_R),
+      MIC_S = clean_double(MIC_S), # make them valid numeric values
+      MIC_R = clean_double(MIC_R),
+      disk_S = clean_integer(disk_S),
+      disk_R = clean_integer(disk_R),
+      # invalid MIC values have a superscript text, delete those
+      MIC_S = ifelse(has_superscript(MIC_S),
+        substr(MIC_S, 1, nchar(MIC_S) - 1),
+        MIC_S
+      ),
+      MIC_R = ifelse(has_superscript(MIC_R),
+        substr(MIC_R, 1, nchar(MIC_R) - 1),
+        MIC_R
+      ),
+      # and some are just awful
+      MIC_S = ifelse(MIC_S == 43.4, 4, MIC_S),
+      MIC_R = ifelse(MIC_R == 43.4, 4, MIC_R),
+    ) %>%
    # clean drug names
-    mutate(drug = gsub(" ?[(, ].*$", "", drug),
-           drug = gsub("[1-9]+$", "", drug),
-           ab = as.ab(drug)) %>% 
-    select(ab, mo, everything(), -drug) %>% 
+    mutate(
+      drug = gsub(" ?[(, ].*$", "", drug),
+      drug = gsub("[1-9]+$", "", drug),
+      ab = as.ab(drug)
+    ) %>%
+    select(ab, mo, everything(), -drug) %>%
    as.data.frame(stringsAsFactors = FALSE)
-  
+
  # new row for every different MO mentioned
  for (i in 1:nrow(cleaned)) {
    mo <- cleaned[i, "mo", drop = TRUE]
@ -181,37 +204,44 @@ read_EUCAST <- function(sheet, file, guideline_name) {
      mo_vect <- unlist(strsplit(mo, "|", fixed = TRUE))
      cleaned[i, "mo"] <- mo_vect[1]
      for (j in seq_len(length(mo_vect))) {
-        cleaned <- bind_rows(cleaned, cleaned[i , , drop = FALSE])
+        cleaned <- bind_rows(cleaned, cleaned[i, , drop = FALSE])
        cleaned[nrow(cleaned), "mo"] <- mo_vect[j]
      }
    }
  }
-  
-  cleaned <- cleaned %>% 
-    distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>% 
-    arrange(ab, mo) %>% 
+
+  cleaned <- cleaned %>%
+    distinct(ab, mo, administration, uti, systemic, .keep_all = TRUE) %>%
+    arrange(ab, mo) %>%
    mutate_at(c("MIC_S", "MIC_R", "disk_S", "disk_R"), as.double) %>%
-    pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>% 
-    mutate(method = ifelse(type %like% "MIC", "MIC", "DISK"), 
-           type = gsub("^.*_", "breakpoint_", type)) %>%
-    pivot_wider(names_from = type, values_from = value) %>% 
-    mutate(guideline = guideline_name,
-           disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
-           mo = ifelse(mo == "", mo_sheet, mo)) %>% 
-    filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>% 
+    pivot_longer(c("MIC_S", "MIC_R", "disk_S", "disk_R"), "type") %>%
+    mutate(
+      method = ifelse(type %like% "MIC", "MIC", "DISK"),
+      type = gsub("^.*_", "breakpoint_", type)
+    ) %>%
+    pivot_wider(names_from = type, values_from = value) %>%
+    mutate(
+      guideline = guideline_name,
+      disk_dose = ifelse(method == "DISK", disk_dose, NA_character_),
+      mo = ifelse(mo == "", mo_sheet, mo)
+    ) %>%
+    filter(!(is.na(breakpoint_S) & is.na(breakpoint_R))) %>%
    # comply with rsi_translation for now
    transmute(guideline,
-              method, 
-              site = case_when(uti ~ "UTI",
-                               systemic ~ "Systemic",
-                               TRUE ~ administration),
-              mo, ab, 
-              ref_tbl = sheet.bak, 
-              disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
-              breakpoint_S, 
-              breakpoint_R) %>% 
+      method,
+      site = case_when(
+        uti ~ "UTI",
+        systemic ~ "Systemic",
+        TRUE ~ administration
+      ),
+      mo, ab,
+      ref_tbl = sheet.bak,
+      disk_dose = ifelse(!is.na(disk_dose), paste0(disk_dose, "ug"), NA_character_),
+      breakpoint_S,
+      breakpoint_R
+    ) %>%
    as.data.frame(stringsAsFactors = FALSE)
-  
+
  if (!is.null(uncertainties)) {
    print(uncertainties %>% distinct(input, mo, .keep_all = TRUE))
  }
@ -231,24 +261,33 @@ guideline_name <- "EUCAST 2021"
 sheets_to_analyse <- sheets[!sheets %in% c("Content", "Changes", "Notes", "Guidance", "Dosages", "Technical uncertainty", "Topical agents")]

 # takes the longest time:
-new_EUCAST <- read_EUCAST(sheet = sheets_to_analyse[1],
-                          file = file, 
-                          guideline_name = guideline_name) 
+new_EUCAST <- read_EUCAST(
+  sheet = sheets_to_analyse[1],
+  file = file,
+  guideline_name = guideline_name
+)
 for (i in 2:length(sheets_to_analyse)) {
  tryCatch(
-    new_EUCAST <<- bind_rows(new_EUCAST,
-                             read_EUCAST(sheet = sheets_to_analyse[i],
-                                         file = file, 
-                                         guideline_name = guideline_name))
-    , error = function(e) message(e$message))
+    new_EUCAST <<- bind_rows(
+      new_EUCAST,
+      read_EUCAST(
+        sheet = sheets_to_analyse[i],
+        file = file,
+        guideline_name = guideline_name
+      )
+    ),
+    error = function(e) message(e$message)
+  )
 }

 # 2021-07-12 fix for Morganellaceae (check other lines too next time)
 morg <- rsi_translation %>%
  as_tibble() %>%
-  filter(ab == "IPM",
-         guideline == "EUCAST 2021",
-         mo == as.mo("Enterobacterales")) %>% 
+  filter(
+    ab == "IPM",
+    guideline == "EUCAST 2021",
+    mo == as.mo("Enterobacterales")
+  ) %>%
  mutate(mo = as.mo("Morganellaceae"))
 morg[which(morg$method == "MIC"), "breakpoint_S"] <- 0.001
 morg[which(morg$method == "MIC"), "breakpoint_R"] <- 4
@ -258,5 +297,5 @@ morg[which(morg$method == "DISK"), "breakpoint_R"] <- 19
 rsi_translation <- rsi_translation %>%
  bind_rows(morg) %>%
  bind_rows(morg %>%
-              mutate(guideline = "EUCAST 2020")) %>%
+    mutate(guideline = "EUCAST 2020")) %>%
  arrange(desc(guideline), ab, mo, method)
--- a/data-raw/reproduction_of_antibiotics.R
+++ b/data-raw/reproduction_of_antibiotics.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -33,34 +33,43 @@ library(dplyr)
 library(readxl)
 DRGLST <- read_excel("DRGLST.xlsx")
 abx <- DRGLST %>%
-  select(ab = WHON5_CODE,
-         name = ANTIBIOTIC) %>%
+  select(
+    ab = WHON5_CODE,
+    name = ANTIBIOTIC
+  ) %>%
  # remove the ones without WHONET code
  filter(!is.na(ab)) %>%
  distinct(name, .keep_all = TRUE) %>%
  # add the ones without WHONET code
  bind_rows(
    DRGLST %>%
-      select(ab = WHON5_CODE,
-             name = ANTIBIOTIC) %>%
+      select(
+        ab = WHON5_CODE,
+        name = ANTIBIOTIC
+      ) %>%
      filter(is.na(ab)) %>%
      distinct(name, .keep_all = TRUE)
-      # add new ab code later
+    # add new ab code later
  ) %>%
  arrange(name)

 # add old ATC codes
 ab_old <- AMR::antibiotics %>%
-  mutate(official = gsub("( and |, )", "/", official),
-         abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
-                      ifelse(is.na(certe), "", certe),
-                      ifelse(is.na(umcg), "", umcg),
-                      sep = "|")))
+  mutate(
+    official = gsub("( and |, )", "/", official),
+    abbr = tolower(paste(ifelse(is.na(abbr), "", abbr),
+      ifelse(is.na(certe), "", certe),
+      ifelse(is.na(umcg), "", umcg),
+      sep = "|"
+    ))
+  )
 for (i in 1:nrow(ab_old)) {
  abbr <- ab_old[i, "abbr"]
-  abbr <- strsplit(abbr, "|", fixed = TRUE) %>% unlist() %>% unique()
+  abbr <- strsplit(abbr, "|", fixed = TRUE) %>%
+    unlist() %>%
+    unique()
  abbr <- abbr[abbr != ""]
-  #print(abbr)
+  # print(abbr)
  if (length(abbr) == 0) {
    ab_old[i, "abbr"] <- NA_character_
  } else {
@ -72,50 +81,54 @@ for (i in 1:nrow(ab_old)) {
 abx_atc1 <- abx %>%
  mutate(name_lower = tolower(name)) %>%
  left_join(ab_old %>%
-              select(ears_net, atc), by = c(ab = "ears_net")) %>%
+    select(ears_net, atc), by = c(ab = "ears_net")) %>%
  rename(atc1 = atc) %>%
  left_join(ab_old %>%
-              mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
-              transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
+    mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
+    transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
  rename(atc2 = atc) %>%
  left_join(ab_old %>%
-              mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
-              mutate(official = gsub("f", "ph", official)) %>%
-              transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
+    mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
+    mutate(official = gsub("f", "ph", official)) %>%
+    transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
  rename(atc3 = atc) %>%
  left_join(ab_old %>%
-              mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
-              mutate(official = gsub("t", "th", official)) %>%
-              transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
+    mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
+    mutate(official = gsub("t", "th", official)) %>%
+    transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
  rename(atc4 = atc) %>%
  left_join(ab_old %>%
-              mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
-              mutate(official = gsub("f", "ph", official)) %>%
-              mutate(official = gsub("t", "th", official)) %>%
-              transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
+    mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
+    mutate(official = gsub("f", "ph", official)) %>%
+    mutate(official = gsub("t", "th", official)) %>%
+    transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
  rename(atc5 = atc) %>%
  left_join(ab_old %>%
-              mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
-              mutate(official = gsub("f", "ph", official)) %>%
-              mutate(official = gsub("t", "th", official)) %>%
-              mutate(official = gsub("ine$", "in", official)) %>%
-              transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
+    mutate(official = gsub(", combinations", "", official, fixed = TRUE)) %>%
+    mutate(official = gsub("f", "ph", official)) %>%
+    mutate(official = gsub("t", "th", official)) %>%
+    mutate(official = gsub("ine$", "in", official)) %>%
+    transmute(official = tolower(official), atc), by = c(name_lower = "official")) %>%
  rename(atc6 = atc) %>%
-  mutate(atc = case_when(!is.na(atc1) ~ atc1,
-                         !is.na(atc2) ~ atc2,
-                         !is.na(atc3) ~ atc3,
-                         !is.na(atc4) ~ atc4,
-                         !is.na(atc4) ~ atc5,
-                         TRUE ~ atc6)) %>%
+  mutate(atc = case_when(
+    !is.na(atc1) ~ atc1,
+    !is.na(atc2) ~ atc2,
+    !is.na(atc3) ~ atc3,
+    !is.na(atc4) ~ atc4,
+    !is.na(atc4) ~ atc5,
+    TRUE ~ atc6
+  )) %>%
  distinct(ab, name, .keep_all = TRUE) %>%
  select(ab, atc, name)

 abx_atc2 <- ab_old %>%
-  filter(!atc %in% abx_atc1$atc,
-         is.na(ears_net),
-         !is.na(atc_group1),
-         atc_group1 %unlike% ("virus|vaccin|viral|immun"),
-         official %unlike% "(combinations| with )") %>%
+  filter(
+    !atc %in% abx_atc1$atc,
+    is.na(ears_net),
+    !is.na(atc_group1),
+    atc_group1 %unlike% ("virus|vaccin|viral|immun"),
+    official %unlike% "(combinations| with )"
+  ) %>%
  mutate(ab = NA_character_) %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  select(ab, atc, name = official)
@ -125,12 +138,15 @@ abx2 <- bind_rows(abx_atc1, abx_atc2)
 rm(abx_atc1)
 rm(abx_atc2)

-abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub("[/0-9-]",
-                                                   " ",
-                                                   abx2$name[is.na(abx2$ab)]),
-                                              minlength = 3,
-                                              method = "left.kept",
-                                              strict = TRUE))
+abx2$ab[is.na(abx2$ab)] <- toupper(abbreviate(gsub(
+  "[/0-9-]",
+  " ",
+  abx2$name[is.na(abx2$ab)]
+),
+minlength = 3,
+method = "left.kept",
+strict = TRUE
+))

 n_distinct(abx2$ab)

@ -150,7 +166,9 @@ for (i in 2:nrow(abx2)) {
    abx2[i, "ab"] <- paste0(abx2[i, "ab", drop = TRUE], abx2[i, "seqnr", drop = TRUE])
  }
 }
-abx2 <- abx2 %>% select(-seqnr) %>% arrange(name)
+abx2 <- abx2 %>%
+  select(-seqnr) %>%
+  arrange(name)

 # everything unique??
 nrow(abx2) == n_distinct(abx2$ab)
@ -158,8 +176,10 @@ nrow(abx2) == n_distinct(abx2$ab)
 # get ATC properties
 abx2 <- abx2 %>%
  left_join(ab_old %>%
-              select(atc, abbr, atc_group1, atc_group2,
-                     oral_ddd, oral_units, iv_ddd, iv_units))
+    select(
+      atc, abbr, atc_group1, atc_group2,
+      oral_ddd, oral_units, iv_ddd, iv_units
+    ))

 abx2$abbr <- lapply(as.list(abx2$abbr), function(x) unlist(strsplit(x, "|", fixed = TRUE)))

@ -171,29 +191,41 @@ get_CID <- function(ab) {
    p$tick()$print()

    CID[i] <- tryCatch(
-      data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
-                               URLencode(ab[i], reserved = TRUE),
-                               "/cids/TXT?name_type=complete"),
-                        showProgress = FALSE)[[1]][1],
-      error = function(e) NA_integer_)
+      data.table::fread(paste0(
+        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
+        URLencode(ab[i], reserved = TRUE),
+        "/cids/TXT?name_type=complete"
+      ),
+      showProgress = FALSE
+      )[[1]][1],
+      error = function(e) NA_integer_
+    )
    if (is.na(CID[i])) {
      # try with removing the text in brackets
      CID[i] <- tryCatch(
-        data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
-                                 URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
-                                 "/cids/TXT?name_type=complete"),
-                          showProgress = FALSE)[[1]][1],
-        error = function(e) NA_integer_)
+        data.table::fread(paste0(
+          "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
+          URLencode(trimws(gsub("[(].*[)]", "", ab[i])), reserved = TRUE),
+          "/cids/TXT?name_type=complete"
+        ),
+        showProgress = FALSE
+        )[[1]][1],
+        error = function(e) NA_integer_
+      )
    }
    if (is.na(CID[i])) {
      # try match on word and take the lowest CID value (sorted)
      ab[i] <- gsub("[^a-z0-9]+", " ", ab[i], ignore.case = TRUE)
      CID[i] <- tryCatch(
-        data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
-                                 URLencode(ab[i], reserved = TRUE),
-                                 "/cids/TXT?name_type=word"),
-                          showProgress = FALSE)[[1]][1],
-        error = function(e) NA_integer_)
+        data.table::fread(paste0(
+          "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/",
+          URLencode(ab[i], reserved = TRUE),
+          "/cids/TXT?name_type=word"
+        ),
+        showProgress = FALSE
+        )[[1]][1],
+        error = function(e) NA_integer_
+      )
    }
    Sys.sleep(0.1)
  }
@ -203,15 +235,15 @@ get_CID <- function(ab) {
 # get CIDs (2-3 min)
 CIDs <- get_CID(abx2$name)
 # These could not be found:
-abx2[is.na(CIDs),] %>% View()
+abx2[is.na(CIDs), ] %>% View()

 # returns list with synonyms (brand names), with CIDs as names
 get_synonyms <- function(CID, clean = TRUE) {
  synonyms <- rep(NA_character_, length(CID))
-  #p <- progress_ticker(n = length(CID), min_time = 0)
+  # p <- progress_ticker(n = length(CID), min_time = 0)

  for (i in 1:length(CID)) {
-    #p$tick()$print()
+    # p$tick()$print()

    synonyms_txt <- ""

@ -220,27 +252,37 @@ get_synonyms <- function(CID, clean = TRUE) {
    }

    synonyms_txt <- tryCatch(
-      data.table::fread(paste0("https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
-                               CID[i],
-                               "/synonyms/TXT"),
-                        sep = "\n",
-                        showProgress = FALSE)[[1]],
-      error = function(e) NA_character_)
+      data.table::fread(paste0(
+        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/fastidentity/cid/",
+        CID[i],
+        "/synonyms/TXT"
+      ),
+      sep = "\n",
+      showProgress = FALSE
+      )[[1]],
+      error = function(e) NA_character_
+    )

    Sys.sleep(0.1)

    if (clean == TRUE) {
      # remove text between brackets
-      synonyms_txt <- trimws(gsub("[(].*[)]", "",
-                                  gsub("[[].*[]]", "",
-                                       gsub("[(].*[]]", "",
-                                            gsub("[[].*[)]", "", synonyms_txt)))))
+      synonyms_txt <- trimws(gsub(
+        "[(].*[)]", "",
+        gsub(
+          "[[].*[]]", "",
+          gsub(
+            "[(].*[]]", "",
+            gsub("[[].*[)]", "", synonyms_txt)
+          )
+        )
+      ))
      synonyms_txt <- gsub("Co-", "Co", synonyms_txt, fixed = TRUE)
      # only length 6 to 20 and no txt with reading marks or numbers and must start with capital letter (= brand)
-      synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20)
-                                   & !grepl("[-&{},_0-9/]", synonyms_txt)
-                                   & grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
-      synonyms_txt <- unlist(strsplit(synonyms_txt,  ";", fixed = TRUE))
+      synonyms_txt <- synonyms_txt[nchar(synonyms_txt) %in% c(6:20) &
+        !grepl("[-&{},_0-9/]", synonyms_txt) &
+        grepl("^[A-Z]", synonyms_txt, ignore.case = FALSE)]
+      synonyms_txt <- unlist(strsplit(synonyms_txt, ";", fixed = TRUE))
    }
    synonyms_txt <- unique(trimws(synonyms_txt[tolower(synonyms_txt) %in% unique(tolower(synonyms_txt))]))
    synonyms[i] <- list(sort(synonyms_txt))
@ -251,52 +293,56 @@ get_synonyms <- function(CID, clean = TRUE) {

 # get brand names from PubChem (2-3 min)
 synonyms <- get_synonyms(CIDs)
-synonyms <- lapply(synonyms,
-                   function(x) {
-                     if (length(x) == 0 | all(is.na(x))) {
-                       ""
-                     } else {
-                       x
-                     }})
+synonyms <- lapply(
+  synonyms,
+  function(x) {
+    if (length(x) == 0 | all(is.na(x))) {
+      ""
+    } else {
+      x
+    }
+  }
+)

 # add them to data set
 antibiotics <- abx2 %>%
  left_join(DRGLST %>%
-              select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
-              distinct(ab, .keep_all = TRUE), by = "ab") %>%
+    select(ab = WHON5_CODE, CLASS, SUBCLASS) %>%
+    distinct(ab, .keep_all = TRUE), by = "ab") %>%
  transmute(ab,
-            atc,
-            cid = CIDs,
-            # no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
-            name = name %>%
-              gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
-              gsub("edta", "EDTA", ., ignore.case = TRUE),
-            group = case_when(
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
-              name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
-              paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
-              TRUE ~ "Other antibacterials"
-            ),
-            atc_group1, atc_group2,
-            abbreviations = unname(abbr),
-            synonyms = unname(synonyms),
-            oral_ddd, oral_units,
-            iv_ddd, iv_units) %>%
+    atc,
+    cid = CIDs,
+    # no capital after a slash: Ampicillin/Sulbactam -> Ampicillin/sulbactam
+    name = name %>%
+      gsub("([/-])([A-Z])", "\\1\\L\\2", ., perl = TRUE) %>%
+      gsub("edta", "EDTA", ., ignore.case = TRUE),
+    group = case_when(
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "am(ph|f)enicol" ~ "Amphenicols",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "aminoglycoside" ~ "Aminoglycosides",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "carbapenem" | name %like% "(imipenem|meropenem)" ~ "Carbapenems",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "First-generation cephalosporin" ~ "Cephalosporins (1st gen.)",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Second-generation cephalosporin" ~ "Cephalosporins (2nd gen.)",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Third-generation cephalosporin" ~ "Cephalosporins (3rd gen.)",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "Fourth-generation cephalosporin" ~ "Cephalosporins (4th gen.)",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(tuberculosis|mycobacter)" ~ "Antimycobacterials",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "cephalosporin" ~ "Cephalosporins",
+      name %like% "^Ce" & is.na(atc_group1) & paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "beta-?lactam" ~ "Cephalosporins",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(beta-?lactam|penicillin)" ~ "Beta-lactams/penicillins",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "quinolone" ~ "Quinolones",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "glycopeptide" ~ "Glycopeptides",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "macrolide" ~ "Macrolides/lincosamides",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "tetracycline" ~ "Tetracyclines",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "trimethoprim" ~ "Trimethoprims",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "polymyxin" ~ "Polymyxins",
+      paste(atc_group1, atc_group2, CLASS, SUBCLASS) %like% "(fungal|mycot)" ~ "Antifungals/antimycotics",
+      TRUE ~ "Other antibacterials"
+    ),
+    atc_group1, atc_group2,
+    abbreviations = unname(abbr),
+    synonyms = unname(synonyms),
+    oral_ddd, oral_units,
+    iv_ddd, iv_units
+  ) %>%
  as.data.frame(stringsAsFactors = FALSE)

 # some exceptions
@ -329,13 +375,15 @@ antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <-
 antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
 # Add cefoxitin screening
 class(antibiotics$ab) <- "character"
-antibiotics <- rbind(antibiotics,data.frame(ab = "FOX1", atc = NA, cid = NA, 
-                                            name = "Cefoxitin screening",
-                                            group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA, 
-                                            abbreviations = "cfsc", synonyms = NA, 
-                                            oral_ddd = NA, oral_units  = NA, iv_ddd  = NA, iv_units = NA,
-                                            loinc = NA,
-                                            stringsAsFactors = FALSE))
+antibiotics <- rbind(antibiotics, data.frame(
+  ab = "FOX1", atc = NA, cid = NA,
+  name = "Cefoxitin screening",
+  group = "Cephalosporins (2nd gen.)", atc_group1 = NA, atc_group2 = NA,
+  abbreviations = "cfsc", synonyms = NA,
+  oral_ddd = NA, oral_units = NA, iv_ddd = NA, iv_units = NA,
+  loinc = NA,
+  stringsAsFactors = FALSE
+))
 # More GLIMS codes
 antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "AMB"), "abbreviations"][[1]], "amf"))
 antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
@ -520,27 +568,33 @@ antibiotics[which(antibiotics$ab == "RFP"), "abbreviations"][[1]] <- list(sort(c
 antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "RTP"), "abbreviations"][[1]], "RET")))
 antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]] <- list(sort(c(antibiotics[which(antibiotics$ab == "TYL1"), "abbreviations"][[1]], "TVN")))

-antibiotics <- antibiotics %>% 
-  mutate(ab = as.character(ab)) %>% 
-  rbind(antibiotics %>% 
-          filter(ab == "GEH") %>% 
-          mutate(ab = "AMH",
-                 name = "Amphotericin B-high",
-                 abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high")))) %>% 
-  rbind(antibiotics %>% 
-          filter(ab == "GEH") %>% 
-          mutate(ab = "TOH",
-                 name = "Tobramycin-high",
-                 abbreviations = list(c("tohl", "tobra high", "tobramycin high")))) %>% 
-  rbind(antibiotics %>% 
-          filter(ab == "BUT") %>% 
-          mutate(ab = "CIX",
-                 atc = "D01AE14",
-                 name = "Ciclopirox",
-                 group = "Antifungals/antimycotics",
-                 atc_group1 = "Antifungals for topical use",
-                 atc_group2 = "Other antifungals for topical use",
-                 abbreviations = list(c("cipx"))))
+antibiotics <- antibiotics %>%
+  mutate(ab = as.character(ab)) %>%
+  rbind(antibiotics %>%
+    filter(ab == "GEH") %>%
+    mutate(
+      ab = "AMH",
+      name = "Amphotericin B-high",
+      abbreviations = list(c("amhl", "amfo b high", "ampho b high", "amphotericin high"))
+    )) %>%
+  rbind(antibiotics %>%
+    filter(ab == "GEH") %>%
+    mutate(
+      ab = "TOH",
+      name = "Tobramycin-high",
+      abbreviations = list(c("tohl", "tobra high", "tobramycin high"))
+    )) %>%
+  rbind(antibiotics %>%
+    filter(ab == "BUT") %>%
+    mutate(
+      ab = "CIX",
+      atc = "D01AE14",
+      name = "Ciclopirox",
+      group = "Antifungals/antimycotics",
+      atc_group1 = "Antifungals for topical use",
+      atc_group2 = "Other antifungals for topical use",
+      abbreviations = list(c("cipx"))
+    ))
 antibiotics[which(antibiotics$ab == "SSS"), "name"] <- "Sulfonamide"
 # ESBL E-test codes:
 antibiotics[which(antibiotics$ab == "CCV"), "abbreviations"][[1]] <- list(c("xtzl"))
@ -600,13 +654,13 @@ antibiotics[which(antibiotics$ab == "RXT"), "name"] <- "Roxithromycin"
 antibiotics[which(antibiotics$ab == "PEN"), "atc"] <- "J01CE01"

 # WHONET cleanup
-antibiotics[which(antibiotics$ab == "BCZ"), "name"] <-  "Bicyclomycin"
-antibiotics[which(antibiotics$ab == "CCL"), "name"] <-  "Cefetecol"
-antibiotics[which(antibiotics$ab == "ENV"), "name"] <-  "Enviomycin"
-antibiotics[which(antibiotics$ab == "KIT"), "name"] <-  "Kitasamycin"
-antibiotics[which(antibiotics$ab == "LSP"), "name"] <-  "Linco-spectin"
-antibiotics[which(antibiotics$ab == "MEC"), "name"] <-  "Mecillinam"
-antibiotics[which(antibiotics$ab == "PMR"), "name"] <-  "Pimaricin"
+antibiotics[which(antibiotics$ab == "BCZ"), "name"] <- "Bicyclomycin"
+antibiotics[which(antibiotics$ab == "CCL"), "name"] <- "Cefetecol"
+antibiotics[which(antibiotics$ab == "ENV"), "name"] <- "Enviomycin"
+antibiotics[which(antibiotics$ab == "KIT"), "name"] <- "Kitasamycin"
+antibiotics[which(antibiotics$ab == "LSP"), "name"] <- "Linco-spectin"
+antibiotics[which(antibiotics$ab == "MEC"), "name"] <- "Mecillinam"
+antibiotics[which(antibiotics$ab == "PMR"), "name"] <- "Pimaricin"
 antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "BCZ"), "abbreviations"][[1]], "Bicozamycin"))))
 antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "CCL"), "abbreviations"][[1]], "Cefcatacol"))))
 antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]] <- list(sort(unique(c(antibiotics[which(antibiotics$ab == "ENV"), "abbreviations"][[1]], "Tuberactinomycin"))))
@ -617,7 +671,7 @@ antibiotics[which(antibiotics$ab == "PMR"), "abbreviations"][[1]] <- list(sort(u


 # set cephalosporins groups for the ones that could not be determined automatically:
-antibiotics <- antibiotics %>% 
+antibiotics <- antibiotics %>%
  mutate(group = case_when(
    name == "Cefcapene" ~ "Cephalosporins (3rd gen.)",
    name == "Cefcapene pivoxil" ~ "Cephalosporins (3rd gen.)",
@ -650,21 +704,24 @@ antibiotics <- antibiotics %>%
    name == "Ceftolozane/enzyme inhibitor" ~ "Cephalosporins (5th gen.)",
    name == "Ceftolozane/tazobactam" ~ "Cephalosporins (5th gen.)",
    name == "Cefuroxime axetil" ~ "Cephalosporins (2nd gen.)",
-    TRUE ~ group))
+    TRUE ~ group
+  ))
 antibiotics[which(antibiotics$ab %in% c("CYC", "LNZ", "THA", "TZD")), "group"] <- "Oxazolidinones"

 # add pretomanid
 antibiotics <- antibiotics %>%
-  mutate(ab = as.character(ab)) %>% 
+  mutate(ab = as.character(ab)) %>%
  bind_rows(antibiotics %>%
-              mutate(ab = as.character(ab)) %>% 
-              filter(ab == "SMF") %>%
-              mutate(ab = "PMD",
-                     atc = "J04AK08",
-                     cid = 456199, 
-                     name = "Pretomanid", 
-                     abbreviations = list(""),
-                     oral_ddd = NA_real_))
+    mutate(ab = as.character(ab)) %>%
+    filter(ab == "SMF") %>%
+    mutate(
+      ab = "PMD",
+      atc = "J04AK08",
+      cid = 456199,
+      name = "Pretomanid",
+      abbreviations = list(""),
+      oral_ddd = NA_real_
+    ))



@ -675,25 +732,24 @@ antibiotics <- antibiotics %>%
 updated_atc <- as.list(antibiotics$atc)

 get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {
-  
  ab_name <- gsub("/", " and ", tolower(ab_name), fixed = TRUE)
-  
+
  # we will do a search on their website, which means:
-  
+
  # go to the url
-  atc_tbl <- read_html(url) %>% 
+  atc_tbl <- read_html(url) %>%
    # get all forms
    html_form() %>%
    # get the second form (the first form is a global website form)
-    .[[2]] %>% 
+    .[[2]] %>%
    # set the name input box to our search parameter
-    html_form_set(name = ab_name) %>% 
+    html_form_set(name = ab_name) %>%
    # hit Submit
-    html_form_submit() %>% 
+    html_form_submit() %>%
    # read the resulting page
-    read_html() %>% 
+    read_html() %>%
    # retrieve the table on it
-    html_node("table") %>% 
+    html_node("table") %>%
    # transform it to an R data set
    html_table(header = FALSE)
  # and get the ATCs (first column) of only exact hits
@ -702,9 +758,10 @@ get_atcs <- function(ab_name, url = "https://www.whocc.no/atc_ddd_index/") {

 # this takes around 4 minutes (some are skipped and go faster)
 for (i in seq_len(nrow(antibiotics))) {
-  message(percentage(i / nrow(antibiotics), digits = 1), 
-          " - Downloading ", antibiotics$name[i],
-          appendLF = FALSE)
+  message(percentage(i / nrow(antibiotics), digits = 1),
+    " - Downloading ", antibiotics$name[i],
+    appendLF = FALSE
+  )
  atcs <- get_atcs(antibiotics$name[i])
  if (length(atcs) > 0) {
    updated_atc[[i]] <- atcs
--- a/data-raw/reproduction_of_antivirals.R
+++ b/data-raw/reproduction_of_antivirals.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -28,8 +28,10 @@
 get_atc_table <- function(atc_group) {
  # give as input J0XXX, like atc_group = "J05AB"
  downloaded <- read_html(paste0("https://www.whocc.no/atc_ddd_index/?code=", atc_group, "&showdescription=no"))
-  table_title <- downloaded %>% html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>% html_text()
-  table_content <- downloaded %>% 
+  table_title <- downloaded %>%
+    html_nodes(paste0('a[href="./?code=', atc_group, '"]')) %>%
+    html_text()
+  table_content <- downloaded %>%
    html_nodes("table") %>%
    html_table(header = TRUE) %>%
    # returns list, so make data.frame out of it
@ -37,8 +39,8 @@ get_atc_table <- function(atc_group) {
    # select right columns
    select(atc = ATC.code, name = Name, ddd = DDD, unit = U, ddd_type = Adm.R) %>%
    # fill empty rows
-    mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>% 
-    pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>% 
+    mutate(atc = ifelse(atc == "", lag(atc), atc), name = ifelse(name == "", lag(name), name)) %>%
+    pivot_wider(names_from = ddd_type, values_from = c(ddd, unit)) %>%
    mutate(atc_group = table_title)
  if (!"ddd_O" %in% colnames(table_content)) {
    table_content <- table_content %>% mutate(ddd_O = NA_real_, unit_O = NA_character_)
@ -46,9 +48,10 @@ get_atc_table <- function(atc_group) {
  if (!"ddd_P" %in% colnames(table_content)) {
    table_content <- table_content %>% mutate(ddd_P = NA_real_, unit_P = NA_character_)
  }
-  table_content %>% select(atc, name, atc_group, 
-                           oral_ddd = ddd_O, oral_units = unit_O,
-                           iv_ddd = ddd_P, iv_units = unit_P)
+  table_content %>% select(atc, name, atc_group,
+    oral_ddd = ddd_O, oral_units = unit_O,
+    iv_ddd = ddd_P, iv_units = unit_P
+  )
 }

 # these are the relevant groups for input: https://www.whocc.no/atc_ddd_index/?code=J05A (J05 only contains J05A)
@ -62,32 +65,38 @@ for (i in 2:length(atc_groups)) {
 }

 # arrange on name, untibble it
-antivirals <- antivirals %>% arrange(name) %>% as.data.frame(stringsAsFactors = FALSE)
+antivirals <- antivirals %>%
+  arrange(name) %>%
+  as.data.frame(stringsAsFactors = FALSE)

 # add PubChem Compound ID (cid) and their trade names - functions are in file to create `antibiotics` data set
 CIDs <- get_CID(antivirals$name)
 # these could not be found:
-antivirals[is.na(CIDs),] %>% View()
-# get brand names from PubChem 
+antivirals[is.na(CIDs), ] %>% View()
+# get brand names from PubChem
 synonyms <- get_synonyms(CIDs)
-synonyms <- lapply(synonyms,
-                   function(x) {
-                     if (length(x) == 0 | all(is.na(x))) {
-                       ""
-                     } else {
-                       x
-                     }})
+synonyms <- lapply(
+  synonyms,
+  function(x) {
+    if (length(x) == 0 | all(is.na(x))) {
+      ""
+    } else {
+      x
+    }
+  }
+)

 antivirals <- antivirals %>%
  transmute(atc,
-            cid = CIDs,
-            name,
-            atc_group,
-            synonyms = unname(synonyms),
-            oral_ddd,
-            oral_units,
-            iv_ddd,
-            iv_units)
+    cid = CIDs,
+    name,
+    atc_group,
+    synonyms = unname(synonyms),
+    oral_ddd,
+    oral_units,
+    iv_ddd,
+    iv_units
+  )

 # save it
 usethis::use_data(antivirals, overwrite = TRUE)
--- a/data-raw/reproduction_of_dosage.R
+++ b/data-raw/reproduction_of_dosage.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -32,37 +32,45 @@ library(cleaner)
 # download the PDF file, open in Acrobat Pro and export as Excel workbook
 breakpoints_version <- 11

-dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>% 
-  format_names(snake_case = TRUE, penicillins = "drug") %>% 
-  filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>% 
-  filter(!is.na(standard_dosage)) %>% 
+dosage_source <- read_excel("data-raw/Dosages_v_11.0_Breakpoint_Tables.xlsx", skip = 5, na = "None") %>%
+  format_names(snake_case = TRUE, penicillins = "drug") %>%
+  filter(!tolower(standard_dosage) %in% c("standard dosage_source", "under review")) %>%
+  filter(!is.na(standard_dosage)) %>%
  # keep only one drug in the table
-  arrange(desc(drug)) %>% 
-  mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>% 
-  #distinct(drug, .keep_all = TRUE) %>% 
-  arrange(drug) %>% 
-  mutate(ab = as.ab(drug),
-         ab_name = ab_name(ab, language = NULL))
+  arrange(desc(drug)) %>%
+  mutate(drug = gsub("(.*) ([(]|iv|oral).*", "\\1", drug)) %>%
+  # distinct(drug, .keep_all = TRUE) %>%
+  arrange(drug) %>%
+  mutate(
+    ab = as.ab(drug),
+    ab_name = ab_name(ab, language = NULL)
+  )

 dosage_source <- bind_rows(
  # oral
-  dosage_source %>% 
+  dosage_source %>%
    filter(standard_dosage %like% " oral") %>%
-    mutate(standard_dosage = gsub("oral.*", "oral", standard_dosage),
-           high_dosage = if_else(high_dosage %like% "oral", 
-                                 gsub("oral.*", "oral", high_dosage),
-                                 NA_character_)),
+    mutate(
+      standard_dosage = gsub("oral.*", "oral", standard_dosage),
+      high_dosage = if_else(high_dosage %like% "oral",
+        gsub("oral.*", "oral", high_dosage),
+        NA_character_
+      )
+    ),
  # iv
-  dosage_source %>% 
+  dosage_source %>%
    filter(standard_dosage %like% " iv") %>%
-    mutate(standard_dosage = gsub(".* or ", "", standard_dosage),
-           high_dosage = if_else(high_dosage %like% "( or | iv)", 
-                                 gsub(".* or ", "", high_dosage),
-                                 NA_character_)),
+    mutate(
+      standard_dosage = gsub(".* or ", "", standard_dosage),
+      high_dosage = if_else(high_dosage %like% "( or | iv)",
+        gsub(".* or ", "", high_dosage),
+        NA_character_
+      )
+    ),
  # im
-  dosage_source %>% 
+  dosage_source %>%
    filter(standard_dosage %like% " im")
-) %>% 
+) %>%
  arrange(drug)


@ -71,34 +79,36 @@ get_dosage_lst <- function(col_data) {
    # remove new lines
    gsub(" ?(\n|\t)+ ?", " ", .) %>%
    # keep only the first suggestion, replace all after 'or' and more informative texts
-    gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>% 
+    gsub("(.*?) (or|with|loading|depending|over|by) .*", "\\1", .) %>%
    # remove (1 MU)
-    gsub(" [(][0-9] [A-Z]+[)]", "", .) %>% 
+    gsub(" [(][0-9] [A-Z]+[)]", "", .) %>%
    # remove parentheses
-    gsub("[)(]", "", .) %>% 
+    gsub("[)(]", "", .) %>%
    # remove drug names
-    gsub(" [a-z]{5,99}( |$)", " ", .) %>% 
-    gsub(" [a-z]{5,99}( |$)", " ", .) %>% 
-    gsub(" (acid|dose)", "", .)# %>% 
-    # keep lowest value only (25-30 mg -> 25 mg)
-   # gsub("[-].*? ", " ", .)
-  
-  dosage_lst <- lapply(strsplit(standard, " x "), 
-                         function(x) {
-                           dose <- x[1]
-                           if (dose %like% "under") {
-                             dose <- NA_character_
-                           }
-                           admin <- x[2]
-                           
-                           list(
-                             dose = trimws(dose),
-                             dose_times = gsub("^([0-9.]+).*", "\\1", admin),
-                             administration = clean_character(admin),
-                             notes = "",
-                             original_txt = ""
-                           )
-                         })
+    gsub(" [a-z]{5,99}( |$)", " ", .) %>%
+    gsub(" [a-z]{5,99}( |$)", " ", .) %>%
+    gsub(" (acid|dose)", "", .) # %>%
+  # keep lowest value only (25-30 mg -> 25 mg)
+  # gsub("[-].*? ", " ", .)
+
+  dosage_lst <- lapply(
+    strsplit(standard, " x "),
+    function(x) {
+      dose <- x[1]
+      if (dose %like% "under") {
+        dose <- NA_character_
+      }
+      admin <- x[2]
+
+      list(
+        dose = trimws(dose),
+        dose_times = gsub("^([0-9.]+).*", "\\1", admin),
+        administration = clean_character(admin),
+        notes = "",
+        original_txt = ""
+      )
+    }
+  )
  for (i in seq_len(length(col_data))) {
    dosage_lst[[i]]$original_txt <- gsub("\n", " ", col_data[i])
    if (col_data[i] %like% " (or|with|loading|depending|over) ") {
@ -147,12 +157,15 @@ dosage <- bind_rows(
    notes = sapply(uti, function(x) x$notes),
    original_txt = sapply(uti, function(x) x$original_txt),
    stringsAsFactors = FALSE
-  )) %>% 
-  mutate(eucast_version = breakpoints_version,
-         dose_times = as.integer(dose_times),
-         administration = gsub("([a-z]+) .*", "\\1", administration)) %>% 
-  arrange(name, administration, type) %>% 
-  filter(!is.na(dose), dose != ".") %>% 
+  )
+) %>%
+  mutate(
+    eucast_version = breakpoints_version,
+    dose_times = as.integer(dose_times),
+    administration = gsub("([a-z]+) .*", "\\1", administration)
+  ) %>%
+  arrange(name, administration, type) %>%
+  filter(!is.na(dose), dose != ".") %>%
  as.data.frame(stringsAsFactors = FALSE)
 rownames(dosage) <- NULL

--- a/data-raw/reproduction_of_example_isolates_unclean.R
+++ b/data-raw/reproduction_of_example_isolates_unclean.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -25,74 +25,118 @@

 patients <- unlist(lapply(LETTERS, paste0, 1:10))

-patients_table <- data.frame(patient_id = patients,
-                             gender = c(rep("M", 135),
-                                        rep("F", 125)))
+patients_table <- data.frame(
+  patient_id = patients,
+  gender = c(
+    rep("M", 135),
+    rep("F", 125)
+  )
+)

 dates <- seq(as.Date("2011-01-01"), as.Date("2020-01-01"), by = "day")

-bacteria_a <- c("E. coli", "S. aureus",
-                "S. pneumoniae", "K. pneumoniae")
+bacteria_a <- c(
+  "E. coli", "S. aureus",
+  "S. pneumoniae", "K. pneumoniae"
+)

 bacteria_b <- c("esccol", "staaur", "strpne", "klepne")

-bacteria_c <- c("Escherichia coli", "Staphylococcus aureus",
-                "Streptococcus pneumoniae", "Klebsiella pneumoniae")
+bacteria_c <- c(
+  "Escherichia coli", "Staphylococcus aureus",
+  "Streptococcus pneumoniae", "Klebsiella pneumoniae"
+)

 ab_interpretations <- c("S", "I", "R")

-ab_interpretations_messy = c("R", "< 0.5 S", "I")
+ab_interpretations_messy <- c("R", "< 0.5 S", "I")

 sample_size <- 1000

-data_a <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
-                     hospital = "A",
-                     bacteria = sample(bacteria_a, size = sample_size, replace = TRUE,
-                                       prob = c(0.50, 0.25, 0.15, 0.10)),
-                     AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.60, 0.05, 0.35)),
-                     AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.75, 0.10, 0.15)),
-                     CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.80, 0.00, 0.20)),
-                     GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.92, 0.00, 0.08)))
+data_a <- data.frame(
+  date = sample(dates, size = sample_size, replace = TRUE),
+  hospital = "A",
+  bacteria = sample(bacteria_a,
+    size = sample_size, replace = TRUE,
+    prob = c(0.50, 0.25, 0.15, 0.10)
+  ),
+  AMX = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.60, 0.05, 0.35)
+  ),
+  AMC = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.75, 0.10, 0.15)
+  ),
+  CIP = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.80, 0.00, 0.20)
+  ),
+  GEN = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.92, 0.00, 0.08)
+  )
+)


-data_b <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
-                     hospital = "B",
-                     bacteria = sample(bacteria_b, size = sample_size, replace = TRUE,
-                                       prob = c(0.50, 0.25, 0.15, 0.10)),
-                     AMX = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
-                                  prob = c(0.60, 0.05, 0.35)),
-                     AMC = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
-                                  prob = c(0.75, 0.10, 0.15)),
-                     CIP = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
-                                  prob = c(0.80, 0.00, 0.20)),
-                     GEN = sample(ab_interpretations_messy, size = sample_size, replace = TRUE,
-                                  prob = c(0.92, 0.00, 0.08))) 
+data_b <- data.frame(
+  date = sample(dates, size = sample_size, replace = TRUE),
+  hospital = "B",
+  bacteria = sample(bacteria_b,
+    size = sample_size, replace = TRUE,
+    prob = c(0.50, 0.25, 0.15, 0.10)
+  ),
+  AMX = sample(ab_interpretations_messy,
+    size = sample_size, replace = TRUE,
+    prob = c(0.60, 0.05, 0.35)
+  ),
+  AMC = sample(ab_interpretations_messy,
+    size = sample_size, replace = TRUE,
+    prob = c(0.75, 0.10, 0.15)
+  ),
+  CIP = sample(ab_interpretations_messy,
+    size = sample_size, replace = TRUE,
+    prob = c(0.80, 0.00, 0.20)
+  ),
+  GEN = sample(ab_interpretations_messy,
+    size = sample_size, replace = TRUE,
+    prob = c(0.92, 0.00, 0.08)
+  )
+)

-data_c <- data.frame(date = sample(dates, size = sample_size, replace = TRUE),
-                     hospital = "C",
-                     bacteria = sample(bacteria_c, size = sample_size, replace = TRUE,
-                                       prob = c(0.50, 0.25, 0.15, 0.10)),
-                     AMX = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.60, 0.05, 0.35)),
-                     AMC = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.75, 0.10, 0.15)),
-                     CIP = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.80, 0.00, 0.20)),
-                     GEN = sample(ab_interpretations, size = sample_size, replace = TRUE,
-                                  prob = c(0.92, 0.00, 0.08))) 
+data_c <- data.frame(
+  date = sample(dates, size = sample_size, replace = TRUE),
+  hospital = "C",
+  bacteria = sample(bacteria_c,
+    size = sample_size, replace = TRUE,
+    prob = c(0.50, 0.25, 0.15, 0.10)
+  ),
+  AMX = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.60, 0.05, 0.35)
+  ),
+  AMC = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.75, 0.10, 0.15)
+  ),
+  CIP = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.80, 0.00, 0.20)
+  ),
+  GEN = sample(ab_interpretations,
+    size = sample_size, replace = TRUE,
+    prob = c(0.92, 0.00, 0.08)
+  )
+)


-example_isolates_unclean <- data_a %>% 
+example_isolates_unclean <- data_a %>%
  bind_rows(data_b, data_c)

 example_isolates_unclean$patient_id <- sample(patients, size = nrow(example_isolates_unclean), replace = TRUE)

-example_isolates_unclean <- example_isolates_unclean %>% 
-  select(patient_id, hospital, date, bacteria, everything()) %>% 
+example_isolates_unclean <- example_isolates_unclean %>%
+  select(patient_id, hospital, date, bacteria, everything()) %>%
  dataset_UTF8_to_ASCII()

 usethis::use_data(example_isolates_unclean, overwrite = TRUE, internal = FALSE, version = 2, compress = "xz")
--- a/data-raw/reproduction_of_intrinsic_resistant.R
+++ b/data-raw/reproduction_of_intrinsic_resistant.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -32,19 +32,22 @@ for (i in seq_len(nrow(antibiotics))) {
 }

 int_resis <- eucast_rules(int_resis,
-                          eucast_rules_df = subset(AMR:::EUCAST_RULES_DF,
-                                                   is.na(have_these_values) & reference.version == 3.3),
-                          info = FALSE)
+  eucast_rules_df = subset(
+    AMR:::EUCAST_RULES_DF,
+    is.na(have_these_values) & reference.version == 3.3
+  ),
+  info = FALSE
+)

-int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>% 
+int_resis2 <- int_resis[, sapply(int_resis, function(x) any(!is.rsi(x) | x == "R")), drop = FALSE] %>%
  tidyr::pivot_longer(-mo) %>%
-  filter(value == "R") %>% 
+  filter(value == "R") %>%
  select(mo, ab = name)

 # remove lab drugs
 untreatable <- antibiotics[which(antibiotics$name %like% "-high|EDTA|polysorbate|macromethod|screening|/nacubactam"), "ab", drop = TRUE]
-int_resis2 <- int_resis2 %>% 
-  filter(!ab %in% untreatable) %>% 
+int_resis2 <- int_resis2 %>%
+  filter(!ab %in% untreatable) %>%
  arrange(mo, ab)

 intrinsic_resistant <- as.data.frame(int_resis2, stringsAsFactors = FALSE)
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
--- a/data-raw/reproduction_of_microorganisms_update.R
+++ b/data-raw/reproduction_of_microorganisms_update.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -39,7 +39,7 @@ test_mo <- microorganisms$mo

 get_author_year <- function(ref) {
  # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
-  
+
  authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
  authors2 <- gsub(" ?\\(Approved Lists [0-9]+\\) ?", " () ", authors2)
  authors2 <- gsub(" [)(]+ $", "", authors2)
@ -47,14 +47,16 @@ get_author_year <- function(ref) {
  authors2 <- trimws(gsub("^[(](.*)[)]$", "\\1", authors2))
  # only take part after brackets if there's a name
  authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
-                     gsub(".*[)] (.*)", "\\1", authors2),
-                     authors2)
+    gsub(".*[)] (.*)", "\\1", authors2),
+    authors2
+  )
  # get year from last 4 digits
-  lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
+  lastyear <- as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
  # can never be later than now
-  lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
-                    NA,
-                    lastyear)
+  lastyear <- ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
+    NA,
+    lastyear
+  )
  # get authors without last year
  authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
  # remove nonsense characters from names
@ -72,18 +74,19 @@ get_author_year <- function(ref) {
  authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
  # combine author and year if year is available
  ref <- ifelse(!is.na(lastyear),
-                paste0(authors, ", ", lastyear),
-                authors)
+    paste0(authors, ", ", lastyear),
+    authors
+  )
  # fix beginning and ending
  ref <- gsub(", $", "", ref)
  ref <- gsub("^, ", "", ref)
  ref <- gsub("^(emend|et al.,?)", "", ref)
  ref <- trimws(ref)
  ref <- gsub("'", "", ref)
-  
+
  # a lot start with a lowercase character - fix that
  ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
-  # specific one for the French that are named dOrbigny 
+  # specific one for the French that are named dOrbigny
  ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)])
  ref <- gsub(" +", " ", ref)
  ref
@ -92,21 +95,23 @@ get_author_year <- function(ref) {
 df_remove_nonASCII <- function(df) {
  # Remove non-ASCII characters (these are not allowed by CRAN)
  df %>%
-    mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>% 
+    mutate_if(is.character, iconv, from = "UTF-8", to = "ASCII//TRANSLIT") %>%
    # also remove invalid characters
-    mutate_if(is.character, ~gsub("[\"'`]+", "", .)) %>%
+    mutate_if(is.character, ~ gsub("[\"'`]+", "", .)) %>%
    AMR:::dataset_UTF8_to_ASCII()
 }

 abbreviate_mo <- function(x, minlength = 5, prefix = "", ...) {
  # keep a starting Latin ae
  suppressWarnings(
-    gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>% 
-      abbreviate(minlength = minlength,
-                 use.classes = TRUE,
-                 method = "both.sides", ...) %>% 
-      paste0(prefix, .) %>% 
-      toupper() %>% 
+    gsub("^ae", "\u00E6\u00E6", x, ignore.case = TRUE) %>%
+      abbreviate(
+        minlength = minlength,
+        use.classes = TRUE,
+        method = "both.sides", ...
+      ) %>%
+      paste0(prefix, .) %>%
+      toupper() %>%
      gsub("(\u00C6|\u00E6)+", "AE", .)
  )
 }
@ -119,63 +124,74 @@ taxonomy <- read_csv(file_location)

 new_synonyms <- taxonomy %>%
  left_join(taxonomy,
-            by = c("record_lnk" = "record_no"), 
-            suffix = c("", ".new")) %>% 
-  filter(!is.na(record_lnk)) %>% 
-  mutate_all(~ifelse(is.na(.), "", .)) %>% 
-  transmute(fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
-            fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
-            ref = get_author_year(authors),
-            prevalence = 0) %>% 
-  distinct(fullname, .keep_all = TRUE) %>% 
-  filter(fullname != fullname_new) %>% 
-  # this part joins this table to itself to correct for entries that had >1 renames, 
+    by = c("record_lnk" = "record_no"),
+    suffix = c("", ".new")
+  ) %>%
+  filter(!is.na(record_lnk)) %>%
+  mutate_all(~ ifelse(is.na(.), "", .)) %>%
+  transmute(
+    fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
+    fullname_new = trimws(paste(genus_name.new, sp_epithet.new, subsp_epithet.new)),
+    ref = get_author_year(authors),
+    prevalence = 0
+  ) %>%
+  distinct(fullname, .keep_all = TRUE) %>%
+  filter(fullname != fullname_new) %>%
+  # this part joins this table to itself to correct for entries that had >1 renames,
  # such as:
  # Bacteroides tectum -> Bacteroides tectus -> Bacteroides pyogenes
-  left_join(., ., 
-            by = c("fullname_new" = "fullname"), 
-            suffix = c("", ".2")) %>%
-  mutate(fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
-         ref = ifelse(!is.na(ref.2), ref.2, ref)) %>%
-  select(-ends_with(".2")) 
+  left_join(., .,
+    by = c("fullname_new" = "fullname"),
+    suffix = c("", ".2")
+  ) %>%
+  mutate(
+    fullname_new = ifelse(!is.na(fullname_new.2), fullname_new.2, fullname_new),
+    ref = ifelse(!is.na(ref.2), ref.2, ref)
+  ) %>%
+  select(-ends_with(".2"))

-mo_became_synonym <- microorganisms %>% 
+mo_became_synonym <- microorganisms %>%
  filter(fullname %in% new_synonyms$fullname)

 updated_microorganisms <- taxonomy %>%
-  filter(is.na(record_lnk)) %>% 
-  mutate_all(~ifelse(is.na(.), "", .)) %>% 
-  transmute(mo = "",
-            fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
-            kingdom = "Bacteria",
-            phylum = "",
-            class = "",
-            order = "",
-            family = "",
-            genus = trimws(genus_name),
-            species = trimws(replace_na(sp_epithet, "")),
-            subspecies = trimws(replace_na(subsp_epithet, "")),
-            rank = case_when(subspecies == "" & species == "" ~ "genus",
-                             subspecies == "" ~ "species",
-                             TRUE ~ "subsp."),
-            ref = get_author_year(authors),
-            species_id = as.character(record_no),
-            source = "LPSN",
-            prevalence = 0,
-            snomed = NA)
+  filter(is.na(record_lnk)) %>%
+  mutate_all(~ ifelse(is.na(.), "", .)) %>%
+  transmute(
+    mo = "",
+    fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet)),
+    kingdom = "Bacteria",
+    phylum = "",
+    class = "",
+    order = "",
+    family = "",
+    genus = trimws(genus_name),
+    species = trimws(replace_na(sp_epithet, "")),
+    subspecies = trimws(replace_na(subsp_epithet, "")),
+    rank = case_when(
+      subspecies == "" & species == "" ~ "genus",
+      subspecies == "" ~ "species",
+      TRUE ~ "subsp."
+    ),
+    ref = get_author_year(authors),
+    species_id = as.character(record_no),
+    source = "LPSN",
+    prevalence = 0,
+    snomed = NA
+  )

-new_microorganisms <- updated_microorganisms %>% 
+new_microorganisms <- updated_microorganisms %>%
  filter(!fullname %in% microorganisms$fullname)

 genera_with_mo_code <- updated_microorganisms %>%
  filter(genus %in% (microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% pull(genus))) %>%
-  distinct(genus) %>% 
+  distinct(genus) %>%
  left_join(microorganisms %>% filter(kingdom == "Bacteria", rank == "genus") %>% select(mo, genus),
-            by = "genus")
-  
-genera_without_mo_code <- updated_microorganisms %>% 
-  filter(!genus %in% genera_with_mo_code$genus) %>% 
-  pull(genus) %>% 
+    by = "genus"
+  )
+
+genera_without_mo_code <- updated_microorganisms %>%
+  filter(!genus %in% genera_with_mo_code$genus) %>%
+  pull(genus) %>%
  unique()

 genera_without_mo_code_abbr <- genera_without_mo_code %>% abbreviate_mo(5, prefix = "B_")
@ -184,65 +200,73 @@ genera_without_mo_code_abbr[genera_without_mo_code_abbr %in% microorganisms$mo]
 # all unique??
 sum(genera_without_mo_code_abbr %in% microorganisms$mo) == 0

-genus_abb <- tibble(genus = genera_without_mo_code,
-                    abbr = genera_without_mo_code_abbr) %>% 
+genus_abb <- tibble(
+  genus = genera_without_mo_code,
+  abbr = genera_without_mo_code_abbr
+) %>%
  bind_rows(microorganisms %>%
-              filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
-              transmute(genus, abbr = as.character(mo))) %>% 
+    filter(kingdom == "Bacteria", rank == "genus", !genus %in% genera_without_mo_code) %>%
+    transmute(genus, abbr = as.character(mo))) %>%
  arrange(genus)


 # Update taxonomy ---------------------------------------------------------

 # fill in the taxonomy of new genera
-updated_taxonomy <- tibble(phylum = character(0),
-                           class = character(0),
-                           order = character(0),
-                           family = character(0),
-                           genus = character(0))
+updated_taxonomy <- tibble(
+  phylum = character(0),
+  class = character(0),
+  order = character(0),
+  family = character(0),
+  genus = character(0)
+)
 for (page in LETTERS) {
  message("Downloading page ", page, "... ", appendLF = FALSE)
  url <- paste0("https://lpsn.dsmz.de/genus?page=", page)
-  
-  x <- xml2::read_html(url) %>% 
-    rvest::html_node(".main-list") %>% 
+
+  x <- xml2::read_html(url) %>%
+    rvest::html_node(".main-list") %>%
    # evety list element with a set <id> attribute
    rvest::html_nodes("li[id]")
  for (i in seq_len(length(x))) {
-    txt <- x %>% 
-      magrittr::extract2(i) %>% 
+    txt <- x %>%
+      magrittr::extract2(i) %>%
      rvest::html_text() %>%
-      gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>% 
-      gsub("Candidatus ", "", ., fixed = TRUE) %>% 
+      gsub("\\[[A-Za-z]+, no [a-z]+\\]", "NA", .) %>%
+      gsub("Candidatus ", "", ., fixed = TRUE) %>%
      gsub("[ \t\r\n\"]+", "|", .) %>%
-      gsub("\\|ShowHide.*", "", .) %>% 
-      gsub("[\\[\\]]", "", ., fixed = TRUE) %>% 
-      gsub("^\\|", "", .) %>% 
-      strsplit("|", fixed = TRUE) %>% 
+      gsub("\\|ShowHide.*", "", .) %>%
+      gsub("[\\[\\]]", "", ., fixed = TRUE) %>%
+      gsub("^\\|", "", .) %>%
+      strsplit("|", fixed = TRUE) %>%
      unlist()
    txt[txt == "NA"] <- ""
    txt <- gsub("[^A-Za-z]+", "", txt)
-    updated_taxonomy <- updated_taxonomy %>% 
-      bind_rows(tibble(phylum = txt[2],
-                       class = txt[3],
-                       order = txt[4],
-                       family = txt[5],
-                       genus = txt[6]))
+    updated_taxonomy <- updated_taxonomy %>%
+      bind_rows(tibble(
+        phylum = txt[2],
+        class = txt[3],
+        order = txt[4],
+        family = txt[5],
+        genus = txt[6]
+      ))
  }
  message(length(x), " entries (total ", nrow(updated_taxonomy), ")")
 }

 # Create new microorganisms -----------------------------------------------

-new_microorganisms <- new_microorganisms %>% 
-  left_join(genus_abb, by = "genus") %>% 
-  group_by(genus) %>% 
-  mutate(species_abb = abbreviate_mo(species, 4)) %>% 
-  group_by(genus, species) %>% 
-  mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>% 
-  ungroup() %>% 
-  mutate(mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
-         mo = gsub("_+$", "", mo)) %>% 
+new_microorganisms <- new_microorganisms %>%
+  left_join(genus_abb, by = "genus") %>%
+  group_by(genus) %>%
+  mutate(species_abb = abbreviate_mo(species, 4)) %>%
+  group_by(genus, species) %>%
+  mutate(subspecies_abb = abbreviate_mo(subspecies, 4)) %>%
+  ungroup() %>%
+  mutate(
+    mo = paste(abbr, species_abb, subspecies_abb, sep = "_"),
+    mo = gsub("_+$", "", mo)
+  ) %>%
  select(-matches("abb"))

 # add taxonomy new microorganisms
@ -256,128 +280,150 @@ MOs$mo[which(duplicated(MOs$mo))] <- paste0(MOs$mo[which(duplicated(MOs$mo))], 1
 # all unique?
 !any(duplicated(MOs$mo))

-MOs <- MOs %>% 
+MOs <- MOs %>%
  # remove entries that are now a synonym
-  filter(!fullname %in% new_synonyms$fullname) %>% 
+  filter(!fullname %in% new_synonyms$fullname) %>%
  # update the taxonomy
  left_join(updated_taxonomy, by = "genus", suffix = c("", ".new")) %>%
-  mutate(phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
-         class = ifelse(!is.na(class.new), class.new, class),
-         order = ifelse(!is.na(order.new), order.new, order),
-         family = ifelse(!is.na(family.new), family.new, family)) %>% 
+  mutate(
+    phylum = ifelse(!is.na(phylum.new), phylum.new, phylum),
+    class = ifelse(!is.na(class.new), class.new, class),
+    order = ifelse(!is.na(order.new), order.new, order),
+    family = ifelse(!is.na(family.new), family.new, family)
+  ) %>%
  select(-ends_with(".new")) %>%
  # update prevalence based on taxonomy (Berends et al., 2021)
  mutate(prevalence = case_when(
-    class == "Gammaproteobacteria"
-    | genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
+    class == "Gammaproteobacteria" |
+      genus %in% c("Enterococcus", "Staphylococcus", "Streptococcus")
    ~ 1,
-    kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi")
-    & (phylum %in% c("Proteobacteria",
-                     "Firmicutes",
-                     "Actinobacteria",
-                     "Sarcomastigophora")
-       | genus %in% MO_PREVALENT_GENERA
-       | rank %in% c("kingdom", "phylum", "class", "order", "family"))
+    kingdom %in% c("Archaea", "Bacteria", "Chromista", "Fungi") &
+      (phylum %in% c(
+        "Proteobacteria",
+        "Firmicutes",
+        "Actinobacteria",
+        "Sarcomastigophora"
+      ) |
+        genus %in% MO_PREVALENT_GENERA |
+        rank %in% c("kingdom", "phylum", "class", "order", "family"))
    ~ 2,
    TRUE ~ 3
  ))

 # add all mssing genera, families and orders
-MOs <- MOs %>% 
+MOs <- MOs %>%
  bind_rows(
-    MOs %>% 
+    MOs %>%
      arrange(genus, species) %>%
      distinct(genus, .keep_all = TRUE) %>%
      filter(rank == "species", source != "manually added") %>%
-      mutate(mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
-             fullname = genus, 
-             species = "", 
-             subspecies = "", 
-             rank = "genus", 
-             species_id = "",
-             snomed = NA,
-             ref = NA_character_),
-    MOs %>% 
+      mutate(
+        mo = gsub("^([A-Z]_[A-Z]+)_.*", "\\1", mo),
+        fullname = genus,
+        species = "",
+        subspecies = "",
+        rank = "genus",
+        species_id = "",
+        snomed = NA,
+        ref = NA_character_
+      ),
+    MOs %>%
      group_by(family) %>%
      filter(!any(rank == "family") & n() > 1) %>%
-      ungroup() %>% 
+      ungroup() %>%
      arrange(family) %>%
-      distinct(family, .keep_all = TRUE) %>% 
+      distinct(family, .keep_all = TRUE) %>%
      filter(!family %in% c("", NA), source != "manually added") %>%
-      mutate(mo = paste0(substr(kingdom, 1, 1), "_[FAM]_",
-                         abbreviate(family,
-                                    minlength = 8,
-                                    use.classes = TRUE,
-                                    method = "both.sides",
-                                    strict = FALSE)),
-             mo = toupper(mo),
-             fullname = family,
-             genus = "",
-             species = "", 
-             subspecies = "", 
-             rank = "family", 
-             species_id = "",
-             snomed = NA,
-             ref = NA_character_),
-    MOs %>% 
+      mutate(
+        mo = paste0(
+          substr(kingdom, 1, 1), "_[FAM]_",
+          abbreviate(family,
+            minlength = 8,
+            use.classes = TRUE,
+            method = "both.sides",
+            strict = FALSE
+          )
+        ),
+        mo = toupper(mo),
+        fullname = family,
+        genus = "",
+        species = "",
+        subspecies = "",
+        rank = "family",
+        species_id = "",
+        snomed = NA,
+        ref = NA_character_
+      ),
+    MOs %>%
      group_by(order) %>%
      filter(!any(rank == "order") & n() > 1) %>%
-      ungroup() %>% 
+      ungroup() %>%
      arrange(order) %>%
-      distinct(order, .keep_all = TRUE) %>% 
+      distinct(order, .keep_all = TRUE) %>%
      filter(!order %in% c("", NA), source != "manually added") %>%
-      mutate(mo = paste0(substr(kingdom, 1, 1), "_[ORD]_",
-                         abbreviate(order,
-                                    minlength = 8,
-                                    use.classes = TRUE,
-                                    method = "both.sides",
-                                    strict = FALSE)),
-             mo = toupper(mo),
-             fullname = order,
-             family = "",
-             genus = "",
-             species = "", 
-             subspecies = "", 
-             rank = "order", 
-             species_id = "",
-             snomed = NA,
-             ref = NA_character_)
+      mutate(
+        mo = paste0(
+          substr(kingdom, 1, 1), "_[ORD]_",
+          abbreviate(order,
+            minlength = 8,
+            use.classes = TRUE,
+            method = "both.sides",
+            strict = FALSE
+          )
+        ),
+        mo = toupper(mo),
+        fullname = order,
+        family = "",
+        genus = "",
+        species = "",
+        subspecies = "",
+        rank = "order",
+        species_id = "",
+        snomed = NA,
+        ref = NA_character_
+      )
  ) %>%
  arrange(fullname)

 # clean up
-MOs <- MOs %>% 
+MOs <- MOs %>%
  df_remove_nonASCII()

 # Add LPSN record IDs -----------------------------------------------------

 records_ids <- taxonomy %>%
-  mutate(across(1:3, function(x) { x[is.na(x)] <- ""; x}),
-         fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))) %>%
-  transmute(fullname, species_id = as.numeric(record_no)) %>% 
-  arrange(fullname, species_id) %>% 
+  mutate(across(1:3, function(x) {
+    x[is.na(x)] <- ""
+    x
+  }),
+  fullname = trimws(paste(genus_name, sp_epithet, subsp_epithet))
+  ) %>%
+  transmute(fullname, species_id = as.numeric(record_no)) %>%
+  arrange(fullname, species_id) %>%
  distinct(fullname, .keep_all = TRUE)
 message("Adding ", sum(records_ids$fullname %in% microorganisms$fullname), " LPSN record IDs")
 MOs <- MOs %>%
-  select(-species_id) %>% 
+  select(-species_id) %>%
  left_join(records_ids, by = "fullname") %>%
-  relocate(species_id, .after = ref) %>% 
-  mutate(source = case_when(!is.na(species_id) ~ "LPSN",
-                            source %unlike% "manual" ~ "CoL",
-                            TRUE ~ source))
+  relocate(species_id, .after = ref) %>%
+  mutate(source = case_when(
+    !is.na(species_id) ~ "LPSN",
+    source %unlike% "manual" ~ "CoL",
+    TRUE ~ source
+  ))

 # Merge synonyms ----------------------------------------------------------

 # remove synonyms that are now valid names
-MOs.old <- microorganisms.old %>% 
+MOs.old <- microorganisms.old %>%
  # add new synonyms
-  bind_rows(new_synonyms) %>% 
-  filter(!fullname %in% MOs$fullname) %>% 
-  arrange(fullname) %>% 
-  distinct(fullname, fullname_new, .keep_all = TRUE) %>% 
+  bind_rows(new_synonyms) %>%
+  filter(!fullname %in% MOs$fullname) %>%
+  arrange(fullname) %>%
+  distinct(fullname, fullname_new, .keep_all = TRUE) %>%
  # add prevalence to old taxonomic names
-  select(-prevalence) %>% 
-  left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>% 
+  select(-prevalence) %>%
+  left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname")) %>%
  # clean up
  df_remove_nonASCII()

@ -397,16 +443,18 @@ microorganisms.old <- MOs.old
 # we keep them both
 microorganisms <- microorganisms %>%
  bind_rows(microorganisms %>%
-              filter(fullname == "Branhamella catarrhalis") %>%
-              mutate(mo = "B_MRXLL_CTRR",
-                     fullname = "Moraxella catarrhalis",
-                     genus = "Moraxella",
-                     ref = "Henriksen et al., 1968",
-                     species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
-                     snomed = as.list(24226003))) %>% 
-  arrange(fullname) %>% 
+    filter(fullname == "Branhamella catarrhalis") %>%
+    mutate(
+      mo = "B_MRXLL_CTRR",
+      fullname = "Moraxella catarrhalis",
+      genus = "Moraxella",
+      ref = "Henriksen et al., 1968",
+      species_id = "a374f6f0868e05f9c0f5077b60ee0a6c",
+      snomed = as.list(24226003)
+    )) %>%
+  arrange(fullname) %>%
  df_remove_nonASCII()
-microorganisms.old <- microorganisms.old %>% 
+microorganisms.old <- microorganisms.old %>%
  filter(fullname != "Moraxella catarrhalis")
 # ---

@ -444,7 +492,7 @@ rm(intrinsic_resistant)

 # load new data sets again
 devtools::load_all(".")
-source("data-raw/pre-commit-hook.R")
+source("data-raw/_pre_commit_hook.R")
 devtools::load_all(".")


--- a/data-raw/reproduction_of_poorman.R
+++ b/data-raw/reproduction_of_poorman.R
@ -25,26 +25,28 @@ sapply(files, function(file) {
  contents <<- c(contents, readLines(file))
  invisible()
 })
-contents <- c(intro,
-              copyright,
-              "",
-              contents)
+contents <- c(
+  intro,
+  copyright,
+  "",
+  contents
+)

 # remove lines starting with "#'" and NULL and write to file
 contents <- contents[!grepl("^(#'|NULL|\"_PACKAGE)", contents)]

 # now make it independent on UseMethod, since we will not export these functions
-
 contents <- gsub('UseMethod[(]"(.*?)"[)]',
-                 'if ("grouped_data" %in% class(.data)) {|||    \\1.grouped_data(.data, ...)|||  } else {|||    \\1.default(.data, ...)|||  }', 
-                 paste(contents, collapse = "|||"),
-                 perl = TRUE) %>%
+  'if ("grouped_data" %in% class(.data)) {|||    \\1.grouped_data(.data, ...)|||  } else {|||    \\1.default(.data, ...)|||  }',
+  paste(contents, collapse = "|||"),
+  perl = TRUE
+) %>%
  # add commit to intro part
  gsub("{commit}", commit, ., fixed = TRUE) %>%
  # add date to intro part
  gsub("{date}", format(Sys.Date(), "%e %B %Y"), ., fixed = TRUE) %>%
  strsplit(split = "|||", fixed = TRUE) %>%
-  unlist() %>% 
+  unlist() %>%
  # add "pm_" as prefix to all functions
  gsub("^([a-z_.]+) <- function", "pm_\\1 <- function", .)

@ -56,7 +58,7 @@ for (i in seq_len(length(new_pm_names))) {
  contents <- gsub(paste0("( |\\[|\\()", new_pm_names[i], "($|[^a-z]|\\))"), paste0("\\1pm_", new_pm_names[i], "\\2"), contents)
 }

-# replace %>% with %pm>% 
+# replace %>% with %pm>%
 contents <- gsub("%>%", "%pm>%", contents, fixed = TRUE)
 # fix for new lines, since n() also existed
 contents <- gsub("\\pm_n", "\\n", contents, fixed = TRUE)
@ -70,6 +72,8 @@ contents <- gsub("context", "pm_context", contents, fixed = TRUE)
 contents <- gsub("(pm_)+", "pm_", contents)
 # special case for pm_distinct(), we need '.keep_all' to work
 contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_distinct <- function(.data, ...)", contents, fixed = TRUE)
+# pm_pull does not correct for tibbles, misses the drop argument
+contents[contents == ".data[, var]"] <- ".data[, var, drop = TRUE]"

 # who needs US spelling?
 contents <- contents[!grepl("summarize", contents)]
--- a/data-raw/reproduction_of_rsi_translation.R
+++ b/data-raw/reproduction_of_rsi_translation.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -40,88 +40,104 @@ ORGLIST <- read_tsv("data-raw/WHONET/Codes/ORGLIST.txt", na = c("", "NA", "-"),
 # create data set for generic rules (i.e., AB-specific but not MO-specific)
 rsi_generic <- DRGLST %>%
  filter(CLSI == "X" | EUCST == "X") %>%
-  select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>% 
-  mutate(ab = as.ab(ab),
-         across(matches("(CLSI|EUCST)"), as.double)) %>%
-  pivot_longer(-c(ab, disk_dose), names_to = "method") %>% 
-  separate(method, into = c("guideline", "method"), sep = "_") %>% 
+  select(ab = ANTIBIOTIC, disk_dose = POTENCY, matches("^(CLSI|EUCST)[0-9]")) %>%
+  mutate(
+    ab = as.ab(ab),
+    across(matches("(CLSI|EUCST)"), as.double)
+  ) %>%
+  pivot_longer(-c(ab, disk_dose), names_to = "method") %>%
+  separate(method, into = c("guideline", "method"), sep = "_") %>%
  mutate(method = ifelse(method %like% "D",
-                         gsub("D", "DISK_", method, fixed = TRUE),
-                         gsub("M", "MIC_", method, fixed = TRUE))) %>% 
-  separate(method, into = c("method", "rsi"), sep = "_") %>% 
+    gsub("D", "DISK_", method, fixed = TRUE),
+    gsub("M", "MIC_", method, fixed = TRUE)
+  )) %>%
+  separate(method, into = c("method", "rsi"), sep = "_") %>%
  # I is in the middle, so we only need R and S (saves data)
-  filter(rsi %in% c("R", "S")) %>% 
+  filter(rsi %in% c("R", "S")) %>%
  pivot_wider(names_from = rsi, values_from = value) %>%
-  transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
-            method,
-            site = NA_character_,
-            mo = as.mo("UNKNOWN"),
-            ab,
-            ref_tbl = "Generic rules",
-            disk_dose,
-            breakpoint_S = S,
-            breakpoint_R = R,
-            uti = FALSE) %>% 
+  transmute(
+    guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", guideline)),
+    method,
+    site = NA_character_,
+    mo = as.mo("UNKNOWN"),
+    ab,
+    ref_tbl = "Generic rules",
+    disk_dose,
+    breakpoint_S = S,
+    breakpoint_R = R,
+    uti = FALSE
+  ) %>%
  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
 rsi_generic

 # create data set for AB-specific and MO-specific rules
-rsi_specific <- DRGLST1 %>% 
+rsi_specific <- DRGLST1 %>%
  # only support guidelines for humans (for now)
-  filter(HOST == "Human" & SITE_INF %unlike% "canine|feline",
-         # only CLSI and EUCAST
-         GUIDELINES %like% "(CLSI|EUCST)") %>% 
+  filter(
+    HOST == "Human" & SITE_INF %unlike% "canine|feline",
+    # only CLSI and EUCAST
+    GUIDELINES %like% "(CLSI|EUCST)"
+  ) %>%
  # get microorganism names from another WHONET table
-  mutate(ORG_CODE = tolower(ORG_CODE)) %>% 
+  mutate(ORG_CODE = tolower(ORG_CODE)) %>%
  left_join(ORGLIST %>%
-              transmute(ORG_CODE = tolower(ORG),
-                        SCT_TEXT = case_when(is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
-                                             is.na(SCT_TEXT) ~ ORGANISM,
-                                             TRUE ~ SCT_TEXT)) %>% 
-              # WHO for 'Generic'
-              bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>% 
-              # WHO for 'Enterobacterales'
-              bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))
-  ) %>% 
+    transmute(
+      ORG_CODE = tolower(ORG),
+      SCT_TEXT = case_when(
+        is.na(SCT_TEXT) & is.na(ORGANISM) ~ ORG_CODE,
+        is.na(SCT_TEXT) ~ ORGANISM,
+        TRUE ~ SCT_TEXT
+      )
+    ) %>%
+    # WHO for 'Generic'
+    bind_rows(tibble(ORG_CODE = "gen", SCT_TEXT = "Unknown")) %>%
+    # WHO for 'Enterobacterales'
+    bind_rows(tibble(ORG_CODE = "ebc", SCT_TEXT = "Enterobacterales"))) %>%
  # still some manual cleaning required
-  filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>% 
-  transmute(guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
-            method = toupper(TESTMETHOD),
-            site = SITE_INF,
-            mo = as.mo(SCT_TEXT),
-            ab = as.ab(WHON5_CODE),
-            ref_tbl = REF_TABLE,
-            disk_dose = POTENCY,
-            breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
-            breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
-            uti = site %like% "(UTI|urinary|urine)") %>% 
+  filter(!SCT_TEXT %in% c("Anaerobic Actinomycetes")) %>%
+  transmute(
+    guideline = gsub("([0-9]+)$", " 20\\1", gsub("EUCST", "EUCAST", GUIDELINES)),
+    method = toupper(TESTMETHOD),
+    site = SITE_INF,
+    mo = as.mo(SCT_TEXT),
+    ab = as.ab(WHON5_CODE),
+    ref_tbl = REF_TABLE,
+    disk_dose = POTENCY,
+    breakpoint_S = as.double(ifelse(method == "DISK", DISK_S, MIC_S)),
+    breakpoint_R = as.double(ifelse(method == "DISK", DISK_R, MIC_R)),
+    uti = site %like% "(UTI|urinary|urine)"
+  ) %>%
  filter(!(is.na(breakpoint_S) & is.na(breakpoint_R)), !is.na(mo), !is.na(ab))
 rsi_specific

-rsi_translation <- rsi_generic %>% 
-  bind_rows(rsi_specific) %>% 
+rsi_translation <- rsi_generic %>%
+  bind_rows(rsi_specific) %>%
  # add the taxonomic rank index, used for sorting (so subspecies match first, order matches last)
-  mutate(rank_index = case_when(mo_rank(mo) %like% "(infra|sub)" ~ 1,
-                                mo_rank(mo) == "species" ~ 2,
-                                mo_rank(mo) == "genus" ~ 3,
-                                mo_rank(mo) == "family" ~ 4,
-                                mo_rank(mo) == "order" ~ 5,
-                                TRUE ~ 6),
-         .after = mo) %>% 
-  arrange(desc(guideline), ab, mo, method) %>% 
-  distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>% 
+  mutate(
+    rank_index = case_when(
+      mo_rank(mo) %like% "(infra|sub)" ~ 1,
+      mo_rank(mo) == "species" ~ 2,
+      mo_rank(mo) == "genus" ~ 3,
+      mo_rank(mo) == "family" ~ 4,
+      mo_rank(mo) == "order" ~ 5,
+      TRUE ~ 6
+    ),
+    .after = mo
+  ) %>%
+  arrange(desc(guideline), ab, mo, method) %>%
+  distinct(guideline, ab, mo, method, site, .keep_all = TRUE) %>%
  as.data.frame(stringsAsFactors = FALSE)

 # disks MUST be 6-50 mm, so correct where that is wrong:
 rsi_translation[which(rsi_translation$method == "DISK" &
-                        (is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
+  (is.na(rsi_translation$breakpoint_S) | rsi_translation$breakpoint_S > 50)), "breakpoint_S"] <- 50
 rsi_translation[which(rsi_translation$method == "DISK" &
-                        (is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
+  (is.na(rsi_translation$breakpoint_R) | rsi_translation$breakpoint_R < 6)), "breakpoint_R"] <- 6
 m <- unique(as.double(as.mic(levels(as.mic(1)))))
 rsi_translation[which(rsi_translation$method == "MIC" &
-                        is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
+  is.na(rsi_translation$breakpoint_S)), "breakpoint_S"] <- min(m)
 rsi_translation[which(rsi_translation$method == "MIC" &
-                        is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)
+  is.na(rsi_translation$breakpoint_R)), "breakpoint_R"] <- max(m)

 # WHONET has no >1024 but instead uses 1025, 513, etc, so raise these one higher valid MIC factor level:
 rsi_translation[which(rsi_translation$breakpoint_R == 129), "breakpoint_R"] <- m[which(m == 128) + 1]
@ -134,18 +150,18 @@ rsi_translation[which(rsi_translation$breakpoint_R == 1025), "breakpoint_R"] <-
 #           WHONET file: S <= 8 and R >= 16
 # this will make an MIC of 12 I, which should be R, so:
 eucast_mics <- which(rsi_translation$guideline %like% "EUCAST" &
-                       rsi_translation$method == "MIC" &
-                       log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
-                       !is.na(rsi_translation$breakpoint_R))
+  rsi_translation$method == "MIC" &
+  log2(as.double(rsi_translation$breakpoint_R)) - log2(as.double(rsi_translation$breakpoint_S)) != 0 &
+  !is.na(rsi_translation$breakpoint_R))
 old_R <- rsi_translation[eucast_mics, "breakpoint_R", drop = TRUE]
 old_S <- rsi_translation[eucast_mics, "breakpoint_S", drop = TRUE]
-new_R <- 2 ^ (log2(old_R) - 1)
+new_R <- 2^(log2(old_R) - 1)
 new_R[new_R < old_S | is.na(as.mic(new_R))] <- old_S[new_R < old_S | is.na(as.mic(new_R))]
 rsi_translation[eucast_mics, "breakpoint_R"] <- new_R
 eucast_disks <- which(rsi_translation$guideline %like% "EUCAST" &
-                        rsi_translation$method == "DISK" &
-                        rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
-                        !is.na(rsi_translation$breakpoint_R))
+  rsi_translation$method == "DISK" &
+  rsi_translation$breakpoint_S - rsi_translation$breakpoint_R != 0 &
+  !is.na(rsi_translation$breakpoint_R))
 rsi_translation[eucast_disks, "breakpoint_R"] <- rsi_translation[eucast_disks, "breakpoint_R", drop = TRUE] + 1

 # Greek symbols and EM dash symbols are not allowed by CRAN, so replace them with ASCII:
--- a/data-raw/snomed.R
+++ b/data-raw/snomed.R
@ -9,7 +9,7 @@
 # (c) 2018-2022 Berends MS, Luz CF et al.                              #
 # Developed at the University of Groningen, the Netherlands, in        #
 # collaboration with non-profit organisations Certe Medical            #
-# Diagnostics & Advice, and University Medical Center Groningen.       # 
+# Diagnostics & Advice, and University Medical Center Groningen.       #
 #                                                                      #
 # This R package is free software; you can freely use and distribute   #
 # it for both personal and commercial purposes under the terms of the  #
@ -31,8 +31,8 @@ library(tidyverse)
 # - go to https://phinvads.cdc.gov/vads/ViewValueSet.action?oid=2.16.840.1.114222.4.11.1009
 # - check that current online version is higher than SNOMED_VERSION$current_version
 # - if so, click on 'Download Value Set', choose 'TXT'
-snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>% 
-  select(1:2) %>% 
+snomed <- read_tsv("data-raw/SNOMED_PHVS_Microorganism_CDC_V12.txt", skip = 3) %>%
+  select(1:2) %>%
  set_names(c("snomed", "mo"))

 # save all valid genera, species and subspecies
@ -41,17 +41,21 @@ vctr <- tolower(vctr[vctr %like% "^[a-z]+$"])

 # remove all parts of the name that are no valid values in genera, species or subspecies
 # this takes ~20 seconds
-snomed <- snomed %>% 
-  mutate(fullname = vapply(FUN.VALUE = character(1),
-                           # split on space and/or comma
-                           strsplit(tolower(mo), "[ ,]"),
-                           function(x) trimws(paste0(x[x %in% vctr], collapse = " "))),
-         # remove " group"
-         fullname = gsub(" group", "", fullname, fixed = TRUE))
+snomed <- snomed %>%
+  mutate(
+    fullname = vapply(
+      FUN.VALUE = character(1),
+      # split on space and/or comma
+      strsplit(tolower(mo), "[ ,]"),
+      function(x) trimws(paste0(x[x %in% vctr], collapse = " "))
+    ),
+    # remove " group"
+    fullname = gsub(" group", "", fullname, fixed = TRUE)
+  )

-snomed_keep <- snomed %>% 
-  filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>% 
-  group_by(fullname_lower = fullname) %>% 
+snomed_keep <- snomed %>%
+  filter(fullname %in% tolower(c(microorganisms$fullname, microorganisms.old$fullname))) %>%
+  group_by(fullname_lower = fullname) %>%
  summarise(snomed = list(snomed))

 message(nrow(snomed_keep), " MO's will get a SNOMED code.")
@ -65,10 +69,9 @@ microorganisms <- microorganisms %>%
  # join new snomed
  left_join(snomed_keep) %>%
  # remove dummy var
-  select(-fullname_lower) %>% 
+  select(-fullname_lower) %>%
  AMR:::dataset_UTF8_to_ASCII()

 # don't forget to update the version number in SNOMED_VERSION in ./R/globals.R!

 # usethis::use_data(microorganisms, overwrite = TRUE, version = 2, compress = "xz")
-
--- a/data-raw/toxoplasma.R
+++ b/data-raw/toxoplasma.R
@ -1,68 +1,77 @@
-microorganisms <- microorganisms |> bind_rows(
-  # Toxoplasma
-  data.frame(mo = "P_TXPL_GOND", # species
-             fullname = "Toxoplasma gondii",
-             kingdom = "(unknown kingdom)",
-             phylum = "Apicomplexa",
-             class = "Conoidasida",
-             order = "Eucoccidiorida",
-             family = "Sarcocystidae",
-             genus = "Toxoplasma",
-             species = "gondii",
-             subspecies = "",
-             rank = "species",
-             ref = "Nicolle et al., 1908",
-             species_id = NA_real_,
-             source = "manually added",
-             prevalence = 2,
-             stringsAsFactors = FALSE),
-  data.frame(mo = "P_TXPL", # genus
-             fullname = "Toxoplasma",
-             kingdom = "(unknown kingdom)",
-             phylum = "Apicomplexa",
-             class = "Conoidasida",
-             order = "Eucoccidiorida",
-             family = "Sarcocystidae",
-             genus = "Toxoplasma",
-             species = "",
-             subspecies = "",
-             rank = "genus",
-             ref = "Nicolle et al., 1909",
-             species_id = NA_real_,
-             source = "manually added",
-             prevalence = 2,
-             stringsAsFactors = FALSE),
-  data.frame(mo = "[FAM]_SRCCYSTD", # family
-             fullname = "Sarcocystidae",
-             kingdom = "(unknown kingdom)",
-             phylum = "Apicomplexa",
-             class = "Conoidasida",
-             order = "Eucoccidiorida",
-             family = "Sarcocystidae",
-             genus = "",
-             species = "",
-             subspecies = "",
-             rank = "family",
-             ref = "Poche, 1913",
-             species_id = NA_real_,
-             source = "manually added",
-             prevalence = 2,
-             stringsAsFactors = FALSE),
-  data.frame(mo = "[ORD]_EUCCCDRD", # order
-             fullname = "Eucoccidiorida",
-             kingdom = "(unknown kingdom)",
-             phylum = "Apicomplexa",
-             class = "Conoidasida",
-             order = "Eucoccidiorida",
-             family = "",
-             genus = "",
-             species = "",
-             subspecies = "",
-             rank = "order",
-             ref = "Leger et al., 1910",
-             species_id = NA_real_,
-             source = "manually added",
-             prevalence = 2,
-             stringsAsFactors = FALSE),
-  ) |> 
+microorganisms <- microorganisms |>
+  bind_rows(
+    # Toxoplasma
+    data.frame(
+      mo = "P_TXPL_GOND", # species
+      fullname = "Toxoplasma gondii",
+      kingdom = "(unknown kingdom)",
+      phylum = "Apicomplexa",
+      class = "Conoidasida",
+      order = "Eucoccidiorida",
+      family = "Sarcocystidae",
+      genus = "Toxoplasma",
+      species = "gondii",
+      subspecies = "",
+      rank = "species",
+      ref = "Nicolle et al., 1908",
+      species_id = NA_real_,
+      source = "manually added",
+      prevalence = 2,
+      stringsAsFactors = FALSE
+    ),
+    data.frame(
+      mo = "P_TXPL", # genus
+      fullname = "Toxoplasma",
+      kingdom = "(unknown kingdom)",
+      phylum = "Apicomplexa",
+      class = "Conoidasida",
+      order = "Eucoccidiorida",
+      family = "Sarcocystidae",
+      genus = "Toxoplasma",
+      species = "",
+      subspecies = "",
+      rank = "genus",
+      ref = "Nicolle et al., 1909",
+      species_id = NA_real_,
+      source = "manually added",
+      prevalence = 2,
+      stringsAsFactors = FALSE
+    ),
+    data.frame(
+      mo = "[FAM]_SRCCYSTD", # family
+      fullname = "Sarcocystidae",
+      kingdom = "(unknown kingdom)",
+      phylum = "Apicomplexa",
+      class = "Conoidasida",
+      order = "Eucoccidiorida",
+      family = "Sarcocystidae",
+      genus = "",
+      species = "",
+      subspecies = "",
+      rank = "family",
+      ref = "Poche, 1913",
+      species_id = NA_real_,
+      source = "manually added",
+      prevalence = 2,
+      stringsAsFactors = FALSE
+    ),
+    data.frame(
+      mo = "[ORD]_EUCCCDRD", # order
+      fullname = "Eucoccidiorida",
+      kingdom = "(unknown kingdom)",
+      phylum = "Apicomplexa",
+      class = "Conoidasida",
+      order = "Eucoccidiorida",
+      family = "",
+      genus = "",
+      species = "",
+      subspecies = "",
+      rank = "order",
+      ref = "Leger et al., 1910",
+      species_id = NA_real_,
+      source = "manually added",
+      prevalence = 2,
+      stringsAsFactors = FALSE
+    ),
+  ) |>
  arrange(fullname)