AMR/R/atc.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# AUTHORS                                                              #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
#                                                                      #
# LICENCE                                                              #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0,    #
# as published by the Free Software Foundation.                        #
#                                                                      #
# This program is distributed in the hope that it will be useful,      #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
# ==================================================================== #

#' Properties of an ATC code
#'
#' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. \strong{This function requires an internet connection.}
#' @param atc_code a character or character vector with ATC code(s) of antibiotic(s)
#' @param property property of an ATC code. Valid values are \code{"ATC code"}, \code{"Name"}, \code{"DDD"}, \code{"U"} (\code{"unit"}), \code{"Adm.R"} en \code{"Note"}.
#' @param administration type of administration, see \emph{Details}
#' @param url url of website of the WHO. The sign \code{\%s} can be used as a placeholder for ATC codes.
#' @details
#' Abbreviations for the property \code{"Adm.R"} (parameter \code{administration}):
#' \itemize{
#'   \item{\code{"Implant"}}{ = Implant}
#'   \item{\code{"Inhal"}}{ = Inhalation}
#'   \item{\code{"Instill"}}{ = Instillation}
#'   \item{\code{"N"}}{ = nasal}
#'   \item{\code{"O"}}{ = oral}
#'   \item{\code{"P"}}{ = parenteral}
#'   \item{\code{"R"}}{ = rectal}
#'   \item{\code{"SL"}}{ = sublingual/buccal}
#'   \item{\code{"TD"}}{ = transdermal}
#'   \item{\code{"V"}}{ = vaginal}
#' }
#'
#' Abbreviations for the property \code{"U"} (unit):
#' \itemize{
#'   \item{\code{"g"}}{ = gram}
#'   \item{\code{"mg"}}{ = milligram}
#'   \item{\code{"mcg"}}{ = microgram}
#'   \item{\code{"U"}}{ = unit}
#'   \item{\code{"TU"}}{ = thousand units}
#'   \item{\code{"MU"}}{ = million units}
#'   \item{\code{"mmol"}}{ = millimole}
#'   \item{\code{"ml"}}{ = milliliter (e.g. eyedrops)}
#' }
#' @export
#' @importFrom dplyr %>% progress_estimated
#' @importFrom xml2 read_html
#' @importFrom rvest html_nodes html_table
#' @source \url{https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/}
#' @examples
#' \donttest{
#' atc_property("J01CA04", "DDD", "O") # oral DDD (Defined Daily Dose) of amoxicillin
#' atc_property("J01CA04", "DDD", "P") # parenteral DDD (Defined Daily Dose) of amoxicillin
#' }
atc_property <- function(atc_code,
                         property,
                         administration = 'O',
                         url = 'https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no') {

  # property <- property %>% tolower()
  #
  if (property %like% 'unit') {
    property <- 'U'
  }

  # validation of properties
  valid_properties.bak <- c("ATC code", "Name", "DDD", "U", "Adm.R", "Note")
  valid_properties <- valid_properties.bak #%>% tolower()
  if (!property %in% valid_properties) {
    stop('Invalid `property`, use one of ', paste(valid_properties, collapse = ", "), '.')
  }

  returnvalue <- rep(NA_character_, length(atc_code))
  if (property == 'DDD') {
    returnvalue <- rep(NA_real_, length(atc_code))
  }

  progress <- progress_estimated(n = length(atc_code))

  for (i in 1:length(atc_code)) {

    progress$tick()$print()

    atc_url <- sub('%s', atc_code[i], url, fixed = TRUE)
    tbl <- xml2::read_html(atc_url) %>%
      rvest::html_nodes('table') %>%
      rvest::html_table(header = TRUE)

    if (length(tbl) == 0) {
      warning('ATC not found: ', atc_code[i], '. Please check ', atc_url, '.', call. = FALSE)
      returnvalue[i] <- NA
      next
    }

    tbl <- tbl[[1]]

    if (property == 'Name') {
      returnvalue[i] <- tbl[1, 2]
    } else {

      names(returnvalue)[i] <- tbl[1, 2] %>% as.character()

      if (!'Adm.R' %in% colnames(tbl) | is.na(tbl[1, 'Adm.R'])) {
        returnvalue[i] <- NA
        next
      } else {
        for (j in 1:nrow(tbl)) {
          if (tbl[j, 'Adm.R'] == administration) {
            returnvalue[i] <- tbl[j, property]
          }
        }
      }
    }
  }

  cat('\n')
  returnvalue

}

#' Name of an antibiotic
#'
#' Convert antibiotic codes (from a laboratory information system like MOLIS or GLIMS) to a (trivial) antibiotic name or ATC code, or vice versa. This uses the data from \code{\link{antibiotics}}.
#' @param abcode a code or name, like \code{"AMOX"}, \code{"AMCL"} or \code{"J01CA04"}
#' @param from,to type to transform from and to. See \code{\link{antibiotics}} for its column names. WIth \code{from = "guess"} the from will be guessed from \code{"atc"}, \code{"molis"} and \code{"umcg"}.
#' @param textbetween text to put between multiple returned texts
#' @param tolower return output as lower case with function \code{\link{tolower}}.
#' @keywords ab antibiotics
#' @source \code{\link{antibiotics}}
#' @export
#' @importFrom dplyr %>% filter select slice
#' @examples
#' abname("AMCL")
#' # "amoxicillin and enzyme inhibitor"
#'
#' abname("AMCL+GENT")
#' # "amoxicillin and enzyme inhibitor + gentamicin"
#'
#' abname(c("AMCL", "GENT"))
#' # "amoxicillin and enzyme inhibitor" "gentamicin"
#'
#' abname("AMCL", to = "trivial_nl")
#' # "Amoxicilline/clavulaanzuur"
#'
#' abname("AMCL", to = "atc")
#' # "J01CR02"
#'
#' abname("J01CR02", from = "atc", to = "umcg")
#' # "AMCL"
abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'official', textbetween = ' + ', tolower = FALSE) {

  antibiotics <- AMR::antibiotics

  from <- from[1]
  if (from == "guess") {
    for (i in 1:3) {
      if (abcode[1] %in% (antibiotics %>% pull(i))) {
        from <- colnames(antibiotics)[i]
      }
    }
    if (from == "guess") {
      from <- "umcg"
    }
  }

  colnames(antibiotics) <- colnames(antibiotics) %>% tolower()
  from <- from %>% tolower()
  to <- to %>% tolower()

  if (!from %in% colnames(antibiotics) |
      !to %in% colnames(antibiotics)) {
    stop(paste0('Invalid `from` or `to`. Choose one of ',
                colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)
  }

  abcode <- as.character(abcode)

  for (i in 1:length(abcode)) {
    drug <- abcode[i]
    if (!grepl('+', drug, fixed = TRUE) & !grepl(' en ', drug, fixed = TRUE)) {
      # only 1 drug
      if (drug %in% (antibiotics %>% pull(from))) {
        abcode[i] <-
          antibiotics %>%
          filter(.[, from] == drug) %>%
          select(to) %>%
          slice(1) %>%
          as.character()
      } else {
        # not found
        warning('Code "', drug, '" not found in antibiotics list.', call. = FALSE)
        abcode[i] <- NA
      }
    } else {
      # more than 1 drug
      if (grepl('+', drug, fixed = TRUE)) {
        drug.group <-
          strsplit(drug, '+', fixed = TRUE) %>%
          unlist() %>%
          trimws('both')
      } else if (grepl(' en ', drug, fixed = TRUE)) {
        drug.group <-
          strsplit(drug, ' en ', fixed = TRUE) %>%
          unlist() %>%
          trimws('both')
      } else {
        warning('Invalid concat.')
        abcode[i] <- NA
        next
      }

      for (j in 1:length(drug.group)) {
        drug.group[j] <-
          antibiotics %>%
          filter(.[, from] == drug.group[j]) %>%
          select(to) %>%
          slice(1) %>%
          as.character()
        if (j > 1 & to %in% c('official', 'trivial_nl')) {
          drug.group[j] <- drug.group[j] %>% tolower()
        }
      }
      abcode[i] <- paste(drug.group, collapse = textbetween)
    }
  }

  if (tolower == TRUE) {
    abcode <- abcode %>% tolower()
  }

  abcode
}

#' Find bacteria ID based on genus/species
#'
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
#' @param x character vector to determine \code{bactid}
#' @export
#' @importFrom dplyr %>% filter slice pull
#' @return Character (vector).
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' guess_bactid("stau")
#' guess_bactid("STAU")
#' guess_bactid("staaur")
#' guess_bactid("S. aureus")
#' guess_bactid("S aureus")
#' guess_bactid("Staphylococcus aureus")
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
guess_bactid <- function(x) {
  # remove dots and other non-text in case of "E. coli" except spaces
  x <- gsub("[^a-zA-Z ]+", "", x)
  # but spaces before and after should be omitted
  x <- trimws(x, which = "both")
  x.bak <- x
  # replace space by regex sign
  x <- gsub(" ", ".*", x, fixed = TRUE)
  # add start and stop
  x_species <- paste(x, 'species')
  x <- paste0('^', x, '$')

  for (i in 1:length(x)) {
    if (tolower(x[i]) == '^e.*coli$') {
      # avoid detection of Entamoeba coli in case of E. coli
      x[i] <- 'Escherichia coli'
    }
    if (tolower(x[i]) == '^h.*influenzae$') {
      # avoid detection of Haematobacter influenzae in case of H. influenzae
      x[i] <- 'Haemophilus influenzae'
    }
    if (tolower(x[i]) == '^st.*au$'
        | tolower(x[i]) == '^stau$'
        | tolower(x[i]) == '^staaur$') {
      # avoid detection of Staphylococcus auricularis in case of S. aureus
      x[i] <- 'Staphylococcus aureus'
    }
    if (tolower(x[i]) == '^p.*aer$') {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }

    # translate known trivial names to genus+species
    if (toupper(x.bak[i]) == 'MRSA'
        | toupper(x.bak[i]) == 'VISA'
        | toupper(x.bak[i]) == 'VRSA') {
      x[i] <- 'Staphylococcus aureus'
    }
    if (toupper(x.bak[i]) == 'MRSE') {
      x[i] <- 'Staphylococcus epidermidis'
    }
    if (toupper(x.bak[i]) == 'VRE') {
      x[i] <- 'Enterococcus'
    }
    if (toupper(x.bak[i]) == 'MRPA') {
      # multi resistant P. aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
    if (toupper(x.bak[i]) == 'PISP'
        | toupper(x.bak[i]) == 'PRSP') {
      # peni resistant S. pneumoniae
      x[i] <- 'Streptococcus pneumoniae'
    }
    if (toupper(x.bak[i]) == 'VISP'
        | toupper(x.bak[i]) == 'VRSP') {
      # vanco resistant S. pneumoniae
      x[i] <- 'Streptococcus pneumoniae'
    }

    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == x.bak[i])

    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == x[i])
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% x[i])
    }
    if (nrow(found) == 0) {
      # try only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      x_split <- x
      x_length <- nchar(x.bak[i])
      x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
                     '.* ',
                     x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (x.bak[i] %like% "^Gram") {
        x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
        # remove leading and trailing spaces again
        x.bak[i] <- trimws(x.bak[i], which = "both")
      }
      found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
    }

    if (nrow(found) != 0) {
      x[i] <- found %>%
        slice(1) %>%
        pull(bactid)
    } else {
      x[i] <- ""
    }
  }
  x
}
first commit 2018-02-21 11:52:31 +01:00			`# ==================================================================== #`
			`# TITLE #`
			`# Antimicrobial Resistance (AMR) Analysis #`
			`# #`
			`# AUTHORS #`
			`# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #`
			`# #`
			`# LICENCE #`
			`# This program is free software; you can redistribute it and/or modify #`
			`# it under the terms of the GNU General Public License version 2.0, #`
			`# as published by the Free Software Foundation. #`
			`# #`
			`# This program is distributed in the hope that it will be useful, #`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of #`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #`
			`# GNU General Public License for more details. #`
			`# ==================================================================== #`

			`#' Properties of an ATC code`
			`#'`
Minor text changes for DDD definition 2018-03-13 11:48:30 +01:00			`#' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. \strong{This function requires an internet connection.}`
first commit 2018-02-21 11:52:31 +01:00			`#' @param atc_code a character or character vector with ATC code(s) of antibiotic(s)`
			`#' @param property property of an ATC code. Valid values are \code{"ATC code"}, \code{"Name"}, \code{"DDD"}, \code{"U"} (\code{"unit"}), \code{"Adm.R"} en \code{"Note"}.`
			`#' @param administration type of administration, see \emph{Details}`
			`#' @param url url of website of the WHO. The sign \code{\%s} can be used as a placeholder for ATC codes.`
			`#' @details`
			`#' Abbreviations for the property \code{"Adm.R"} (parameter \code{administration}):`
			`#' \itemize{`
			`#' \item{\code{"Implant"}}{ = Implant}`
			`#' \item{\code{"Inhal"}}{ = Inhalation}`
			`#' \item{\code{"Instill"}}{ = Instillation}`
			`#' \item{\code{"N"}}{ = nasal}`
			`#' \item{\code{"O"}}{ = oral}`
			`#' \item{\code{"P"}}{ = parenteral}`
			`#' \item{\code{"R"}}{ = rectal}`
			`#' \item{\code{"SL"}}{ = sublingual/buccal}`
			`#' \item{\code{"TD"}}{ = transdermal}`
			`#' \item{\code{"V"}}{ = vaginal}`
			`#' }`
			`#'`
			`#' Abbreviations for the property \code{"U"} (unit):`
			`#' \itemize{`
			`#' \item{\code{"g"}}{ = gram}`
			`#' \item{\code{"mg"}}{ = milligram}`
			`#' \item{\code{"mcg"}}{ = microgram}`
			`#' \item{\code{"U"}}{ = unit}`
			`#' \item{\code{"TU"}}{ = thousand units}`
			`#' \item{\code{"MU"}}{ = million units}`
			`#' \item{\code{"mmol"}}{ = millimole}`
			`#' \item{\code{"ml"}}{ = milliliter (e.g. eyedrops)}`
			`#' }`
			`#' @export`
			`#' @importFrom dplyr %>% progress_estimated`
			`#' @importFrom xml2 read_html`
			`#' @importFrom rvest html_nodes html_table`
			`#' @source \url{https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/}`
more MOs 2018-04-03 16:07:32 +02:00			`#' @examples`
First CRAN submission edits 2018-02-22 20:48:48 +01:00			`#' \donttest{`
Minor text changes for DDD definition 2018-03-13 11:48:30 +01:00			`#' atc_property("J01CA04", "DDD", "O") # oral DDD (Defined Daily Dose) of amoxicillin`
			`#' atc_property("J01CA04", "DDD", "P") # parenteral DDD (Defined Daily Dose) of amoxicillin`
First CRAN submission edits 2018-02-22 20:48:48 +01:00			`#' }`
first commit 2018-02-21 11:52:31 +01:00			`atc_property <- function(atc_code,`
			`property,`
			`administration = 'O',`
			`url = 'https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no') {`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`# property <- property %>% tolower()`
			`#`
			`if (property %like% 'unit') {`
			`property <- 'U'`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`# validation of properties`
			`valid_properties.bak <- c("ATC code", "Name", "DDD", "U", "Adm.R", "Note")`
			`valid_properties <- valid_properties.bak #%>% tolower()`
			`if (!property %in% valid_properties) {`
			stop('Invalid `property`, use one of ', paste(valid_properties, collapse = ", "), '.')
			`}`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`returnvalue <- rep(NA_character_, length(atc_code))`
			`if (property == 'DDD') {`
			`returnvalue <- rep(NA_real_, length(atc_code))`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`progress <- progress_estimated(n = length(atc_code))`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`for (i in 1:length(atc_code)) {`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`progress$tick()$print()`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`atc_url <- sub('%s', atc_code[i], url, fixed = TRUE)`
			`tbl <- xml2::read_html(atc_url) %>%`
			`rvest::html_nodes('table') %>%`
			`rvest::html_table(header = TRUE)`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`if (length(tbl) == 0) {`
			`warning('ATC not found: ', atc_code[i], '. Please check ', atc_url, '.', call. = FALSE)`
			`returnvalue[i] <- NA`
			`next`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`tbl <- tbl[[1]]`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`if (property == 'Name') {`
			`returnvalue[i] <- tbl[1, 2]`
			`} else {`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`names(returnvalue)[i] <- tbl[1, 2] %>% as.character()`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`if (!'Adm.R' %in% colnames(tbl) \| is.na(tbl[1, 'Adm.R'])) {`
			`returnvalue[i] <- NA`
			`next`
			`} else {`
			`for (j in 1:nrow(tbl)) {`
			`if (tbl[j, 'Adm.R'] == administration) {`
			`returnvalue[i] <- tbl[j, property]`
			`}`
			`}`
			`}`
			`}`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`cat('\n')`
			`returnvalue`
more MOs 2018-04-03 16:07:32 +02:00
first commit 2018-02-21 11:52:31 +01:00			`}`
abname, logo 2018-02-21 12:10:00 +01:00
			`#' Name of an antibiotic`
			`#'`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`#' Convert antibiotic codes (from a laboratory information system like MOLIS or GLIMS) to a (trivial) antibiotic name or ATC code, or vice versa. This uses the data from \code{\link{antibiotics}}.`
misc 2018-02-26 15:53:09 +01:00			`#' @param abcode a code or name, like \code{"AMOX"}, \code{"AMCL"} or \code{"J01CA04"}`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`#' @param from,to type to transform from and to. See \code{\link{antibiotics}} for its column names. WIth \code{from = "guess"} the from will be guessed from \code{"atc"}, \code{"molis"} and \code{"umcg"}.`
abname, logo 2018-02-21 12:10:00 +01:00			`#' @param textbetween text to put between multiple returned texts`
			`#' @param tolower return output as lower case with function \code{\link{tolower}}.`
			`#' @keywords ab antibiotics`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`#' @source \code{\link{antibiotics}}`
abname, logo 2018-02-21 12:10:00 +01:00			`#' @export`
more MOs 2018-04-03 16:07:32 +02:00			`#' @importFrom dplyr %>% filter select slice`
abname, logo 2018-02-21 12:10:00 +01:00			`#' @examples`
			`#' abname("AMCL")`
			`#' # "amoxicillin and enzyme inhibitor"`
			`#'`
			`#' abname("AMCL+GENT")`
			`#' # "amoxicillin and enzyme inhibitor + gentamicin"`
			`#'`
			`#' abname(c("AMCL", "GENT"))`
more MOs 2018-04-03 16:07:32 +02:00			`#' # "amoxicillin and enzyme inhibitor" "gentamicin"`
abname, logo 2018-02-21 12:10:00 +01:00			`#'`
misc 2018-02-26 15:53:09 +01:00			`#' abname("AMCL", to = "trivial_nl")`
abname, logo 2018-02-21 12:10:00 +01:00			`#' # "Amoxicilline/clavulaanzuur"`
			`#'`
			`#' abname("AMCL", to = "atc")`
			`#' # "J01CR02"`
			`#'`
			`#' abname("J01CR02", from = "atc", to = "umcg")`
			`#' # "AMCL"`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'official', textbetween = ' + ', tolower = FALSE) {`
more MOs 2018-04-03 16:07:32 +02:00
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`antibiotics <- AMR::antibiotics`
more MOs 2018-04-03 16:07:32 +02:00
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`from <- from[1]`
			`if (from == "guess") {`
			`for (i in 1:3) {`
			`if (abcode[1] %in% (antibiotics %>% pull(i))) {`
			`from <- colnames(antibiotics)[i]`
			`}`
			`}`
			`if (from == "guess") {`
			`from <- "umcg"`
			`}`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`colnames(antibiotics) <- colnames(antibiotics) %>% tolower()`
abname, logo 2018-02-21 12:10:00 +01:00			`from <- from %>% tolower()`
			`to <- to %>% tolower()`
more MOs 2018-04-03 16:07:32 +02:00
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`if (!from %in% colnames(antibiotics) \|`
			`!to %in% colnames(antibiotics)) {`
more MOs 2018-04-03 16:07:32 +02:00			stop(paste0('Invalid `from` or `to`. Choose one of ',
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`colnames(antibiotics) %>% paste(collapse = ","), '.'), call. = FALSE)`
abname, logo 2018-02-21 12:10:00 +01:00			`}`
more MOs 2018-04-03 16:07:32 +02:00
abname, logo 2018-02-21 12:10:00 +01:00			`abcode <- as.character(abcode)`
more MOs 2018-04-03 16:07:32 +02:00
abname, logo 2018-02-21 12:10:00 +01:00			`for (i in 1:length(abcode)) {`
			`drug <- abcode[i]`
			`if (!grepl('+', drug, fixed = TRUE) & !grepl(' en ', drug, fixed = TRUE)) {`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`# only 1 drug`
			`if (drug %in% (antibiotics %>% pull(from))) {`
abname, logo 2018-02-21 12:10:00 +01:00			`abcode[i] <-`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`antibiotics %>%`
abname, logo 2018-02-21 12:10:00 +01:00			`filter(.[, from] == drug) %>%`
			`select(to) %>%`
			`slice(1) %>%`
			`as.character()`
			`} else {`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`# not found`
abname, logo 2018-02-21 12:10:00 +01:00			`warning('Code "', drug, '" not found in antibiotics list.', call. = FALSE)`
			`abcode[i] <- NA`
			`}`
			`} else {`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`# more than 1 drug`
abname, logo 2018-02-21 12:10:00 +01:00			`if (grepl('+', drug, fixed = TRUE)) {`
			`drug.group <-`
			`strsplit(drug, '+', fixed = TRUE) %>%`
			`unlist() %>%`
			`trimws('both')`
			`} else if (grepl(' en ', drug, fixed = TRUE)) {`
			`drug.group <-`
			`strsplit(drug, ' en ', fixed = TRUE) %>%`
			`unlist() %>%`
			`trimws('both')`
			`} else {`
			`warning('Invalid concat.')`
			`abcode[i] <- NA`
			`next`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
abname, logo 2018-02-21 12:10:00 +01:00			`for (j in 1:length(drug.group)) {`
			`drug.group[j] <-`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`antibiotics %>%`
abname, logo 2018-02-21 12:10:00 +01:00			`filter(.[, from] == drug.group[j]) %>%`
			`select(to) %>%`
			`slice(1) %>%`
			`as.character()`
misc 2018-02-26 15:53:09 +01:00			`if (j > 1 & to %in% c('official', 'trivial_nl')) {`
abname, logo 2018-02-21 12:10:00 +01:00			`drug.group[j] <- drug.group[j] %>% tolower()`
			`}`
			`}`
			`abcode[i] <- paste(drug.group, collapse = textbetween)`
			`}`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
abname, logo 2018-02-21 12:10:00 +01:00			`if (tolower == TRUE) {`
			`abcode <- abcode %>% tolower()`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
abname, logo 2018-02-21 12:10:00 +01:00			`abcode`
			`}`
more MOs 2018-04-03 16:07:32 +02:00
			`#' Find bacteria ID based on genus/species`
			`#'`
			`#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.`
			`#' @param x character vector to determine \code{bactid}`
			`#' @export`
			`#' @importFrom dplyr %>% filter slice pull`
			`#' @return Character (vector).`
			`#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.`
			`#' @examples`
			`#' # These examples all return "STAAUR", the ID of S. aureus:`
			`#' guess_bactid("stau")`
			`#' guess_bactid("STAU")`
			`#' guess_bactid("staaur")`
			`#' guess_bactid("S. aureus")`
			`#' guess_bactid("S aureus")`
			`#' guess_bactid("Staphylococcus aureus")`
			`#' guess_bactid("MRSA") # Methicillin-resistant S. aureus`
			`#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus`
			`guess_bactid <- function(x) {`
			`# remove dots and other non-text in case of "E. coli" except spaces`
			`x <- gsub("[^a-zA-Z ]+", "", x)`
			`# but spaces before and after should be omitted`
			`x <- trimws(x, which = "both")`
			`x.bak <- x`
			`# replace space by regex sign`
			`x <- gsub(" ", ".*", x, fixed = TRUE)`
			`# add start and stop`
			`x_species <- paste(x, 'species')`
			`x <- paste0('^', x, '$')`

			`for (i in 1:length(x)) {`
			`if (tolower(x[i]) == '^e.*coli$') {`
			`# avoid detection of Entamoeba coli in case of E. coli`
			`x[i] <- 'Escherichia coli'`
			`}`
			`if (tolower(x[i]) == '^h.*influenzae$') {`
			`# avoid detection of Haematobacter influenzae in case of H. influenzae`
			`x[i] <- 'Haemophilus influenzae'`
			`}`
			`if (tolower(x[i]) == '^st.*au$'`
			`\| tolower(x[i]) == '^stau$'`
			`\| tolower(x[i]) == '^staaur$') {`
			`# avoid detection of Staphylococcus auricularis in case of S. aureus`
			`x[i] <- 'Staphylococcus aureus'`
			`}`
			`if (tolower(x[i]) == '^p.*aer$') {`
			`# avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa`
			`x[i] <- 'Pseudomonas aeruginosa'`
			`}`

			`# translate known trivial names to genus+species`
			`if (toupper(x.bak[i]) == 'MRSA'`
			`\| toupper(x.bak[i]) == 'VISA'`
			`\| toupper(x.bak[i]) == 'VRSA') {`
			`x[i] <- 'Staphylococcus aureus'`
			`}`
			`if (toupper(x.bak[i]) == 'MRSE') {`
			`x[i] <- 'Staphylococcus epidermidis'`
			`}`
			`if (toupper(x.bak[i]) == 'VRE') {`
			`x[i] <- 'Enterococcus'`
			`}`
			`if (toupper(x.bak[i]) == 'MRPA') {`
			`# multi resistant P. aeruginosa`
			`x[i] <- 'Pseudomonas aeruginosa'`
			`}`
			`if (toupper(x.bak[i]) == 'PISP'`
			`\| toupper(x.bak[i]) == 'PRSP') {`
			`# peni resistant S. pneumoniae`
			`x[i] <- 'Streptococcus pneumoniae'`
			`}`
			`if (toupper(x.bak[i]) == 'VISP'`
			`\| toupper(x.bak[i]) == 'VRSP') {`
			`# vanco resistant S. pneumoniae`
			`x[i] <- 'Streptococcus pneumoniae'`
			`}`

			`# let's try the ID's first`
			`found <- AMR::microorganisms %>% filter(bactid == x.bak[i])`

			`if (nrow(found) == 0) {`
			`# now try exact match`
			`found <- AMR::microorganisms %>% filter(fullname == x[i])`
			`}`
			`if (nrow(found) == 0) {`
			`# try any match`
			`found <- AMR::microorganisms %>% filter(fullname %like% x[i])`
			`}`
			`if (nrow(found) == 0) {`
			`# try only genus, with 'species' attached`
			`found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])`
			`}`
			`if (nrow(found) == 0) {`
			`# search for GLIMS code`
			`if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {`
			`found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))`
			`}`
			`}`
			`if (nrow(found) == 0) {`
			`# try splitting of characters and then find ID`
			`# like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus`
			`x_split <- x`
			`x_length <- nchar(x.bak[i])`
			`x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),`
			`'.* ',`
			`x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())`
			`found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))`
			`}`
			`if (nrow(found) == 0) {`
			`# try any match with text before and after original search string`
			`# so "negative rods" will be "GNR"`
			`if (x.bak[i] %like% "^Gram") {`
			`x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)`
			`# remove leading and trailing spaces again`
			`x.bak[i] <- trimws(x.bak[i], which = "both")`
			`}`
			`found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])`
			`}`

			`if (nrow(found) != 0) {`
			`x[i] <- found %>%`
			`slice(1) %>%`
			`pull(bactid)`
			`} else {`
			`x[i] <- ""`
			`}`
			`}`
			`x`
			`}`