AMR/R/ab.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# SOURCE                                                               #
# https://gitlab.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2018-2020 Berends MS, Luz CF et al.                              #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
#                                                                      #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
# ==================================================================== #

#' Transform to antibiotic ID
#'
#' Use this function to determine the antibiotic code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names).
#' @inheritSection lifecycle Maturing lifecycle
#' @param x character vector of which the antibiotic ID must be determined
#' @param ... arguments passed on to internal functions
#' @rdname as.ab
#' @inheritSection WHOCC WHOCC
#' @importFrom dplyr %>% filter slice pull
#' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
#'
#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples.
#' @section Source:
#' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/}
#'
#' WHONET 2019 software: \url{http://www.whonet.org/software.html}
#'
#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}
#' @aliases ab
#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`.
#' @seealso [antibiotics] for the data set that is used to determine the antibiotic IDs.
#' @inheritSection AMR Read more on our website!
#' @export
#' @examples
#' # these examples all return "ERY", the ID of erythromycin:
#' as.ab("J01FA01")
#' as.ab("J 01 FA 01")
#' as.ab("Erythromycin")
#' as.ab("eryt")
#' as.ab("   eryt 123")
#' as.ab("ERYT")
#' as.ab("ERY")
#' as.ab("eritromicine") # spelled wrong, yet works
#' as.ab("Erythrocin")   # trade name
#' as.ab("Romycin")      # trade name
#' 
#' # spelling from different languages and dyslexia are no problem
#' ab_atc("ceftriaxon")
#' ab_atc("cephtriaxone")
#' ab_atc("cephthriaxone")
#' ab_atc("seephthriaaksone")
#'
#' # use ab_* functions to get specific properties (see ?ab_property);
#' # they use as.ab() internally:
#' ab_name("J01FA01")    # "Erythromycin"
#' ab_name("eryt")       # "Erythromycin"
as.ab <- function(x, ...) {
  # Coerces free-text input to antibiotic IDs (class "ab"). Every unique,
  # cleaned input value runs through a cascade of increasingly fuzzy lookups
  # against AMR::antibiotics; the first lookup that hits wins. Values that
  # cannot be resolved become NA and are reported in a warning.
  #
  # Flags read from `...` (set by the recursive calls further below):
  #   initial_search  = FALSE  skips the translation-based retries
  #   initial_search2 = FALSE  skips the brand name combination retry
  if (is.ab(x)) {
    return(x)
  }

  if (all(toupper(x) %in% AMR::antibiotics$ab)) {
    # valid AB code, but not yet right class
    return(structure(.Data = toupper(x),
                     class = "ab"))
  }

  # `...` does not change while looping, so the flags are read only once
  dots <- list(...)
  initial_search <- !isFALSE(dots$initial_search)
  initial_search2 <- !isFALSE(dots$initial_search2)

  x_bak <- x
  # remove diacritics
  x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")
  x <- gsub('"', "", x, fixed = TRUE)
  # remove suffixes
  x_bak_clean <- gsub("_(mic|rsi|dis[ck])$", "", x, ignore.case = TRUE)
  # remove disk concentrations, like LVX_NM -> LVX
  x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean, ignore.case = TRUE)
  # remove part between brackets if that's followed by another string
  x_bak_clean <- gsub("(.*)+ [(].*[)]", "\\1", x_bak_clean)
  # keep only max 1 space
  x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean, ignore.case = TRUE))
  # non-character, space or number should be a slash
  x_bak_clean <- gsub("[^A-Za-z0-9 -]", "/", x_bak_clean)
  # spaces around non-characters must be removed: amox + clav -> amox/clav
  x_bak_clean <- gsub("(.*[a-zA-Z0-9]) ([^a-zA-Z0-9].*)", "\\1\\2", x_bak_clean)
  x_bak_clean <- gsub("(.*[^a-zA-Z0-9]) ([a-zA-Z0-9].*)", "\\1\\2", x_bak_clean)
  # remove hyphen after a starting "co"
  x_bak_clean <- gsub("^co-", "co", x_bak_clean, ignore.case = TRUE)
  # replace text 'and' with a slash
  x_bak_clean <- gsub(" and ", "/", x_bak_clean, ignore.case = TRUE)

  # only the unique cleaned values are looked up; results are mapped back to
  # the full input at the end
  x <- unique(x_bak_clean)
  x_new <- rep(NA_character_, length(x))
  x_unknown <- character(0)

  # Returns the original (uncleaned) input that belongs to a cleaned value.
  # match() is used on purpose: a logical mask built with `==` contains NA for
  # every missing input, which made the lookup return NA (and the warning show
  # an empty value) whenever an NA preceded the unknown value.
  original_input <- function(cleaned) {
    x_bak[match(cleaned, x_bak_clean)]
  }

  for (i in seq_along(x)) {
    if (is.na(x[i]) || is.null(x[i])) {
      next
    }
    if (identical(x[i], "")) {
      x_unknown <- c(x_unknown, original_input(x[i]))
      next
    }
    # prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it
    if (identical(tolower(x[i]), "bacteria")) {
      x_unknown <- c(x_unknown, original_input(x[i]))
      next
    }

    x_upper <- toupper(x[i])

    # exact AB code
    found <- AMR::antibiotics[which(AMR::antibiotics$ab == x_upper), ]$ab
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # exact ATC code
    found <- AMR::antibiotics[which(AMR::antibiotics$atc == x_upper), ]$ab
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # exact CID code
    found <- AMR::antibiotics[which(AMR::antibiotics$cid == x[i]), ]$ab
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # exact name
    found <- AMR::antibiotics[which(toupper(AMR::antibiotics$name) == x_upper), ]$ab
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # exact synonym
    synonym_found <- vapply(AMR::antibiotics$synonyms,
                            function(s) x_upper %in% toupper(s),
                            logical(1))
    found <- AMR::antibiotics$ab[synonym_found]
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # exact abbreviation
    abbr_found <- vapply(AMR::antibiotics$abbreviations,
                         function(a) x_upper %in% toupper(a),
                         logical(1))
    found <- AMR::antibiotics$ab[abbr_found]
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # first >=4 characters of name
    if (nchar(x[i]) >= 4) {
      found <- AMR::antibiotics[which(toupper(AMR::antibiotics$name) %like% paste0("^", x[i])), ]$ab
      if (length(found) > 0) {
        x_new[i] <- found[1L]
        next
      }
    }

    # allow characters that resemble others, but only continue when having more than 3 characters
    if (nchar(x[i]) <= 3) {
      x_unknown <- c(x_unknown, original_input(x[i]))
      next
    }
    # turn the input into a regular expression that tolerates common
    # spelling variations
    x_spelling <- tolower(x[i])
    x_spelling <- gsub("[iy]+", "[iy]+", x_spelling)
    x_spelling <- gsub("(c|k|q|qu|s|z|x|ks)+", "(c|k|q|qu|s|z|x|ks)+", x_spelling)
    x_spelling <- gsub("(ph|f|v)+", "(ph|f|v)+", x_spelling)
    x_spelling <- gsub("(th|t)+", "(th|t)+", x_spelling)
    x_spelling <- gsub("a+", "a+", x_spelling)
    x_spelling <- gsub("e+", "e+", x_spelling)
    x_spelling <- gsub("o+", "o+", x_spelling)
    # allow any ending of -in/-ine and -im/-ime
    x_spelling <- gsub("(\\[iy\\]\\+(n|m)|\\[iy\\]\\+(n|m)e\\+)$", "[iy]+(n|m)e*", x_spelling)
    # allow any ending of -ol/-ole
    x_spelling <- gsub("(o\\+l|o\\+le\\+)$", "o+le*", x_spelling)
    # allow any ending of -on/-one
    x_spelling <- gsub("(o\\+n|o\\+ne\\+)$", "o+ne*", x_spelling)
    # replace multiple same characters to single one with '+', like "ll" -> "l+"
    x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling)
    spelling_pattern <- paste0("^", x_spelling)

    # try if name starts with it
    found <- AMR::antibiotics[which(AMR::antibiotics$name %like% spelling_pattern), ]$ab
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }
    # and try if any synonym starts with it
    synonym_found <- vapply(AMR::antibiotics$synonyms,
                            function(s) isTRUE(any(s %like% spelling_pattern)),
                            logical(1))
    found <- AMR::antibiotics$ab[synonym_found]
    if (length(found) > 0) {
      x_new[i] <- found[1L]
      next
    }

    # try by removing all spaces
    if (x[i] %like% " ") {
      found <- suppressWarnings(as.ab(gsub(" +", "", x[i])))
      if (length(found) > 0 && !is.na(found)) {
        x_new[i] <- found[1L]
        next
      }
    }

    # try by removing all spaces and numbers
    if (x[i] %like% " " || x[i] %like% "[0-9]") {
      found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i])))
      if (length(found) > 0 && !is.na(found)) {
        x_new[i] <- found[1L]
        next
      }
    }

    if (initial_search) {
      # transform back from other languages and try again
      parts <- strsplit(x[i], "[^a-zA-Z0-9 ]")[[1]]
      for (j in seq_along(parts)) {
        # NOTE(review): isFALSE() of a vector longer than 1 is always FALSE,
        # so `!isFALSE(translations_file$fixed)` does not filter any rows -
        # confirm what was intended here
        parts[j] <- ifelse(tolower(parts[j]) %in% tolower(translations_file$replacement),
                           translations_file[which(tolower(translations_file$replacement) == tolower(parts[j]) &
                                                     !isFALSE(translations_file$fixed)), "pattern"],
                           parts[j])
      }
      x_translated <- paste(parts, collapse = "/")
      # initial_search = FALSE keeps the recursive call from translating again
      x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
      if (!is.na(x_translated_guess)) {
        x_new[i] <- x_translated_guess
        next
      }

      if (initial_search2) {
        # now also try to coerce brandname combinations like "Amoxy/clavulanic acid"
        parts <- strsplit(x_translated, "[^a-zA-Z0-9 ]")[[1]]
        for (j in seq_along(parts)) {
          y_name <- suppressWarnings(ab_name(parts[j], language = NULL, initial_search = FALSE, initial_search2 = FALSE))
          parts[j] <- ifelse(!is.na(y_name),
                             y_name,
                             parts[j])
        }
        x_translated <- paste(parts, collapse = "/")
        x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))
        if (!is.na(x_translated_guess)) {
          x_new[i] <- x_translated_guess
          next
        }
      }
    }

    # not found
    x_unknown <- c(x_unknown, original_input(x[i]))
  }

  # take failed ATC codes apart from rest
  x_unknown_ATCs <- x_unknown[x_unknown %like% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"]
  x_unknown <- x_unknown[!x_unknown %in% x_unknown_ATCs]
  if (length(x_unknown_ATCs) > 0) {
    warning("These ATC codes are not (yet) in the antibiotics data set: ",
            paste0('"', sort(unique(x_unknown_ATCs)), '"', collapse = ", "),
            ".",
            call. = FALSE)
  }

  if (length(x_unknown) > 0) {
    warning("These values could not be coerced to a valid antimicrobial ID: ",
            paste0('"', sort(unique(x_unknown)), '"', collapse = ", "),
            ".",
            call. = FALSE)
  }

  # map the result of every unique cleaned value back onto the full input;
  # match() also pairs NA with NA, so missing input stays missing
  x_result <- x_new[match(x_bak_clean, x)]

  if (length(x_result) == 0) {
    x_result <- NA_character_
  }

  structure(.Data = x_result,
            class = "ab")
}

#' @rdname as.ab
#' @export
is.ab <- function(x) {
  # strict check: TRUE only when "ab" is the one and only class of x
  object_class <- class(x)
  identical(object_class, "ab")
}

#' @exportMethod print.ab
#' @export
#' @noRd
print.ab <- function(x, ...) {
  # a header line naming the class, followed by the bare codes without quotes
  cat("Class 'ab'\n")
  codes <- as.character(x)
  print(codes, quote = FALSE)
}

#' @exportMethod as.data.frame.ab
#' @export
#' @noRd
as.data.frame.ab <- function(x, ...) {
  # same as as.data.frame.character but with removed stringsAsFactors
  if ("nm" %in% names(list(...))) {
    # the caller chose the column name, so pass everything on unchanged
    return(as.data.frame.vector(x, ...))
  }
  # otherwise name the column after the expression that was passed as `x`
  column_name <- paste(deparse(substitute(x), width.cutoff = 500L),
                       collapse = " ")
  as.data.frame.vector(x, ..., nm = column_name)
}

#' @exportMethod [.ab
#' @export
#' @noRd
"[.ab" <- function(x, ...) {
  # Subsets an 'ab' vector and gives the result the class of `x` again.
  y <- NextMethod()
  # Re-apply the attributes of x, except the positional ones: the subset
  # already carries its own (correctly subsetted) names, and copying the
  # full-length names of x onto it fails or mislabels the elements.
  carry <- attributes(x)
  carry <- carry[setdiff(names(carry), c("names", "dim", "dimnames"))]
  for (attribute_name in names(carry)) {
    attr(y, attribute_name) <- carry[[attribute_name]]
  }
  y
}
#' @exportMethod [[.ab
#' @export
#' @noRd
"[[.ab" <- function(x, ...) {
  # Extracts a single element of an 'ab' vector and gives it the class of `x`.
  y <- NextMethod()
  # Re-apply the attributes of x, except the positional ones: the names of
  # the full vector do not fit the single extracted element, so copying them
  # would raise an error for named vectors.
  carry <- attributes(x)
  carry <- carry[setdiff(names(carry), c("names", "dim", "dimnames"))]
  for (attribute_name in names(carry)) {
    attr(y, attribute_name) <- carry[[attribute_name]]
  }
  y
}
#' @exportMethod [<-.ab
#' @export
#' @noRd
"[<-.ab" <- function(i, j, ..., value) {
  # Assigns into an 'ab' vector. Note that `i` is the object that is being
  # modified (S3 dispatch happens on the first argument) and `j` the index.
  y <- NextMethod()
  # Re-apply the attributes of the original object, except the positional
  # ones: the assignment may have lengthened the vector or added names, and
  # copying the old names back would fail or undo that.
  carry <- attributes(i)
  carry <- carry[setdiff(names(carry), c("names", "dim", "dimnames"))]
  for (attribute_name in names(carry)) {
    attr(y, attribute_name) <- carry[[attribute_name]]
  }
  # the checked result is what gets assigned back to the caller's object
  class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)
}
#' @exportMethod [[<-.ab
#' @export
#' @noRd
"[[<-.ab" <- function(i, j, ..., value) {
  # Assigns a single element of an 'ab' vector. Note that `i` is the object
  # that is being modified (S3 dispatch happens on the first argument) and
  # `j` the index.
  y <- NextMethod()
  # Re-apply the attributes of the original object, except the positional
  # ones: the assignment may have lengthened the vector or added a name, and
  # copying the old names back would fail or undo that.
  carry <- attributes(i)
  carry <- carry[setdiff(names(carry), c("names", "dim", "dimnames"))]
  for (attribute_name in names(carry)) {
    attr(y, attribute_name) <- carry[[attribute_name]]
  }
  # the checked result is what gets assigned back to the caller's object
  class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)
}
#' @exportMethod c.ab
#' @export
#' @noRd
c.ab <- function(x, ...) {
  # Combines an 'ab' vector with further values and gives the result the
  # class of `x` again.
  y <- NextMethod()
  # Re-apply the attributes of x, except the positional ones: the combined
  # vector is longer than x, so copying the names of x onto it would raise an
  # error for named vectors.
  carry <- attributes(x)
  carry <- carry[setdiff(names(carry), c("names", "dim", "dimnames"))]
  for (attribute_name in names(carry)) {
    attr(y, attribute_name) <- carry[[attribute_name]]
  }
  class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)
}

#' @importFrom pillar type_sum
#' @export
type_sum.ab <- function(x) {
  # label for columns of class 'ab'; the value of x itself is not used
  column_label <- "ab"
  column_label
}

#' @importFrom pillar pillar_shaft
#' @export
pillar_shaft.ab <- function(x, ...) {
  # render the codes as text and let pillar style the missing ones as NA
  is_missing <- is.na(x)
  out <- format(x)
  out[is_missing] <- pillar::style_na("NA")
  pillar::new_pillar_shaft_simple(out, align = "left", min_width = 4)
}
new antibiotics 2019-05-10 16:44:59 +02:00			`# ==================================================================== #`
			`# TITLE #`
			`# Antimicrobial Resistance (AMR) Analysis #`
			`# #`
			`# SOURCE #`
			`# https://gitlab.com/msberends/AMR #`
			`# #`
			`# LICENCE #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# (c) 2018-2020 Berends MS, Luz CF et al. #`
new antibiotics 2019-05-10 16:44:59 +02:00			`# #`
			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
			`# #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
new antibiotics 2019-05-10 16:44:59 +02:00			`# Visit our website for more info: https://msberends.gitlab.io/AMR. #`
			`# ==================================================================== #`

			`#' Transform to antibiotic ID`
			`#'`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' Use this function to determine the antibiotic code of one or more antibiotics. The data set [antibiotics] will be searched for abbreviations, official names and synonyms (brand names).`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`#' @inheritSection lifecycle Maturing lifecycle`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @param x character vector to determine to antibiotic ID`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`#' @param ... arguments passed on to internal functions`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @rdname as.ab`
			`#' @inheritSection WHOCC WHOCC`
			`#' @importFrom dplyr %>% filter slice pull`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			#' @details All entries in the [antibiotics] data set have three different identifiers: a human readable EARS-Net code (column `ab`, used by ECDC and WHONET), an ATC code (column `atc`, used by WHO), and a CID code (column `cid`, Compound ID, used by PubChem). The data set contains more than 5,000 official brand names from many different countries, as found in PubChem.
CI tests 2019-05-13 10:10:16 +02:00			`#'`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples.`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @section Source:`
			`#' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/}`
			`#'`
			`#' WHONET 2019 software: \url{http://www.whonet.org/software.html}`
			`#'`
			`#' European Commission Public Health PHARMACEUTICALS - COMMUNITY REGISTER: \url{http://ec.europa.eu/health/documents/community-register/html/atc.htm}`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' @aliases ab`
			#' @return Character (vector) with class [`ab`]. Unknown values will return `NA`.
			`#' @seealso [antibiotics] for the dataframe that is being used to determine ATCs.`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @inheritSection AMR Read more on our website!`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' @export`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @examples`
(v0.9.0.9009) antibiotics data set update 2020-01-08 11:30:33 +01:00			`#' # these examples all return "ERY", the ID of erythromycin:`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' as.ab("J01FA01")`
			`#' as.ab("J 01 FA 01")`
			`#' as.ab("Erythromycin")`
			`#' as.ab("eryt")`
			`#' as.ab(" eryt 123")`
			`#' as.ab("ERYT")`
			`#' as.ab("ERY")`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`#' as.ab("eritromicine") # spelled wrong, yet works`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' as.ab("Erythrocin") # trade name`
			`#' as.ab("Romycin") # trade name`
(v0.9.0.9009) antibiotics data set update 2020-01-08 11:30:33 +01:00			`#'`
			`#' # spelling from different languages and dyslexia are no problem`
			`#' ab_atc("ceftriaxon")`
			`#' ab_atc("cephtriaxone")`
			`#' ab_atc("cephthriaxone")`
			`#' ab_atc("seephthriaaksone")`
new antibiotics 2019-05-10 16:44:59 +02:00			`#'`
(v0.9.0.9009) antibiotics data set update 2020-01-08 11:30:33 +01:00			`#' # use ab_* functions to get a specific properties (see ?ab_property);`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' # they use as.ab() internally:`
			`#' ab_name("J01FA01") # "Erythromycin"`
			`#' ab_name("eryt") # "Erythromycin"`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`as.ab <- function(x, ...) {`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (is.ab(x)) {`
			`return(x)`
			`}`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00
			`if (all(toupper(x) %in% AMR::antibiotics$ab)) {`
			`# valid AB code, but not yet right class`
			`return(structure(.Data = toupper(x),`
			`class = "ab"))`
			`}`

new antibiotics 2019-05-10 16:44:59 +02:00			`x_bak <- x`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`# remove diacritics`
			`x <- iconv(x, from = "UTF-8", to = "ASCII//TRANSLIT")`
			`x <- gsub('"', "", x, fixed = TRUE)`
new antibiotics 2019-05-10 16:44:59 +02:00			`# remove suffices`
(v0.7.0.9005) ab algorithm update 2019-06-11 14:18:25 +02:00			`x_bak_clean <- gsub("_(mic\|rsi\|dis[ck])$", "", x, ignore.case = TRUE)`
new antibiotics 2019-05-10 16:44:59 +02:00			`# remove disk concentrations, like LVX_NM -> LVX`
(v0.8.0.9021) update vignettes 2019-11-09 11:33:22 +01:00			`x_bak_clean <- gsub("_[A-Z]{2}[0-9_.]{0,3}$", "", x_bak_clean, ignore.case = TRUE)`
(v0.7.0.9005) ab algorithm update 2019-06-11 14:18:25 +02:00			`# remove part between brackets if that's followed by another string`
			`x_bak_clean <- gsub("(.)+ [(].[)]", "\\1", x_bak_clean)`
			`# keep only max 1 space`
			`x_bak_clean <- trimws(gsub(" +", " ", x_bak_clean, ignore.case = TRUE))`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`# non-character, space or number should be a slash`
(v0.7.1.9093) as.ab() fix 2019-10-06 21:07:38 +02:00			`x_bak_clean <- gsub("[^A-Za-z0-9 -]", "/", x_bak_clean)`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`# spaces around non-characters must be removed: amox + clav -> amox/clav`
			`x_bak_clean <- gsub("(.[a-zA-Z0-9]) ([^a-zA-Z0-9].)", "\\1\\2", x_bak_clean)`
			`x_bak_clean <- gsub("(.[^a-zA-Z0-9]) ([a-zA-Z0-9].)", "\\1\\2", x_bak_clean)`
(v0.9.0.9009) antibiotics data set update 2020-01-08 11:30:33 +01:00			`# remove hyphen after a starting "co"`
			`x_bak_clean <- gsub("^co-", "co", x_bak_clean, ignore.case = TRUE)`
			`# replace text 'and' with a slash`
			`x_bak_clean <- gsub(" and ", "/", x_bak_clean, ignore.case = TRUE)`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00
new antibiotics 2019-05-10 16:44:59 +02:00			`x <- unique(x_bak_clean)`
			`x_new <- rep(NA_character_, length(x))`
			`x_unknown <- character(0)`

(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`for (i in seq_len(length(x))) {`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (is.na(x[i]) \| is.null(x[i])) {`
			`next`
			`}`
			`if (identical(x[i], "")) {`
			`x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])`
			`next`
			`}`
(v0.7.0.9001) new pathovars, small fixes 2019-06-07 22:47:37 +02:00			`# prevent "bacteria" from coercing to TMP, since Bacterial is a brand name of it`
			`if (identical(tolower(x[i]), "bacteria")) {`
			`x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])`
			`next`
			`}`
new antibiotics 2019-05-10 16:44:59 +02:00
			`# exact AB code`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(AMR::antibiotics$ab == toupper(x[i])), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# exact ATC code`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(AMR::antibiotics$atc == toupper(x[i])), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# exact CID code`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(AMR::antibiotics$cid == x[i]), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# exact name`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(toupper(AMR::antibiotics$name) == toupper(x[i])), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# exact synonym`
			`synonym_found <- unlist(lapply(AMR::antibiotics$synonyms,`
			`function(s) if (toupper(x[i]) %in% toupper(s)) {`
			`TRUE`
			`} else {`
			`FALSE`
			`}))`
			`found <- AMR::antibiotics$ab[synonym_found == TRUE]`
			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# exact abbreviation`
			`abbr_found <- unlist(lapply(AMR::antibiotics$abbreviations,`
			`function(a) if (toupper(x[i]) %in% toupper(a)) {`
			`TRUE`
			`} else {`
			`FALSE`
			`}))`
			`found <- AMR::antibiotics$ab[abbr_found == TRUE]`
			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

			`# first >=4 characters of name`
			`if (nchar(x[i]) >= 4) {`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(toupper(AMR::antibiotics$name) %like% paste0("^", x[i])), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`
			`}`

			`# allow characters that resemble others, but only continue when having more than 3 characters`
			`if (nchar(x[i]) <= 3) {`
			`x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])`
			`next`
			`}`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`x_spelling <- tolower(x[i])`
			`x_spelling <- gsub("[iy]+", "[iy]+", x_spelling)`
(v0.7.0.9001) new pathovars, small fixes 2019-06-07 22:47:37 +02:00			`x_spelling <- gsub("(c\|k\|q\|qu\|s\|z\|x\|ks)+", "(c\|k\|q\|qu\|s\|z\|x\|ks)+", x_spelling)`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`x_spelling <- gsub("(ph\|f\|v)+", "(ph\|f\|v)+", x_spelling)`
			`x_spelling <- gsub("(th\|t)+", "(th\|t)+", x_spelling)`
			`x_spelling <- gsub("a+", "a+", x_spelling)`
			`x_spelling <- gsub("e+", "e+", x_spelling)`
			`x_spelling <- gsub("o+", "o+", x_spelling)`
new antibiotics 2019-05-10 16:44:59 +02:00			`# allow any ending of -in/-ine and -im/-ime`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`x_spelling <- gsub("(\\[iy\\]\\+(n\|m)\|\\[iy\\]\\+(n\|m)e\\+)$", "[iy]+(n\|m)e*", x_spelling)`
new antibiotics 2019-05-10 16:44:59 +02:00			`# allow any ending of -ol/-ole`
ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`x_spelling <- gsub("(o\\+l\|o\\+le\\+)$", "o+le*", x_spelling)`
			`# allow any ending of -on/-one`
			`x_spelling <- gsub("(o\\+n\|o\\+ne\\+)$", "o+ne*", x_spelling)`
(v0.7.0.9001) new pathovars, small fixes 2019-06-07 22:47:37 +02:00			`# replace multiple same characters to single one with '+', like "ll" -> "l+"`
			`x_spelling <- gsub("(.)\\1+", "\\1+", x_spelling)`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00
new antibiotics 2019-05-10 16:44:59 +02:00			`# try if name starts with it`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`found <- AMR::antibiotics[which(AMR::antibiotics$name %like% paste0("^", x_spelling)), ]$ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`
			`# and try if any synonym starts with it`
			`synonym_found <- unlist(lapply(AMR::antibiotics$synonyms,`
			`function(s) if (any(s %like% paste0("^", x_spelling))) {`
			`TRUE`
			`} else {`
			`FALSE`
			`}))`
			`found <- AMR::antibiotics$ab[synonym_found == TRUE]`
			`if (length(found) > 0) {`
			`x_new[i] <- found[1L]`
			`next`
			`}`

(v0.7.0.9005) ab algorithm update 2019-06-11 14:18:25 +02:00			`# try by removing all spaces`
			`if (x[i] %like% " ") {`
			`found <- suppressWarnings(as.ab(gsub(" +", "", x[i])))`
(v0.7.0.9006) ab algorithm fix for ATC codes 2019-06-11 15:31:32 +02:00			`if (length(found) > 0 & !is.na(found)) {`
(v0.7.0.9005) ab algorithm update 2019-06-11 14:18:25 +02:00			`x_new[i] <- found[1L]`
			`next`
			`}`
			`}`

			`# try by removing all spaces and numbers`
			`if (x[i] %like% " " \| x[i] %like% "[0-9]") {`
			`found <- suppressWarnings(as.ab(gsub("[ 0-9]", "", x[i])))`
(v0.7.0.9006) ab algorithm fix for ATC codes 2019-06-11 15:31:32 +02:00			`if (length(found) > 0 & !is.na(found)) {`
(v0.7.0.9005) ab algorithm update 2019-06-11 14:18:25 +02:00			`x_new[i] <- found[1L]`
			`next`
			`}`
			`}`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00
			`if (!isFALSE(list(...)$initial_search)) {`
			`# transform back from other languages and try again`
			`x_translated <- paste(lapply(strsplit(x[i], "[^a-zA-Z0-9 ]"),`
			`function(y) {`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`for (i in seq_len(length(y))) {`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`y[i] <- ifelse(tolower(y[i]) %in% tolower(translations_file$replacement),`
			`translations_file[which(tolower(translations_file$replacement) == tolower(y[i]) &`
			`!isFALSE(translations_file$fixed)), "pattern"],`
			`y[i])`
			`}`
			`y`
			`})[[1]],`
			`collapse = "/")`
			`x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))`
			`if (!is.na(x_translated_guess)) {`
			`x_new[i] <- x_translated_guess`
			`next`
			`}`
(v0.7.1.9093) as.ab() fix 2019-10-06 21:07:38 +02:00
			`if (!isFALSE(list(...)$initial_search2)) {`
			`# now also try to coerce brandname combinations like "Amoxy/clavulanic acid"`
			`x_translated <- paste(lapply(strsplit(x_translated, "[^a-zA-Z0-9 ]"),`
			`function(y) {`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`for (i in seq_len(length(y))) {`
(v0.7.1.9093) as.ab() fix 2019-10-06 21:07:38 +02:00			`y_name <- suppressWarnings(ab_name(y[i], language = NULL, initial_search = FALSE, initial_search2 = FALSE))`
			`y[i] <- ifelse(!is.na(y_name),`
			`y_name,`
			`y[i])`
			`}`
			`y`
			`})[[1]],`
			`collapse = "/")`
			`x_translated_guess <- suppressWarnings(as.ab(x_translated, initial_search = FALSE))`
			`if (!is.na(x_translated_guess)) {`
			`x_new[i] <- x_translated_guess`
			`next`
			`}`
(v0.7.1.9092) as.ab() improvements 2019-10-04 15:36:12 +02:00			`}`
			`}`

new antibiotics 2019-05-10 16:44:59 +02:00			`# not found`
			`x_unknown <- c(x_unknown, x_bak[x[i] == x_bak_clean][1])`
			`}`

(v0.7.0.9006) ab algorithm fix for ATC codes 2019-06-11 15:31:32 +02:00			`# take failed ATC codes apart from rest`
			`x_unknown_ATCs <- x_unknown[x_unknown %like% "[A-Z][0-9][0-9][A-Z][A-Z][0-9][0-9]"]`
			`x_unknown <- x_unknown[!x_unknown %in% x_unknown_ATCs]`
			`if (length(x_unknown_ATCs) > 0) {`
			`warning("These ATC codes are not (yet) in the antibiotics data set: ",`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`paste('"', sort(unique(x_unknown_ATCs)), '"', sep = "", collapse = ", "),`
(v0.7.0.9006) ab algorithm fix for ATC codes 2019-06-11 15:31:32 +02:00			`".",`
			`call. = FALSE)`
			`}`

new antibiotics 2019-05-10 16:44:59 +02:00			`if (length(x_unknown) > 0) {`
(v0.7.1.9035) cephalosporins and unit tests 2019-08-11 19:07:26 +02:00			`warning("These values could not be coerced to a valid antimicrobial ID: ",`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`paste('"', sort(unique(x_unknown)), '"', sep = "", collapse = ", "),`
new antibiotics 2019-05-10 16:44:59 +02:00			`".",`
			`call. = FALSE)`
			`}`

			`x_result <- data.frame(x = x_bak_clean, stringsAsFactors = FALSE) %>%`
			`left_join(data.frame(x = x, x_new = x_new, stringsAsFactors = FALSE), by = "x") %>%`
			`pull(x_new)`

ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`if (length(x_result) == 0) {`
			`x_result <- NA_character_`
			`}`

new antibiotics 2019-05-10 16:44:59 +02:00			`structure(.Data = x_result,`
			`class = "ab")`
			`}`

ab_info, other bug fixes 2019-05-16 21:20:00 +02:00			`#' @rdname as.ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @export`
			`is.ab <- function(x) {`
			`identical(class(x), "ab")`
			`}`

			`#' @exportMethod print.ab`
			`#' @export`
			`#' @noRd`
			`print.ab <- function(x, ...) {`
			`cat("Class 'ab'\n")`
(v0.7.1.9027) tibble printing 2019-08-07 15:37:39 +02:00			`print(as.character(x), quote = FALSE)`
new antibiotics 2019-05-10 16:44:59 +02:00			`}`

			`#' @exportMethod as.data.frame.ab`
			`#' @export`
			`#' @noRd`
(v0.7.1.9102) lintr 2019-10-11 17:21:02 +02:00			`as.data.frame.ab <- function(x, ...) {`
new antibiotics 2019-05-10 16:44:59 +02:00			`# same as as.data.frame.character but with removed stringsAsFactors`
			`nm <- paste(deparse(substitute(x), width.cutoff = 500L),`
			`collapse = " ")`
			`if (!"nm" %in% names(list(...))) {`
			`as.data.frame.vector(x, ..., nm = nm)`
			`} else {`
			`as.data.frame.vector(x, ...)`
			`}`
			`}`

(v0.7.1.9036) preserve ab/mo classes in subsetting 2019-08-12 14:48:09 +02:00			`#' @exportMethod [.ab`
new antibiotics 2019-05-10 16:44:59 +02:00			`#' @export`
			`#' @noRd`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`"[.ab" <- function(x, ...) {`
(v0.7.1.9036) preserve ab/mo classes in subsetting 2019-08-12 14:48:09 +02:00			`y <- NextMethod()`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`attributes(y) <- attributes(x)`
			`y`
			`}`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`#' @exportMethod [[.ab`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`#' @export`
			`#' @noRd`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`"[[.ab" <- function(x, ...) {`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`y <- NextMethod()`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`attributes(y) <- attributes(x)`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`y`
			`}`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`#' @exportMethod [<-.ab`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`#' @export`
			`#' @noRd`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`"[<-.ab" <- function(i, j, ..., value) {`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`y <- NextMethod()`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`attributes(y) <- attributes(i)`
			`class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`}`
			`#' @exportMethod [[<-.ab`
			`#' @export`
			`#' @noRd`
			`"[[<-.ab" <- function(i, j, ..., value) {`
			`y <- NextMethod()`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`attributes(y) <- attributes(i)`
			`class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)`
(v0.7.1.9056) mo and ab subsetting 2019-08-14 14:57:06 +02:00			`}`
			`#' @exportMethod c.ab`
			`#' @export`
			`#' @noRd`
			`c.ab <- function(x, ...) {`
			`y <- NextMethod()`
			`attributes(y) <- attributes(x)`
(v0.7.1.9062) mo/ab assignment improvements 2019-08-26 16:02:03 +02:00			`class_integrity_check(y, "antimicrobial code", AMR::antibiotics$ab)`
new antibiotics 2019-05-10 16:44:59 +02:00			`}`
(v0.7.1.9027) tibble printing 2019-08-07 15:37:39 +02:00
			`#' @importFrom pillar type_sum`
			`#' @export`
			`type_sum.ab <- function(x) {`
			`"ab"`
			`}`

			`#' @importFrom pillar pillar_shaft`
			`#' @export`
			`pillar_shaft.ab <- function(x, ...) {`
			`out <- format(x)`
(v0.7.1.9031) include_unknown for first_isolate() 2019-08-08 22:39:42 +02:00			`out[is.na(x)] <- pillar::style_na("NA")`
(v0.7.1.9027) tibble printing 2019-08-07 15:37:39 +02:00			`pillar::new_pillar_shaft_simple(out, align = "left", min_width = 4)`
			`}`