ab_* functions, mo_* functions, 180 new microorganisms, speed improvement for bactid

2025-11-25 00:51:09 +01:00 · 2018-08-28 13:51:13 +02:00
parent 2acdb1981c
commit 972fc4f6c7
24 changed files with 571 additions and 306 deletions
--- a/R/ab_property.R
+++ b/R/ab_property.R
@@ -0,0 +1,84 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# AUTHORS                                                              #
+# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
+#                                                                      #
+# LICENCE                                                              #
+# This program is free software; you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License version 2.0,    #
+# as published by the Free Software Foundation.                        #
+#                                                                      #
+# This program is distributed in the hope that it will be useful,      #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
+# GNU General Public License for more details.                         #
+# ==================================================================== #
+
+#' Property of an antibiotic
+#'
+#' Use these functions to return a specific property of an antibiotic from the \code{\link{antibiotics}} data set, based on their ATC code. Get such a code with \code{\link{as.atc}}.
+#' @param x a (vector of a) valid \code{\link{atc}} code or any text that can be coerced to a valid atc with \code{\link{as.atc}}
+#' @param property one of the column names of one of the \code{\link{antibiotics}} data set, like \code{"atc"} and \code{"official"}
+#' @rdname ab_property
+#' @export
+#' @importFrom dplyr %>% left_join pull
+#' @seealso \code{\link{antibiotics}}
+#' @examples
+#' ab_atc("amcl")         # J01CR02
+#' ab_official("amcl")    # Amoxicillin and beta-lactamase inhibitor
+#' ab_official_nl("amcl") # Amoxicilline met enzymremmer
+#' ab_trivial_nl("amcl")  # Amoxicilline/clavulaanzuur
+#' ab_certe("amcl")       # amcl
+#' ab_umcg("amcl")        # AMCL
+ab_property <- function(x, property = 'official') {
+  property <- property[1]
+  if (!property %in% colnames(antibiotics)) {
+    stop("invalid property: ", property, " - use a column name of `antibiotics`")
+  }
+  if (!is.atc(x)) {
+    x <- as.atc(x) # this will give a warning if x cannot be coerced
+  }
+  suppressWarnings(
+    data.frame(atc = x, stringsAsFactors = FALSE) %>%
+      left_join(AMR::antibiotics, by = "atc") %>%
+      pull(property)
+  )
+}
+
+#' @rdname ab_property
+#' @export
+ab_atc <- function(x) {
+  as.character(as.atc(x))
+}
+
+#' @rdname ab_property
+#' @export
+ab_official <- function(x) {
+  ab_property(x, "official")
+}
+
+#' @rdname ab_property
+#' @export
+ab_official_nl <- function(x) {
+  ab_property(x, "official_nl")
+}
+
+#' @rdname ab_property
+#' @export
+ab_trivial_nl <- function(x) {
+  ab_property(x, "trivial_nl")
+}
+
+#' @rdname ab_property
+#' @export
+ab_certe <- function(x) {
+  ab_property(x, "certe")
+}
+
+#' @rdname ab_property
+#' @export
+ab_umcg <- function(x) {
+  ab_property(x, "umcg")
+}
--- a/R/atc.R
+++ b/R/atc.R
@@ -26,7 +26,9 @@
 #' @keywords atc
 #' @export
 #' @importFrom dplyr %>% filter slice pull
-#' @details In the ATC classification system, the active substances are classified in a hierarchy with five different levels.  The system has fourteen main anatomical/pharmacological groups or 1st levels. Each ATC main group is divided into 2nd levels which could be either pharmacological or therapeutic groups.  The 3rd and 4th levels are chemical, pharmacological or therapeutic subgroups and the 5th level is the chemical substance.  The 2nd, 3rd and 4th levels are often used to identify pharmacological subgroups when that is considered more appropriate than therapeutic or chemical subgroups.
+#' @details Use the \code{\link{ab_property}} functions to get properties based on the returned ATC code, see Examples.
+#'
+#' In the ATC classification system, the active substances are classified in a hierarchy with five different levels.  The system has fourteen main anatomical/pharmacological groups or 1st levels. Each ATC main group is divided into 2nd levels which could be either pharmacological or therapeutic groups.  The 3rd and 4th levels are chemical, pharmacological or therapeutic subgroups and the 5th level is the chemical substance.  The 2nd, 3rd and 4th levels are often used to identify pharmacological subgroups when that is considered more appropriate than therapeutic or chemical subgroups.
 #'   Source: \url{https://www.whocc.no/atc/structure_and_principles/}
 #' @return Character (vector) with class \code{"act"}. Unknown values will return \code{NA}.
 #' @seealso \code{\link{antibiotics}} for the dataframe that is being used to determine ATC's.
@@ -40,6 +42,11 @@
 #' as.atc("Erythrocin") # Trade name
 #' as.atc("Eryzole")    # Trade name
 #' as.atc("Pediamycin") # Trade name
+#'
+#' # Use ab_* functions to get a specific property based on an ATC code
+#' Cipro <- as.atc("cipro") # returns `J01MA02`
+#' ab_official(Cipro)       # returns "Ciprofloxacin"
+#' ab_umcg(Cipro)           # returns "CIPR", the code used in the UMCG
 as.atc <- function(x) {

  x.new <- rep(NA_character_, length(x))
@@ -90,6 +97,15 @@ as.atc <- function(x) {
      x.new[is.na(x.new) & x.bak == x[i]] <- found[1L]
    }

+    # nothing helped, try first chars of official name, but only if nchar > 4 (cipro, nitro, fosfo)
+    if (nchar(x[i]) > 4) {
+      found <- AMR::antibiotics[which(AMR::antibiotics$official %like% paste0("^", substr(x[i], 1, 5))),]$atc
+      if (length(found) > 0) {
+        fail <- FALSE
+        x.new[is.na(x.new) & x.bak == x[i]] <- found[1L]
+      }
+    }
+
    # not found
    if (fail == TRUE) {
      failures <- c(failures, x[i])
@@ -149,52 +165,6 @@ pull.atc <- function(.data, ...) {
  pull(as.data.frame(.data), ...)
 }

-atc_get_property <- function(atc, param) {
-  if (!is.atc(atc)) {
-    atc <- as.atc(atc)
-  }
-  suppressWarnings(
-    data.frame(atc = atc, stringsAsFactors = FALSE) %>%
-      left_join(AMR::antibiotics, by = "atc") %>%
-      pull(param)
-  )
-}
-
-#' Get antibiotic property based on ATC
-#'
-#' Use these functions to return a specific property of an antibiotic from the \code{\link{antibiotics}} data set, based on their ATC code.
-#' @param atc a valid ATC code, created with \code{\link{as.atc}}
-#' @rdname atc.property
-#' @name atc.property
-#' @export
-atc.official <- function(atc) {
-  atc_get_property(atc, "official")
-}
-
-#' @rdname atc.property
-#' @export
-atc.official_nl <- function(atc) {
-  atc_get_property(atc, "official_nl")
-}
-
-#' @rdname atc.property
-#' @export
-atc.trivial_nl <- function(atc) {
-  atc_get_property(atc, "trivial_nl")
-}
-
-#' @rdname atc.property
-#' @export
-atc.certe <- function(atc) {
-  atc_get_property(atc, "certe")
-}
-
-#' @rdname atc.property
-#' @export
-atc.umcg <- function(atc) {
-  atc_get_property(atc, "umcg")
-}
-
 #' Properties of an ATC code
 #'
 #' Gets data from the WHO to determine properties of an ATC (e.g. an antibiotic) like name, defined daily dose (DDD) or standard unit. \cr \strong{This function requires an internet connection.}
--- a/R/bactid.R
+++ b/R/bactid.R
@@ -23,9 +23,12 @@
 #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".
 #' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. Groups D and E will be ignored, since they are \emph{Enterococci}.
 #' @rdname as.bactid
+#' @aliases bactid
 #' @keywords bactid Becker becker Lancefield lancefield guess
 #' @details \code{guess_bactid} is an alias of \code{as.bactid}.
 #'
+#' Use the \code{\link{mo_property}} functions to get properties based on the returned bactid, see Examples.
+#'
 #' Some exceptions have been built in to get more logical results, based on prevalence of human pathogens. These are:
 #' \itemize{
 #'   \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
@@ -41,7 +44,7 @@
 #' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \cr
 #'     \url{https://dx.doi.org/10.1084/jem.57.4.571}
 #' @export
-#' @importFrom dplyr %>% filter pull
+#' @importFrom dplyr %>% pull left_join
 #' @return Character (vector) with class \code{"bactid"}. Unknown values will return \code{NA}.
 #' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
 #' @examples
@@ -56,12 +59,18 @@
 #' as.bactid("VISA") # Vancomycin Intermediate S. aureus
 #' as.bactid("VRSA") # Vancomycin Resistant S. aureus
 #'
+#' # guess_bactid is an alias of as.bactid and works the same
 #' guess_bactid("S. epidermidis")                 # will remain species: STAEPI
 #' guess_bactid("S. epidermidis", Becker = TRUE)  # will not remain species: STACNS
 #'
 #' guess_bactid("S. pyogenes")                    # will remain species: STCAGA
 #' guess_bactid("S. pyogenes", Lancefield = TRUE) # will not remain species: STCGRA
 #'
+#' # Use mo_* functions to get a specific property based on a bactid
+#' Ecoli <- as.bactid("E. coli") # returns `ESCCOL`
+#' mo_genus(Ecoli)               # returns "Escherichia"
+#' mo_gramstain(Ecoli)           # returns "Negative rods"
+#'
 #' \dontrun{
 #' df$bactid <- as.bactid(df$microorganism_name)
 #'
@@ -82,7 +91,6 @@
 #' }
 as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {

-  failures <- character(0)

  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
@@ -103,12 +111,19 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
    }
  }

-  x.fullbackup <- x
+  MOs <- AMR::microorganisms %>% filter(!bactid %like% '^_FAM') # dont search in those
+  failures <- character(0)
+  x_input <- x
+
+  # only check the uniques, which is way faster
+  x <- unique(x)
+
+  x_backup <- x
  # remove dots and other non-text in case of "E. coli" except spaces
  x <- gsub("[^a-zA-Z0-9 ]+", "", x)
  # but spaces before and after should be omitted
  x <- trimws(x, which = "both")
-  x.backup <- x
+  x_trimmed <- x
  # replace space by regex sign
  x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE)
  x <- gsub(" ", ".*", x, fixed = TRUE)
@@ -121,7 +136,7 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
  for (i in 1:length(x)) {

    if (Becker == TRUE | Becker == "all") {
-      mo <- suppressWarnings(guess_bactid(x.fullbackup[i]))
+      mo <- suppressWarnings(guess_bactid(x_backup[i]))
      if (mo %like% '^STA') {
        # See Source. It's this figure:
        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4187637/figure/F3/
@@ -152,7 +167,7 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
    }

    if (Lancefield == TRUE) {
-      mo <- suppressWarnings(guess_bactid(x.fullbackup[i]))
+      mo <- suppressWarnings(guess_bactid(x_backup[i]))
      if (mo %like% '^STC') {
        # See Source
        species <- left_join_microorganisms(mo)$species
@@ -184,20 +199,20 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
      }
    }

-    if (identical(x.backup[i], "")) {
+    if (identical(x_trimmed[i], "")) {
      # empty values
      x[i] <- NA
-      failures <- c(failures, x.fullbackup[i])
+      failures <- c(failures, x_backup[i])
      next
    }
-    if (x.fullbackup[i] %in% AMR::microorganisms$bactid) {
+    if (x_backup[i] %in% MOs$bactid) {
      # is already a valid bactid
-      x[i] <- x.fullbackup[i]
+      x[i] <- x_backup[i]
      next
    }
-    if (x.backup[i] %in% AMR::microorganisms$bactid) {
+    if (x_trimmed[i] %in% MOs$bactid) {
      # is already a valid bactid
-      x[i] <- x.backup[i]
+      x[i] <- x_trimmed[i]
      next
    }

@@ -232,27 +247,27 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
    }

    # translate known trivial names to genus+species
-    if (!is.na(x.backup[i])) {
-      if (toupper(x.backup[i]) == 'MRSA'
-          | toupper(x.backup[i]) == 'VISA'
-          | toupper(x.backup[i]) == 'VRSA') {
+    if (!is.na(x_trimmed[i])) {
+      if (toupper(x_trimmed[i]) == 'MRSA'
+          | toupper(x_trimmed[i]) == 'VISA'
+          | toupper(x_trimmed[i]) == 'VRSA') {
        x[i] <- 'STAAUR'
        next
      }
-      if (toupper(x.backup[i]) == 'MRSE') {
+      if (toupper(x_trimmed[i]) == 'MRSE') {
        x[i] <- 'STAEPI'
        next
      }
-      if (toupper(x.backup[i]) == 'VRE') {
+      if (toupper(x_trimmed[i]) == 'VRE') {
        x[i] <- 'ENC'
        next
      }
-      if (toupper(x.backup[i]) == 'MRPA') {
+      if (toupper(x_trimmed[i]) == 'MRPA') {
        # multi resistant P. aeruginosa
        x[i] <- 'PSEAER'
        next
      }
-      if (toupper(x.backup[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
+      if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) {
        # peni R, peni I, vanco I, vanco R: S. pneumoniae
        x[i] <- 'STCPNE'
        next
@@ -260,14 +275,14 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
    }

    # try any match keeping spaces
-    found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x_withspaces[i]),]$bactid
+    found <- MOs[which(MOs$fullname %like% x_withspaces[i]),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }

    # try any match diregarding spaces
-    found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x[i]),]$bactid
+    found <- MOs[which(MOs$fullname %like% x[i]),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
@@ -275,21 +290,21 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {

    # try exact match of only genus, with 'species' attached
    # (this prevents Streptococcus from becoming Peptostreptococcus, since "p" < "s")
-    found <- AMR::microorganisms[which(AMR::microorganisms$fullname == x_species[i]),]$bactid
+    found <- MOs[which(MOs$fullname == x_species[i]),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }

    # try any match of only genus, with 'species' attached
-    found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x_species[i]),]$bactid
+    found <- MOs[which(MOs$fullname %like% x_species[i]),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
    }

    # search for GLIMS code
-    found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$mocode) == toupper(x.backup[i])),]$bactid
+    found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$mocode) == toupper(x_trimmed[i])),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
@@ -298,11 +313,11 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
    # try splitting of characters and then find ID
    # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
    x_split <- x
-    x_length <- nchar(x.backup[i])
-    x_split[i] <- paste0(x.backup[i] %>% substr(1, x_length / 2) %>% trimws(),
+    x_length <- nchar(x_trimmed[i])
+    x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(),
                         '.* ',
-                         x.backup[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-    found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% paste0('^', x_split[i])),]$bactid
+                         x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
+    found <- MOs[which(MOs$fullname %like% paste0('^', x_split[i])),]$bactid
    if (length(found) > 0) {
      x[i] <- found[1L]
      next
@@ -310,13 +325,13 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {

    # try any match with text before and after original search string
    # so "negative rods" will be "GNR"
-    if (x.backup[i] %like% "^Gram") {
-      x.backup[i] <- gsub("^Gram", "", x.backup[i], ignore.case = TRUE)
+    if (x_trimmed[i] %like% "^Gram") {
+      x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE)
      # remove leading and trailing spaces again
-      x.backup[i] <- trimws(x.backup[i], which = "both")
+      x_trimmed[i] <- trimws(x_trimmed[i], which = "both")
    }
-    if (!is.na(x.backup[i])) {
-      found <- AMR::microorganisms[which(AMR::microorganisms$fullname %like% x.backup[i]),]$bactid
+    if (!is.na(x_trimmed[i])) {
+      found <- MOs[which(MOs$fullname %like% x_trimmed[i]),]$bactid
      if (length(found) > 0) {
        x[i] <- found[1L]
        next
@@ -325,7 +340,7 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {

    # not found
    x[i] <- NA_character_
-    failures <- c(failures, x.fullbackup[i])
+    failures <- c(failures, x_backup[i])

  }

@@ -336,6 +351,19 @@ as.bactid <- function(x, Becker = FALSE, Lancefield = FALSE) {
            ".",
            call. = FALSE)
  }
+
+  # left join the found results to the original input values (x_input)
+  df_found <- data.frame(input = as.character(unique(x_input)),
+                         found = x,
+                         stringsAsFactors = FALSE)
+  df_input <- data.frame(input = as.character(x_input),
+                         stringsAsFactors = FALSE)
+
+  x <- df_input %>%
+    left_join(df_found,
+              by = "input") %>%
+    pull(found)
+
  class(x) <- "bactid"
  attr(x, 'package') <- 'AMR'
  x
@@ -380,79 +408,3 @@ as.data.frame.bactid <- function (x, ...) {
 pull.bactid <- function(.data, ...) {
  pull(as.data.frame(.data), ...)
 }
-
-bactid_get_property <- function(bactid, param) {
-  if (!is.bactid(bactid)) {
-    bactid <- as.bactid(bactid)
-  }
-  suppressWarnings(
-    data.frame(bactid = bactid, stringsAsFactors = FALSE) %>%
-      left_join(AMR::microorganisms, by = "bactid") %>%
-      pull(param)
-  )
-}
-
-#' Get microbial property based on `bactid`
-#'
-#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set, based on their \code{bactid}. Get such an ID with \code{\link{as.bactid}}.
-#' @param bactid a valid bactid code, created with \code{\link{as.bactid}}
-#' @rdname bactid.property
-#' @name bactid.property
-#' @export
-bactid.family <- function(bactid) {
-  bactid_get_property(bactid, "family")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.genus <- function(bactid) {
-  bactid_get_property(bactid, "genus")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.species <- function(bactid) {
-  bactid_get_property(bactid, "species")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.subspecies <- function(bactid) {
-  bactid_get_property(bactid, "subspecies")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.fullname <- function(bactid) {
-  bactid_get_property(bactid, "fullname")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.type <- function(bactid) {
-  bactid_get_property(bactid, "type")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.gramstain <- function(bactid) {
-  bactid_get_property(bactid, "gramstain")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.aerobic <- function(bactid) {
-  bactid_get_property(bactid, "aerobic")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.type_nl <- function(bactid) {
-  bactid_get_property(bactid, "type_nl")
-}
-
-#' @rdname bactid.property
-#' @export
-bactid.gramstain_nl <- function(bactid) {
-  bactid_get_property(bactid, "gramstain_nl")
-}
--- a/R/data.R
+++ b/R/data.R
@@ -24,11 +24,11 @@
 #'   \item{\code{atc}}{ATC code, like \code{J01CR02}}
 #'   \item{\code{certe}}{Certe code, like \code{amcl}}
 #'   \item{\code{umcg}}{UMCG code, like \code{AMCL}}
-#'   \item{\code{abbr}}{Abbreviation as used by many countries, to be used for \code{\link{guess_atc}}}
-#'   \item{\code{official}}{Official name by the WHO, like \code{"Amoxicillin and enzyme inhibitor"}}
+#'   \item{\code{abbr}}{Abbreviation as used by many countries, used internally by \code{\link{as.atc}}}
+#'   \item{\code{official}}{Official name by the WHO, like \code{"Amoxicillin and beta-lactamase inhibitor"}}
 #'   \item{\code{official_nl}}{Official name in the Netherlands, like \code{"Amoxicilline met enzymremmer"}}
 #'   \item{\code{trivial_nl}}{Trivial name in Dutch, like \code{"Amoxicilline/clavulaanzuur"}}
-#'   \item{\code{trade_name}}{Trade name as used by many countries, to be used for \code{\link{guess_atc}}}
+#'   \item{\code{trade_name}}{Trade name as used by many countries, used internally by \code{\link{as.atc}}}
 #'   \item{\code{oral_ddd}}{Defined Daily Dose (DDD), oral treatment}
 #'   \item{\code{oral_units}}{Units of \code{ddd_units}}
 #'   \item{\code{iv_ddd}}{Defined Daily Dose (DDD), parenteral treatment}
@@ -252,11 +252,25 @@
 #            )
 #   )
 "antibiotics"
+antibiotics_add_to_property <- function(antibiotics, atc, property, value) {
+  if (length(atc) > 1L) {
+    stop("only one atc at a time")
+  }
+  if (!property %in% c("abbr", "trade_name")) {
+    stop("only possible for abbr and trade_name")
+  }
+  if (atc %in% antibiotics$atc) {
+    current <- antibiotics[which(antibiotics$atc == atc), property]
+    antibiotics[which(antibiotics$atc == atc), property] <- paste(current, value, sep = "|")
+    message("done")
+  }
+  antibiotics
+}

-#' Dataset with ~2500 microorganisms
+#' Dataset with ~2650 microorganisms
 #'
-#' A dataset containing 2464 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
-#' @format A data.frame with 2464 observations and 12 variables:
+#' A dataset containing 2,646 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
+#' @format A data.frame with 2,646 observations and 12 variables:
 #' \describe{
 #'   \item{\code{bactid}}{ID of microorganism}
 #'   \item{\code{bactsys}}{Bactsyscode of microorganism}
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -0,0 +1,139 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# AUTHORS                                                              #
+# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
+#                                                                      #
+# LICENCE                                                              #
+# This program is free software; you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License version 2.0,    #
+# as published by the Free Software Foundation.                        #
+#                                                                      #
+# This program is distributed in the hope that it will be useful,      #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
+# GNU General Public License for more details.                         #
+# ==================================================================== #
+
+#' Property of a microorganism
+#'
+#' Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set, based on their \code{bactid}. Get such an ID with \code{\link{as.bactid}}.
+#' @param x a (vector of a) valid \code{\link{bactid}} or any text that can be coerced to a valid bactid with \code{\link{as.bactid}}
+#' @param property one of the column names of one of the \code{\link{microorganisms}} data set, like \code{"bactid"}, \code{"bactsys"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"fullname"}, \code{"gramstain"} and \code{"aerobic"}
+#' @rdname mo_property
+#' @export
+#' @importFrom dplyr %>% left_join pull
+#' @seealso \code{\link{microorganisms}}
+#' @examples
+#' # All properties
+#' mo_family("E. coli")       # Enterobacteriaceae
+#' mo_genus("E. coli")        # Escherichia
+#' mo_species("E. coli")      # coli
+#' mo_subspecies("E. coli")   # <NA>
+#' mo_fullname("E. coli")     # Escherichia coli
+#' mo_type("E. coli")         # Bacteria
+#' mo_gramstain("E. coli")    # Negative rods
+#' mo_aerobic("E. coli")      # TRUE
+#' mo_type_nl("E. coli")      # Bacterie
+#' mo_gramstain_nl("E. coli") # Negatieve staven
+#'
+#'
+#' # Abbreviations known in the field
+#' mo_genus("EHEC")           # Escherichia
+#' mo_species("EHEC")         # coli
+#' mo_subspecies("EHEC")      # EHEC
+#' mo_fullname("EHEC")        # Escherichia coli (EHEC)
+#'
+#' mo_genus("MRSA")           # Staphylococcus
+#' mo_species("MRSA")         # aureus
+#' mo_gramstain("MRSA")       # Positive cocci
+#'
+#' mo_genus("VISA")           # Staphylococcus
+#' mo_species("VISA")         # aureus
+#'
+#'
+#' # Known subspecies
+#' mo_genus("doylei")         # Campylobacter
+#' mo_species("doylei")       # jejuni
+#' mo_fullname("doylei")      # Campylobacter jejuni (doylei)
+#'
+#'
+#' # Anaerobic bacteria
+#' mo_genus("B. fragilis")    # Bacteroides
+#' mo_species("B. fragilis")  # fragilis
+#' mo_aerobic("B. fragilis")  # FALSE
+mo_property <- function(x, property = 'fullname') {
+  property <- property[1]
+  if (!property %in% colnames(microorganisms)) {
+    stop("invalid property: ", property, " - use a column name of `microorganisms`")
+  }
+  if (!is.bactid(x)) {
+    x <- as.bactid(x) # this will give a warning if x cannot be coerced
+  }
+  suppressWarnings(
+    data.frame(bactid = x, stringsAsFactors = FALSE) %>%
+      left_join(AMR::microorganisms, by = "bactid") %>%
+      pull(property)
+  )
+}
+
+#' @rdname mo_property
+#' @export
+mo_family <- function(x) {
+  mo_property(x, "family")
+}
+
+#' @rdname mo_property
+#' @export
+mo_genus <- function(x) {
+  mo_property(x, "genus")
+}
+
+#' @rdname mo_property
+#' @export
+mo_species <- function(x) {
+  mo_property(x, "species")
+}
+
+#' @rdname mo_property
+#' @export
+mo_subspecies <- function(x) {
+  mo_property(x, "subspecies")
+}
+
+#' @rdname mo_property
+#' @export
+mo_fullname <- function(x) {
+  mo_property(x, "fullname")
+}
+
+#' @rdname mo_property
+#' @export
+mo_type <- function(x) {
+  mo_property(x, "type")
+}
+
+#' @rdname mo_property
+#' @export
+mo_gramstain <- function(x) {
+  mo_property(x, "gramstain")
+}
+
+#' @rdname mo_property
+#' @export
+mo_aerobic <- function(x) {
+  mo_property(x, "aerobic")
+}
+
+#' @rdname mo_property
+#' @export
+mo_type_nl <- function(x) {
+  mo_property(x, "type_nl")
+}
+
+#' @rdname mo_property
+#' @export
+mo_gramstain_nl <- function(x) {
+  mo_property(x, "gramstain_nl")
+}