# AMR/R/join_microorganisms.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Data Analysis for R                   #
#                                                                      #
# SOURCE                                                               #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2018-2021 Berends MS, Luz CF et al.                              #
# Developed at the University of Groningen, the Netherlands, in        #
# collaboration with non-profit organisations Certe Medical            #
# Diagnostics & Advice, and University Medical Center Groningen.       # 
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #

#' Join [microorganisms] to a Data Set
#'
#' Join the data set [microorganisms] easily to an existing table or character vector.
#' @inheritSection lifecycle Stable Lifecycle
#' @rdname join
#' @name join
#' @aliases join inner_join
#' @param x existing table to join, or character vector
#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (such as `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("bacteria_id" = "fullname")`)
#' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
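#' @param ... further arguments, passed on to the join function that is used (the `dplyr` join function if that package is installed, otherwise the internal fallback)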
#' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. 
#' 
#' If the `dplyr` package is installed, their join functions will be used. Otherwise, the much slower [merge()] function from base R will be used.
#' @inheritSection AMR Read more on Our Website!
#' @export
#' @examples
#' left_join_microorganisms(as.mo("K. pneumoniae"))
#' left_join_microorganisms("B_KLBSL_PNE")
#'
#' \donttest{
#' if (require("dplyr")) {
#'   example_isolates %>%
#'     left_join_microorganisms() %>% 
#'     colnames()
#'  
#'   df <- data.frame(date = seq(from = as.Date("2018-01-01"),
#'                               to = as.Date("2018-01-07"),
#'                               by = 1),
#'                    bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
#'                                       "E. coli", "E. coli", "E. coli")),
#'                    stringsAsFactors = FALSE)
#'   colnames(df)
#'   df_joined <- left_join_microorganisms(df, "bacteria")
#'   colnames(df_joined)
#' }
#' }
inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # Inner join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(suffix, allow_class = "character", has_length = 2)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "inner_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_inner_join() is used instead
  join_fn <- import_fn("inner_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_inner_join
  }
  joined <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, suffix = suffix, ...)
  )

  # tell the user when the join produced more rows than the input had
  extra_rows <- NROW(joined) - NROW(x)
  if (extra_rows > 0) {
    warning_("The newly joined tbl contains ", extra_rows, " rows more that its original.")
  }
  class(joined) <- original_class
  joined
}

#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # Left join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(suffix, allow_class = "character", has_length = 2)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "left_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_left_join() is used instead
  join_fn <- import_fn("left_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_left_join
  }
  joined <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, suffix = suffix, ...)
  )

  # tell the user when the join produced more rows than the input had
  extra_rows <- NROW(joined) - NROW(x)
  if (extra_rows > 0) {
    warning_("The newly joined tbl contains ", extra_rows, " rows more that its original.")
  }
  class(joined) <- original_class
  joined
}

#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # Right join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(suffix, allow_class = "character", has_length = 2)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "right_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_right_join() is used instead
  join_fn <- import_fn("right_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_right_join
  }
  joined <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, suffix = suffix, ...)
  )

  # tell the user when the join produced more rows than the input had
  extra_rows <- NROW(joined) - NROW(x)
  if (extra_rows > 0) {
    warning_("The newly joined tbl contains ", extra_rows, " rows more that its original.")
  }
  class(joined) <- original_class
  joined
}

#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # Full join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)
  meet_criteria(suffix, allow_class = "character", has_length = 2)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "full_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_full_join() is used instead
  join_fn <- import_fn("full_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_full_join
  }
  joined <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, suffix = suffix, ...)
  )

  # tell the user when the join produced more rows than the input had
  extra_rows <- NROW(joined) - NROW(x)
  if (extra_rows > 0) {
    warning_("The newly joined tbl contains ", extra_rows, " rows more that its original.")
  }
  class(joined) <- original_class
  joined
}

#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = NULL, ...) {
  # Semi join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "semi_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_semi_join() is used instead
  join_fn <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_semi_join
  }
  filtered <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, ...)
  )

  class(filtered) <- original_class
  filtered
}

#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = NULL, ...) {
  # Anti join of `x` (a data.frame or a character vector) with the
  # `microorganisms` data set. The result gets the class that `x` had before
  # joining ("data.frame" when `x` was not a data.frame).
  meet_criteria(x, allow_class = c("data.frame", "character"))
  meet_criteria(by, allow_class = "character", allow_NULL = TRUE)

  check_dataset_integrity()
  x <- check_groups_before_join(x, "anti_join_microorganisms")
  # remember the class of the input so it can be put back on the result
  original_class <- get_prejoined_class(x)
  prepared <- joins_check_df(x, by)
  x <- prepared$x
  by <- prepared$by

  # import_fn() is told not to fail; a NULL result is treated as "dplyr is
  # not available", in which case pm_anti_join() is used instead
  join_fn <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)
  if (is.null(join_fn)) {
    join_fn <- pm_anti_join
  }
  filtered <- suppressWarnings(
    join_fn(x = x, y = microorganisms, by = by, ...)
  )

  class(filtered) <- original_class
  filtered
}

# Prepare the input of the *_join_microorganisms() functions.
#
# `x` is turned into a plain data.frame (anything that is not a data.frame or
# matrix is first converted with as.mo() into a single column named "mo") and
# `by` is turned into a named character vector to join on.
#
# Returns a list with elements `x` (the data.frame) and `by` (the join
# specification). Stops with an error if `by` is NULL and `x` has neither a
# column for which is.mo() is TRUE nor a column named "mo".
joins_check_df <- function(x, by) {
  is_tabular <- any(c("data.frame", "matrix") %in% class(x))
  if (!is_tabular) {
    x <- data.frame(mo = as.mo(x), stringsAsFactors = FALSE)
    by <- if (is.null(by)) "mo" else by
  }
  x <- as.data.frame(x, stringsAsFactors = FALSE)

  if (is.null(by)) {
    # take the first column for which is.mo() is TRUE, NA if there is none
    mo_columns <- colnames(x)[vapply(x, is.mo, logical(1))]
    by <- mo_columns[1]
    if (is.na(by)) {
      if (!"mo" %in% colnames(x)) {
        stop("Cannot join - no column found with name 'mo' or with class <mo>.", call. = FALSE)
      }
      # fall back on a column named "mo" and coerce it with as.mo()
      by <- "mo"
      x[, "mo"] <- as.mo(x[, "mo"])
    }
    # same wording as the message of the dplyr join functions
    message_('Joining, by = "', by, '"', add_fn = font_black, as_note = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: join that column of `x` to the first column of
    # `microorganisms`
    join_columns <- colnames(microorganisms)[1]
    names(join_columns) <- by
  } else {
    # named `by` is used as provided
    join_columns <- by
  }

  list(x = x,
       by = join_columns)
}

# Return the class vector that the join result should get: the class of `x`
# when it is a data.frame, and "data.frame" for any other input.
get_prejoined_class <- function(x) {
  if (!is.data.frame(x)) {
    return("data.frame")
  }
  class(x)
}

# Drop grouping from `x` before joining. Input that is not a data.frame, or
# that has no "groups" attribute, is returned unchanged. Otherwise `x` is
# ungrouped, its "groups" attribute and every class matching "group" are
# removed, and a warning naming the calling function `fn` is given.
check_groups_before_join <- function(x, fn) {
  is_grouped <- is.data.frame(x) && !is.null(attributes(x)$groups)
  if (!is_grouped) {
    return(x)
  }

  x <- pm_ungroup(x)
  attr(x, "groups") <- NULL
  class(x) <- class(x)[!class(x) %like% "group"]
  # NOTE(review): the message mentions merge(), although the join functions in
  # this file use the dplyr join when it can be imported - confirm the wording
  warning_("Groups are dropped, since the ", fn, "() function relies on merge() from base R.", call = FALSE)
  x
}
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# ==================================================================== #`
			`# TITLE #`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# Antimicrobial Resistance (AMR) Data Analysis for R #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# #`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`# SOURCE #`
(v1.2.0.9026) move to github 2020-07-08 14:48:06 +02:00			`# https://github.com/msberends/AMR #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# #`
			`# LICENCE #`
(v1.4.0.9047) unit tests 2020-12-27 00:30:28 +01:00			`# (c) 2018-2021 Berends MS, Luz CF et al. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# Developed at the University of Groningen, the Netherlands, in #`
			`# collaboration with non-profit organisations Certe Medical #`
			`# Diagnostics & Advice, and University Medical Center Groningen. #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# #`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
(v0.9.0.9008) Happy new year! Add lifecycles 2020-01-05 17:22:09 +01:00			`# We created this package for both routine data analysis and academic #`
			`# research and it was publicly released in the hope that it will be #`
			`# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #`
(v1.4.0) matching score update 2020-10-08 11:16:03 +02:00			`# #`
			`# Visit our website for the full manual and a complete tutorial about #`
(v1.5.0.9014) only_rsi_columns, is.rsi.eligible improvement 2021-02-02 23:57:35 +01:00			`# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# ==================================================================== #`

(v1.5.0.9006) major documentation update 2021-01-18 16:57:56 +01:00			`#' Join [microorganisms] to a Data Set`
first commit 2018-02-21 11:52:31 +01:00			`#'`
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			`#' Join the data set [microorganisms] easily to an existing table or character vector.`
(v1.5.0.9006) major documentation update 2021-01-18 16:57:56 +01:00			`#' @inheritSection lifecycle Stable Lifecycle`
first commit 2018-02-21 11:52:31 +01:00			`#' @rdname join`
			`#' @name join`
			`#' @aliases join inner_join`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' @param x existing table to join, or character vector`
(v1.4.0.9041) updates based on review 2020-12-17 16:22:25 +01:00			#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (such as `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("bacteria_id" = "fullname")`)
(v0.8.0.9036) complete documentation rewrite 2019-11-28 22:32:17 +01:00			#' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
(v1.2.0.9010) documentation fix 2020-06-17 21:35:10 +02:00			`#' @param ... ignored`
(v1.3.0.9022) mo_matching_score(), poorman update, as.rsi() fix 2020-09-18 16:05:53 +02:00			#' @details Note: As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix.
(v1.1.0.9004) lose dependencies 2020-05-16 13:05:47 +02:00			`#'`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			#' If the `dplyr` package is installed, their join functions will be used. Otherwise, the much slower [merge()] function from base R will be used.
(v1.5.0.9006) major documentation update 2021-01-18 16:57:56 +01:00			`#' @inheritSection AMR Read more on Our Website!`
first commit 2018-02-21 11:52:31 +01:00			`#' @export`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`#' @examples`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' left_join_microorganisms(as.mo("K. pneumoniae"))`
			`#' left_join_microorganisms("B_KLBSL_PNE")`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`#'`
(v1.3.0.9035) mdro() for EUCAST 3.2, examples cleanup 2020-09-29 23:35:46 +02:00			`#' \donttest{`
			`#' if (require("dplyr")) {`
			`#' example_isolates %>%`
			`#' left_join_microorganisms() %>%`
			`#' colnames()`
			`#'`
			`#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),`
			`#' to = as.Date("2018-01-07"),`
			`#' by = 1),`
			`#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",`
			`#' "E. coli", "E. coli", "E. coli")),`
			`#' stringsAsFactors = FALSE)`
			`#' colnames(df)`
			`#' df_joined <- left_join_microorganisms(df, "bacteria")`
			`#' colnames(df_joined)`
			`#' }`
(v1.1.0.9004) lose dependencies 2020-05-16 13:05:47 +02:00			`#' }`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(suffix, allow_class = "character", has_length = 2)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "inner_join_microorganisms")`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_inner <- import_fn("inner_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_inner)) {`
			`join <- suppressWarnings(`
			`dplyr_inner(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`} else {`
			`join <- suppressWarnings(`
			`pm_inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`}`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`if (NROW(join) > NROW(x)) {`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")`
first commit 2018-02-21 11:52:31 +01:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
first commit 2018-02-21 11:52:31 +01:00			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(suffix, allow_class = "character", has_length = 2)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "left_join_microorganisms")`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_left <- import_fn("left_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_left)) {`
			`join <- suppressWarnings(`
			`dplyr_left(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`} else {`
			`join <- suppressWarnings(`
			`pm_left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`}`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`if (NROW(join) > NROW(x)) {`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")`
first commit 2018-02-21 11:52:31 +01:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
first commit 2018-02-21 11:52:31 +01:00			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(suffix, allow_class = "character", has_length = 2)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "right_join_microorganisms")`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_right <- import_fn("right_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_right)) {`
			`join <- suppressWarnings(`
			`dplyr_right(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`} else {`
			`join <- suppressWarnings(`
			`pm_right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`}`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`if (NROW(join) > NROW(x)) {`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")`
first commit 2018-02-21 11:52:31 +01:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
first commit 2018-02-21 11:52:31 +01:00			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`
			`meet_criteria(suffix, allow_class = "character", has_length = 2)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "full_join_microorganisms")`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_full <- import_fn("full_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_full)) {`
			`join <- suppressWarnings(`
			`dplyr_full(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`} else {`
			`join <- suppressWarnings(`
			`pm_full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...)`
			`)`
			`}`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`if (NROW(join) > NROW(x)) {`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.")`
add tests using testthat 2018-03-27 17:43:42 +02:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
add tests using testthat 2018-03-27 17:43:42 +02:00			`join`
first commit 2018-02-21 11:52:31 +01:00			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`semi_join_microorganisms <- function(x, by = NULL, ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "semi_join_microorganisms")`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_semi <- import_fn("semi_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_semi)) {`
			`join <- suppressWarnings(`
(v1.3.0.9033) skimr fix 2020-09-28 11:00:59 +02:00			`dplyr_semi(x = x, y = microorganisms, by = by, ...)`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`)`
			`} else {`
			`join <- suppressWarnings(`
(v1.3.0.9033) skimr fix 2020-09-28 11:00:59 +02:00			`pm_semi_join(x = x, y = microorganisms, by = by, ...)`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`)`
			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
			`join`
first commit 2018-02-21 11:52:31 +01:00			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`anti_join_microorganisms <- function(x, by = NULL, ...) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`meet_criteria(x, allow_class = c("data.frame", "character"))`
			`meet_criteria(by, allow_class = "character", allow_NULL = TRUE)`

(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`check_dataset_integrity()`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- check_groups_before_join(x, "anti_join_microorganisms")`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`checked <- joins_check_df(x, by)`
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`x_class <- get_prejoined_class(x)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`x <- checked$x`
			`by <- checked$by`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`# use dplyr if available - it's much faster`
			`dplyr_anti <- import_fn("anti_join", "dplyr", error_on_fail = FALSE)`
			`if (!is.null(dplyr_anti)) {`
			`join <- suppressWarnings(`
(v1.3.0.9033) skimr fix 2020-09-28 11:00:59 +02:00			`dplyr_anti(x = x, y = microorganisms, by = by, ...)`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`)`
			`} else {`
			`join <- suppressWarnings(`
(v1.3.0.9033) skimr fix 2020-09-28 11:00:59 +02:00			`pm_anti_join(x = x, y = microorganisms, by = by, ...)`
(v1.3.0.9030) matching score update 2020-09-26 16:26:01 +02:00			`)`
			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`class(join) <- x_class`
			`join`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`}`

			`joins_check_df <- function(x, by) {`
update to septic_patients, speed improvements 2018-07-25 14:17:04 +02:00			`if (!any(class(x) %in% c("data.frame", "matrix"))) {`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`x <- data.frame(mo = as.mo(x), stringsAsFactors = FALSE)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`if (is.null(by)) {`
			`by <- "mo"`
			`}`
			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`x <- as.data.frame(x, stringsAsFactors = FALSE)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`if (is.null(by)) {`
			# search for column with class `mo` and return first one found
			`by <- colnames(x)[lapply(x, is.mo) == TRUE][1]`
			`if (is.na(by)) {`
			`if ("mo" %in% colnames(x)) {`
			`by <- "mo"`
(v1.0.1.9004) Support dplyr 1.0.0 2020-03-14 14:05:43 +01:00			`x[, "mo"] <- as.mo(x[, "mo"])`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`} else {`
(v1.2.0.9011) mo_domain(), improved error handling 2020-06-22 11:18:40 +02:00			`stop("Cannot join - no column found with name 'mo' or with class <mo>.", call. = FALSE)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`}`
			`}`
(v1.4.0.9011) message formatting 2020-10-27 15:56:51 +01:00			`message_('Joining, by = "', by, '"', add_fn = font_black, as_note = FALSE) # message same as dplyr::join functions`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`}`
first commit 2018-02-21 11:52:31 +01:00			`if (is.null(names(by))) {`
(v0.9.0.9023) EUCAST 2020 guidelines 2020-02-14 19:54:13 +01:00			`joinby <- colnames(microorganisms)[1]`
first commit 2018-02-21 11:52:31 +01:00			`names(joinby) <- by`
			`} else {`
			`joinby <- by`
			`}`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`list(x = x,`
			`by = joinby)`
first commit 2018-02-21 11:52:31 +01:00			`}`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00
(v1.2.0.9002) fix for joins 2020-06-03 14:33:55 +02:00			`get_prejoined_class <- function(x) {`
			`if (is.data.frame(x)) {`
			`class(x)`
			`} else {`
			`"data.frame"`
			`}`
			`}`

(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`check_groups_before_join <- function(x, fn) {`
			`if (is.data.frame(x) && !is.null(attributes(x)$groups)) {`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x <- pm_ungroup(x)`
			`attr(x, "groups") <- NULL`
			`class(x) <- class(x)[!class(x) %like% "group"]`
(v1.4.0.9015) bugfix 2020-11-10 16:35:56 +01:00			`warning_("Groups are dropped, since the ", fn, "() function relies on merge() from base R.", call = FALSE)`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`}`
(v1.4.0.9001) is_gram_positive(), is_gram_negative(), parameter hardening 2020-10-19 17:09:19 +02:00			`x`
(v1.2.0.9001) filter_ab_class() update 2020-06-03 11:48:00 +02:00			`}`