# AMR/R/join_microorganisms.R

# ==================================================================== #
# TITLE                                                                #
# Antimicrobial Resistance (AMR) Analysis                              #
#                                                                      #
# SOURCE                                                               #
# https://gitlab.com/msberends/AMR                                     #
#                                                                      #
# LICENCE                                                              #
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)  #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
#                                                                      #
# This R package was created for academic research and was publicly    #
# released in the hope that it will be useful, but it comes WITHOUT    #
# ANY WARRANTY OR LIABILITY.                                           #
# Visit our website for more info: https://msberends.gitlab.io/AMR.    #
# ==================================================================== #

#' Join a table with \code{microorganisms}
#'
#' Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector.
#' @rdname join
#' @name join
#' @aliases join inner_join
#' @param x existing table to join, or character vector
#' @param by a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
#' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
#' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.
#' @details \strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, character vectors are supported and by default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
#' @inheritSection AMR Read more on our website!
#' @export
#' @examples
#' left_join_microorganisms(as.mo("K. pneumoniae"))
#' left_join_microorganisms("B_KLBSL_PNE")
#'
#' library(dplyr)
#' example_isolates %>% left_join_microorganisms()
#'
#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),
#'                             to = as.Date("2018-01-07"),
#'                             by = 1),
#'                  bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
#'                                     "E. coli", "E. coli", "E. coli")),
#'                  stringsAsFactors = FALSE)
#' colnames(df)
#' df_joined <- left_join_microorganisms(df, "bacteria")
#' colnames(df_joined)
inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)
  lhs <- prepared[["x"]]
  join_spec <- prepared[["by"]]

  # Warnings raised by dplyr during the join itself are silenced.
  joined <- suppressWarnings(
    dplyr::inner_join(
      x = lhs,
      y = AMR::microorganisms,
      by = join_spec,
      suffix = suffix,
      ...
    )
  )

  # Warn when the result has more rows than the input table.
  # NOTE(review): "more that" is probably a typo for "more than"; the text is
  # user-visible output and is therefore left unchanged here.
  extra_rows <- nrow(joined) - nrow(lhs)
  if (extra_rows > 0) {
    warning(
      "The newly joined tbl contains ",
      extra_rows,
      " rows more that its original."
    )
  }
  joined
}

#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)
  lhs <- prepared[["x"]]
  join_spec <- prepared[["by"]]

  # Warnings raised by dplyr during the join itself are silenced.
  joined <- suppressWarnings(
    dplyr::left_join(
      x = lhs,
      y = AMR::microorganisms,
      by = join_spec,
      suffix = suffix,
      ...
    )
  )

  # Warn when the result has more rows than the input table.
  # NOTE(review): "more that" is probably a typo for "more than"; the text is
  # user-visible output and is therefore left unchanged here.
  extra_rows <- nrow(joined) - nrow(lhs)
  if (extra_rows > 0) {
    warning(
      "The newly joined tbl contains ",
      extra_rows,
      " rows more that its original."
    )
  }
  joined
}

#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)
  lhs <- prepared[["x"]]
  join_spec <- prepared[["by"]]

  # Warnings raised by dplyr during the join itself are silenced.
  joined <- suppressWarnings(
    dplyr::right_join(
      x = lhs,
      y = AMR::microorganisms,
      by = join_spec,
      suffix = suffix,
      ...
    )
  )

  # Warn when the result has more rows than the input table.
  # NOTE(review): "more that" is probably a typo for "more than"; the text is
  # user-visible output and is therefore left unchanged here.
  extra_rows <- nrow(joined) - nrow(lhs)
  if (extra_rows > 0) {
    warning(
      "The newly joined tbl contains ",
      extra_rows,
      " rows more that its original."
    )
  }
  joined
}

#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)
  lhs <- prepared[["x"]]
  join_spec <- prepared[["by"]]

  # Warnings raised by dplyr during the join itself are silenced.
  joined <- suppressWarnings(
    dplyr::full_join(
      x = lhs,
      y = AMR::microorganisms,
      by = join_spec,
      suffix = suffix,
      ...
    )
  )

  # Warn when the result has more rows than the input table.
  # NOTE(review): "more that" is probably a typo for "more than"; the text is
  # user-visible output and is therefore left unchanged here.
  extra_rows <- nrow(joined) - nrow(lhs)
  if (extra_rows > 0) {
    warning(
      "The newly joined tbl contains ",
      extra_rows,
      " rows more that its original."
    )
  }
  joined
}

#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = NULL, ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)

  # Delegate to dplyr::semi_join() against the microorganisms data set;
  # warnings raised by the join are silenced and its result is returned as is.
  suppressWarnings(
    dplyr::semi_join(
      x = prepared[["x"]],
      y = AMR::microorganisms,
      by = prepared[["by"]],
      ...
    )
  )
}

#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = NULL, ...) {
  # joins_check_df() hands back the table to join (`x`) and the resolved
  # join specification (`by`).
  prepared <- joins_check_df(x, by)

  # Delegate to dplyr::anti_join() against the microorganisms data set;
  # warnings raised by the join are silenced and its result is returned as is.
  suppressWarnings(
    dplyr::anti_join(
      x = prepared[["x"]],
      y = AMR::microorganisms,
      by = prepared[["by"]],
      ...
    )
  )
}

# Prepare the input of the *_join_microorganisms() functions.
#
# Arguments:
#   x   a data.frame, a matrix, or any other object that as.character() can
#       turn into a vector of values to join on
#   by  NULL, a single unnamed column name of `x`, or a named character vector
#       (names refer to columns of `x`, values to columns of
#       AMR::microorganisms)
#
# Returns a list with two elements:
#   x   the table to join, always a data.frame
#   by  a named character vector to pass as `by` to the dplyr join functions
#
# Stops with an error when `by` is NULL and `x` has neither a column of class
# `mo` nor a column named "mo", or when an unnamed `by` is not of length 1.
joins_check_df <- function(x, by) {
  if (inherits(x, "matrix")) {
    # The dplyr joins work on data.frames, and the column search below has to
    # iterate over columns rather than over individual matrix cells.
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # Anything else is treated as a vector and wrapped in a column named `mo`.
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
    if (is.null(by)) {
      by <- "mo"
    }
  }

  if (is.null(by)) {
    # search for column with class `mo` and return first one found;
    # vapply() keeps the result a plain logical vector, also for zero columns
    is_mo_column <- vapply(x, is.mo, logical(1))
    by <- colnames(x)[is_mo_column][1]
    if (is.na(by)) {
      # no column of class `mo`: fall back to a column literally named "mo"
      if ("mo" %in% colnames(x)) {
        by <- "mo"
      } else {
        stop("Cannot join - no column found with name or class  `mo`.", call. = FALSE)
      }
    }
    message('Joining, by = "', by, '"') # message same as dplyr::join functions
  }

  if (is.null(names(by))) {
    # An unnamed `by` is the name of the column in `x` that is matched against
    # the first column of AMR::microorganisms, so it must be exactly one name.
    if (length(by) != 1L) {
      stop("`by` must be a single column name of `x` when it is not named.",
           call. = FALSE)
    }
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # A named `by` is already a complete join specification.
    joinby <- by
  }

  list(x = x,
       by = joinby)
}
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# ==================================================================== #`
			`# TITLE #`
			`# Antimicrobial Resistance (AMR) Analysis #`
			`# #`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`# SOURCE #`
			`# https://gitlab.com/msberends/AMR #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# #`
			`# LICENCE #`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# #`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`# This R package is free software; you can freely use and distribute #`
			`# it for both personal and commercial purposes under the terms of the #`
			`# GNU General Public License version 2.0 (GNU GPL-2), as published by #`
			`# the Free Software Foundation. #`
			`# #`
			`# This R package was created for academic research and was publicly #`
			`# released in the hope that it will be useful, but it comes WITHOUT #`
			`# ANY WARRANTY OR LIABILITY. #`
new EUCAST rules algorithm 2019-04-05 18:47:39 +02:00			`# Visit our website for more info: https://msberends.gitlab.io/AMR. #`
limits for scale_y_percent - Licence update 2018-12-16 22:45:12 +01:00			`# ==================================================================== #`

- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`#' Join a table with \code{microorganisms}`
first commit 2018-02-21 11:52:31 +01:00			`#'`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`#' Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector.`
first commit 2018-02-21 11:52:31 +01:00			`#' @rdname join`
			`#' @name join`
			`#' @aliases join inner_join`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' @param x existing table to join, or character vector`
			`#' @param by a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`#' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.`
added septic_patients 2018-02-27 20:01:02 +01:00			`#' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' @details \strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.`
big website update, licence txt update 2019-01-02 23:24:07 +01:00			`#' @inheritSection AMR Read more on our website!`
first commit 2018-02-21 11:52:31 +01:00			`#' @export`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`#' @examples`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' left_join_microorganisms(as.mo("K. pneumoniae"))`
			`#' left_join_microorganisms("B_KLBSL_PNE")`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`#'`
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`#' library(dplyr)`
(v0.7.1.9063) septic_patients -> example_isolates 2019-08-27 16:45:42 +02:00			`#' example_isolates %>% left_join_microorganisms()`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`#'`
First CRAN submission edits 2018-02-22 20:48:48 +01:00			`#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),`
			`#' to = as.Date("2018-01-07"),`
			`#' by = 1),`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",`
			`#' "E. coli", "E. coli", "E. coli")),`
First CRAN submission edits 2018-02-22 20:48:48 +01:00			`#' stringsAsFactors = FALSE)`
			`#' colnames(df)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`#' df_joined <- left_join_microorganisms(df, "bacteria")`
			`#' colnames(df_joined)`
			`inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
new class bactid 2018-07-23 14:14:03 +02:00			`join <- suppressWarnings(`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`dplyr::inner_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)`
new class bactid 2018-07-23 14:14:03 +02:00			`)`
first commit 2018-02-21 11:52:31 +01:00			`if (nrow(join) > nrow(x)) {`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')`
first commit 2018-02-21 11:52:31 +01:00			`}`
			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
new class bactid 2018-07-23 14:14:03 +02:00			`join <- suppressWarnings(`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`dplyr::left_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)`
new class bactid 2018-07-23 14:14:03 +02:00			`)`
first commit 2018-02-21 11:52:31 +01:00			`if (nrow(join) > nrow(x)) {`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')`
first commit 2018-02-21 11:52:31 +01:00			`}`
			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
new class bactid 2018-07-23 14:14:03 +02:00			`join <- suppressWarnings(`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`dplyr::right_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)`
new class bactid 2018-07-23 14:14:03 +02:00			`)`
first commit 2018-02-21 11:52:31 +01:00			`if (nrow(join) > nrow(x)) {`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')`
first commit 2018-02-21 11:52:31 +01:00			`}`
			`join`
			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
new class bactid 2018-07-23 14:14:03 +02:00			`join <- suppressWarnings(`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`dplyr::full_join(x = x, y = AMR::microorganisms, by = by, suffix = suffix, ...)`
new class bactid 2018-07-23 14:14:03 +02:00			`)`
add tests using testthat 2018-03-27 17:43:42 +02:00			`if (nrow(join) > nrow(x)) {`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`warning('The newly joined tbl contains ', nrow(join) - nrow(x), ' rows more that its original.')`
add tests using testthat 2018-03-27 17:43:42 +02:00			`}`
			`join`
first commit 2018-02-21 11:52:31 +01:00			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`semi_join_microorganisms <- function(x, by = NULL, ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
new class bactid 2018-07-23 14:14:03 +02:00			`suppressWarnings(`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`dplyr::semi_join(x = x, y = AMR::microorganisms, by = by, ...)`
new class bactid 2018-07-23 14:14:03 +02:00			`)`
first commit 2018-02-21 11:52:31 +01:00			`}`

			`#' @rdname join`
			`#' @export`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`anti_join_microorganisms <- function(x, by = NULL, ...) {`
			`checked <- joins_check_df(x, by)`
			`x <- checked$x`
			`by <- checked$by`
			`suppressWarnings(`
			`dplyr::anti_join(x = x, y = AMR::microorganisms, by = by, ...)`
			`)`
			`}`

			`joins_check_df <- function(x, by) {`
update to septic_patients, speed improvements 2018-07-25 14:17:04 +02:00			`if (!any(class(x) %in% c("data.frame", "matrix"))) {`
replaced bactid by mo 2018-08-31 13:36:19 +02:00			`x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`if (is.null(by)) {`
			`by <- "mo"`
			`}`
			`}`
			`if (is.null(by)) {`
			# search for column with class `mo` and return first one found
			`by <- colnames(x)[lapply(x, is.mo) == TRUE][1]`
			`if (is.na(by)) {`
			`if ("mo" %in% colnames(x)) {`
			`by <- "mo"`
			`} else {`
			stop("Cannot join - no column found with name or class `mo`.", call. = FALSE)
			`}`
			`}`
			`message('Joining, by = "', by, '"') # message same as dplyr::join functions`
- Added new function `guess_bactid` to determine the ID of a microorganism based on genus/species - Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive 2018-03-19 12:43:22 +01:00			`}`
first commit 2018-02-21 11:52:31 +01:00			`if (is.null(names(by))) {`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`joinby <- colnames(AMR::microorganisms)[1]`
first commit 2018-02-21 11:52:31 +01:00			`names(joinby) <- by`
			`} else {`
			`joinby <- by`
			`}`
count_all and some fixes 2018-10-12 16:35:18 +02:00			`list(x = x,`
			`by = joinby)`
first commit 2018-02-21 11:52:31 +01:00			`}`