2018-12-16 22:45:12 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This package is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This R package is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License version 2.0 for more details. #
# ==================================================================== #
2018-03-23 14:46:02 +01:00
#' Join a table with \code{microorganisms}
2018-02-21 11:52:31 +01:00
#'
2018-03-23 14:46:02 +01:00
#' Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector.
2018-02-21 11:52:31 +01:00
#' @rdname join
#' @name join
#' @aliases join inner_join
2018-10-12 16:35:18 +02:00
#' @param x existing table to join, or character vector
#' @param by a variable to join by - if left empty will search for a column with class \code{mo} (created with \code{\link{as.mo}}) or will be \code{"mo"} if that column name exists in \code{x}, could otherwise be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
2018-03-19 12:43:22 +01:00
#' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
2018-02-27 20:01:02 +01:00
#' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.
2018-10-12 16:35:18 +02:00
#' @details \strong{Note:} As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, characters vectors are supported and at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
2018-02-21 11:52:31 +01:00
#' @export
2018-04-02 11:11:21 +02:00
#' @examples
2018-10-12 16:35:18 +02:00
#' left_join_microorganisms(as.mo("K. pneumoniae"))
#' left_join_microorganisms("B_KLBSL_PNE")
2018-04-02 11:11:21 +02:00
#'
2018-03-19 20:39:23 +01:00
#' library(dplyr)
2018-03-23 14:46:02 +01:00
#' septic_patients %>% left_join_microorganisms()
2018-04-02 11:11:21 +02:00
#'
2018-02-22 20:48:48 +01:00
#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),
#' to = as.Date("2018-01-07"),
#' by = 1),
2018-10-12 16:35:18 +02:00
#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
#' "E. coli", "E. coli", "E. coli")),
2018-02-22 20:48:48 +01:00
#' stringsAsFactors = FALSE)
#' colnames(df)
2018-10-12 16:35:18 +02:00
#' df_joined <- left_join_microorganisms(df, "bacteria")
#' colnames(df_joined)
inner_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2018-10-12 16:35:18 +02:00
dplyr :: inner_join ( x = x , y = AMR :: microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2018-02-21 11:52:31 +01:00
if ( nrow ( join ) > nrow ( x ) ) {
2018-10-12 16:35:18 +02:00
warning ( ' The newly joined tbl contains ' , nrow ( join ) - nrow ( x ) , ' rows more that its original.' )
2018-02-21 11:52:31 +01:00
}
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
left_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2018-10-12 16:35:18 +02:00
dplyr :: left_join ( x = x , y = AMR :: microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2018-02-21 11:52:31 +01:00
if ( nrow ( join ) > nrow ( x ) ) {
2018-10-12 16:35:18 +02:00
warning ( ' The newly joined tbl contains ' , nrow ( join ) - nrow ( x ) , ' rows more that its original.' )
2018-02-21 11:52:31 +01:00
}
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
right_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2018-10-12 16:35:18 +02:00
dplyr :: right_join ( x = x , y = AMR :: microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2018-02-21 11:52:31 +01:00
if ( nrow ( join ) > nrow ( x ) ) {
2018-10-12 16:35:18 +02:00
warning ( ' The newly joined tbl contains ' , nrow ( join ) - nrow ( x ) , ' rows more that its original.' )
2018-02-21 11:52:31 +01:00
}
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
full_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2018-10-12 16:35:18 +02:00
dplyr :: full_join ( x = x , y = AMR :: microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2018-03-27 17:43:42 +02:00
if ( nrow ( join ) > nrow ( x ) ) {
2018-10-12 16:35:18 +02:00
warning ( ' The newly joined tbl contains ' , nrow ( join ) - nrow ( x ) , ' rows more that its original.' )
2018-03-27 17:43:42 +02:00
}
join
2018-02-21 11:52:31 +01:00
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
semi_join_microorganisms <- function ( x , by = NULL , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
suppressWarnings (
2018-10-12 16:35:18 +02:00
dplyr :: semi_join ( x = x , y = AMR :: microorganisms , by = by , ... )
2018-07-23 14:14:03 +02:00
)
2018-02-21 11:52:31 +01:00
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
anti_join_microorganisms <- function ( x , by = NULL , ... ) {
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
suppressWarnings (
dplyr :: anti_join ( x = x , y = AMR :: microorganisms , by = by , ... )
)
}
joins_check_df <- function ( x , by ) {
2018-07-25 14:17:04 +02:00
if ( ! any ( class ( x ) %in% c ( " data.frame" , " matrix" ) ) ) {
2018-08-31 13:36:19 +02:00
x <- data.frame ( mo = as.character ( x ) , stringsAsFactors = FALSE )
2018-10-12 16:35:18 +02:00
if ( is.null ( by ) ) {
by <- " mo"
}
}
if ( is.null ( by ) ) {
# search for column with class `mo` and return first one found
by <- colnames ( x ) [lapply ( x , is.mo ) == TRUE ] [1 ]
if ( is.na ( by ) ) {
if ( " mo" %in% colnames ( x ) ) {
by <- " mo"
} else {
stop ( " Cannot join - no column found with name or class `mo`." , call. = FALSE )
}
}
message ( ' Joining, by = "' , by , ' "' ) # message same as dplyr::join functions
2018-03-19 12:43:22 +01:00
}
2018-02-21 11:52:31 +01:00
if ( is.null ( names ( by ) ) ) {
2018-03-23 14:46:02 +01:00
joinby <- colnames ( AMR :: microorganisms ) [1 ]
2018-02-21 11:52:31 +01:00
names ( joinby ) <- by
} else {
joinby <- by
}
2018-10-12 16:35:18 +02:00
list ( x = x ,
by = joinby )
2018-02-21 11:52:31 +01:00
}