2018-12-16 22:45:12 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
# https://gitlab.com/msberends/AMR #
2018-12-16 22:45:12 +01:00
# #
# LICENCE #
2020-01-05 17:22:09 +01:00
# (c) 2018-2020 Berends MS, Luz CF et al. #
2018-12-16 22:45:12 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2018-12-16 22:45:12 +01:00
# ==================================================================== #
2020-05-16 13:05:47 +02:00
#' Join [microorganisms] to a data set
2018-02-21 11:52:31 +01:00
#'
2019-11-28 22:32:17 +01:00
#' Join the data set [microorganisms] easily to an existing table or character vector.
2020-01-05 17:22:09 +01:00
#' @inheritSection lifecycle Stable lifecycle
2018-02-21 11:52:31 +01:00
#' @rdname join
#' @name join
#' @aliases join inner_join
2018-10-12 16:35:18 +02:00
#' @param x existing table to join, or character vector
2019-11-28 22:32:17 +01:00
#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (like `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("my_genus_species" = "fullname")`)
#' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
#' @param ... other parameters to pass on to [dplyr::join()]
2020-05-16 13:05:47 +02:00
#' @details **Note:** As opposed to the [join()] functions of `dplyr`, [`character`] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix.
#'
#' These functions rely on [merge()], a base R function to do joins.
2019-01-02 23:24:07 +01:00
#' @inheritSection AMR Read more on our website!
2018-02-21 11:52:31 +01:00
#' @export
2018-04-02 11:11:21 +02:00
#' @examples
2018-10-12 16:35:18 +02:00
#' left_join_microorganisms(as.mo("K. pneumoniae"))
#' left_join_microorganisms("B_KLBSL_PNE")
2018-04-02 11:11:21 +02:00
#'
2020-05-16 13:05:47 +02:00
#' \dontrun{
2018-03-19 20:39:23 +01:00
#' library(dplyr)
2019-08-27 16:45:42 +02:00
#' example_isolates %>% left_join_microorganisms()
2018-04-02 11:11:21 +02:00
#'
2018-02-22 20:48:48 +01:00
#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),
#' to = as.Date("2018-01-07"),
#' by = 1),
2018-10-12 16:35:18 +02:00
#' bacteria = as.mo(c("S. aureus", "MRSA", "MSSA", "STAAUR",
#' "E. coli", "E. coli", "E. coli")),
2018-02-22 20:48:48 +01:00
#' stringsAsFactors = FALSE)
#' colnames(df)
2018-10-12 16:35:18 +02:00
#' df_joined <- left_join_microorganisms(df, "bacteria")
#' colnames(df_joined)
2020-05-16 13:05:47 +02:00
#' }
2018-10-12 16:35:18 +02:00
inner_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " inner_join_microorganisms" )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
inner_join ( x = x , y = microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2020-03-14 14:05:43 +01:00
if ( NROW ( join ) > NROW ( x ) ) {
2019-10-11 17:21:02 +02:00
warning ( " The newly joined tbl contains " , nrow ( join ) - nrow ( x ) , " rows more that its original." )
2018-02-21 11:52:31 +01:00
}
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
2018-02-21 11:52:31 +01:00
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
left_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " left_join_microorganisms" )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
left_join ( x = x , y = microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2020-03-14 14:05:43 +01:00
if ( NROW ( join ) > NROW ( x ) ) {
2019-10-11 17:21:02 +02:00
warning ( " The newly joined tbl contains " , nrow ( join ) - nrow ( x ) , " rows more that its original." )
2018-02-21 11:52:31 +01:00
}
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
2018-02-21 11:52:31 +01:00
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
right_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " right_join_microorganisms" )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
right_join ( x = x , y = microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2020-03-14 14:05:43 +01:00
if ( NROW ( join ) > NROW ( x ) ) {
2019-10-11 17:21:02 +02:00
warning ( " The newly joined tbl contains " , nrow ( join ) - nrow ( x ) , " rows more that its original." )
2018-02-21 11:52:31 +01:00
}
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
2018-02-21 11:52:31 +01:00
join
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
full_join_microorganisms <- function ( x , by = NULL , suffix = c ( " 2" , " " ) , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " full_join_microorganisms" )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
x <- checked $ x
by <- checked $ by
2018-07-23 14:14:03 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
full_join ( x = x , y = microorganisms , by = by , suffix = suffix , ... )
2018-07-23 14:14:03 +02:00
)
2020-03-14 14:05:43 +01:00
if ( NROW ( join ) > NROW ( x ) ) {
2019-10-11 17:21:02 +02:00
warning ( " The newly joined tbl contains " , nrow ( join ) - nrow ( x ) , " rows more that its original." )
2018-03-27 17:43:42 +02:00
}
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
2018-03-27 17:43:42 +02:00
join
2018-02-21 11:52:31 +01:00
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
semi_join_microorganisms <- function ( x , by = NULL , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " semi_join_microorganisms" )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
x <- checked $ x
by <- checked $ by
2020-06-03 11:48:00 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
semi_join ( x = x , y = microorganisms , by = by , ... )
2018-07-23 14:14:03 +02:00
)
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
join
2018-02-21 11:52:31 +01:00
}
#' @rdname join
#' @export
2018-10-12 16:35:18 +02:00
anti_join_microorganisms <- function ( x , by = NULL , ... ) {
2020-03-14 14:05:43 +01:00
check_dataset_integrity ( )
2020-06-03 11:48:00 +02:00
check_groups_before_join ( x , " anti_join_microorganisms" )
2018-10-12 16:35:18 +02:00
checked <- joins_check_df ( x , by )
2020-06-03 14:33:55 +02:00
x_class <- get_prejoined_class ( x )
2018-10-12 16:35:18 +02:00
x <- checked $ x
by <- checked $ by
2020-06-03 11:48:00 +02:00
join <- suppressWarnings (
2020-05-16 13:05:47 +02:00
anti_join ( x = x , y = microorganisms , by = by , ... )
2018-10-12 16:35:18 +02:00
)
2020-06-03 11:48:00 +02:00
class ( join ) <- x_class
join
2018-10-12 16:35:18 +02:00
}
joins_check_df <- function ( x , by ) {
2018-07-25 14:17:04 +02:00
if ( ! any ( class ( x ) %in% c ( " data.frame" , " matrix" ) ) ) {
2020-03-14 14:05:43 +01:00
x <- data.frame ( mo = as.mo ( x ) , stringsAsFactors = FALSE )
2018-10-12 16:35:18 +02:00
if ( is.null ( by ) ) {
by <- " mo"
}
}
2020-06-03 11:48:00 +02:00
x <- as.data.frame ( x , stringsAsFactors = FALSE )
2018-10-12 16:35:18 +02:00
if ( is.null ( by ) ) {
# search for column with class `mo` and return first one found
by <- colnames ( x ) [lapply ( x , is.mo ) == TRUE ] [1 ]
if ( is.na ( by ) ) {
if ( " mo" %in% colnames ( x ) ) {
by <- " mo"
2020-03-14 14:05:43 +01:00
x [ , " mo" ] <- as.mo ( x [ , " mo" ] )
2018-10-12 16:35:18 +02:00
} else {
2020-05-27 16:37:49 +02:00
stop ( " Cannot join - no column found with name or class <mo>." , call. = FALSE )
2018-10-12 16:35:18 +02:00
}
}
message ( ' Joining, by = "' , by , ' "' ) # message same as dplyr::join functions
2018-03-19 12:43:22 +01:00
}
2018-02-21 11:52:31 +01:00
if ( is.null ( names ( by ) ) ) {
2020-02-14 19:54:13 +01:00
joinby <- colnames ( microorganisms ) [1 ]
2018-02-21 11:52:31 +01:00
names ( joinby ) <- by
} else {
joinby <- by
}
2018-10-12 16:35:18 +02:00
list ( x = x ,
by = joinby )
2018-02-21 11:52:31 +01:00
}
2020-06-03 11:48:00 +02:00
2020-06-03 14:33:55 +02:00
get_prejoined_class <- function ( x ) {
if ( is.data.frame ( x ) ) {
class ( x )
} else {
" data.frame"
}
}
2020-06-03 11:48:00 +02:00
check_groups_before_join <- function ( x , fn ) {
if ( is.data.frame ( x ) && ! is.null ( attributes ( x ) $ groups ) ) {
warning ( " Groups are dropped, since the " , fn , " () function relies on merge() from base R, not on join() from dplyr." , call. = FALSE )
}
}