AMR/R/join_microorganisms.R

149 lines
5.1 KiB
R
Raw Normal View History

#' Join a table with \code{microorganisms}
#'
#' Join the dataset \code{\link{microorganisms}} easily to an existing table or character vector.
#' @rdname join
#' @name join
#' @aliases join inner_join
#' @param x existing table to join, also supports character vectors
#' @param by a variable to join by - could be a column name of \code{x} with values that exist in \code{microorganisms$mo} (like \code{by = "bacteria_id"}), or another column in \code{\link{microorganisms}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})
#' @param suffix if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.
#' @param ... other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.
#' @details As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, character vectors are supported and by default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
#' @export
#' @examples
#' left_join_microorganisms("STAAUR")
#'
#' library(dplyr)
#' septic_patients %>% left_join_microorganisms()
#'
#' df <- data.frame(date = seq(from = as.Date("2018-01-01"),
#'                             to = as.Date("2018-01-07"),
#'                             by = 1),
#'                  bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR",
#'                                  "ESCCOL", "ESCCOL", "ESCCOL"),
#'                  stringsAsFactors = FALSE)
#' colnames(df)
#' df2 <- left_join_microorganisms(df, "bacteria_id")
#' colnames(df2)
inner_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
  # Inner join of `x` with AMR::microorganisms; returns the joined table.
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  join <- suppressWarnings(
    dplyr::inner_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
  )

  # more output rows than input rows means keys matched more than once
  if (nrow(join) > nrow(x)) {
    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more than its original')
  }
  join
}
#' @rdname join
#' @export
left_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
  # Left join of `x` with AMR::microorganisms; returns the joined table.
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  join <- suppressWarnings(
    dplyr::left_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
  )

  # more output rows than input rows means keys matched more than once
  if (nrow(join) > nrow(x)) {
    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more than its original')
  }
  join
}
#' @rdname join
#' @export
right_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
  # Right join of `x` with AMR::microorganisms; returns the joined table.
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  join <- suppressWarnings(
    dplyr::right_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
  )

  # warn whenever the result is longer than the input table
  if (nrow(join) > nrow(x)) {
    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more than its original')
  }
  join
}
#' @rdname join
#' @export
full_join_microorganisms <- function(x, by = 'mo', suffix = c("2", ""), ...) {
  # Full join of `x` with AMR::microorganisms; returns the joined table.
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  join <- suppressWarnings(
    dplyr::full_join(x = x, y = AMR::microorganisms, by = joinby, suffix = suffix, ...)
  )

  # warn whenever the result is longer than the input table
  if (nrow(join) > nrow(x)) {
    warning('the newly joined tbl contains ', nrow(join) - nrow(x), ' rows more than its original')
  }
  join
}
#' @rdname join
#' @export
semi_join_microorganisms <- function(x, by = 'mo', ...) {
  # Semi join of `x` with AMR::microorganisms; returns the result of
  # dplyr::semi_join().
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  suppressWarnings(
    dplyr::semi_join(x = x, y = AMR::microorganisms, by = joinby, ...)
  )
}
#' @rdname join
#' @export
anti_join_microorganisms <- function(x, by = 'mo', ...) {
  # Anti join of `x` with AMR::microorganisms; returns the result of
  # dplyr::anti_join().
  if (inherits(x, "matrix")) {
    # dplyr joins dispatch on data frames, not matrices, so coerce first
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else if (!inherits(x, "data.frame")) {
    # any other input is treated as a vector of codes in a column named `mo`
    x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE)
  }

  if (is.null(names(by))) {
    # unnamed `by`: match that column of `x` against the first column of
    # the microorganisms data set
    joinby <- colnames(AMR::microorganisms)[1]
    names(joinby) <- by
  } else {
    # named `by`: the caller already specified both sides of the join
    joinby <- by
  }

  suppressWarnings(
    dplyr::anti_join(x = x, y = AMR::microorganisms, by = joinby, ...)
  )
}