AMR/man/top_n_microorganisms.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/top_n_microorganisms.R
\name{top_n_microorganisms}
\alias{top_n_microorganisms}
\title{Filter Top \emph{n} Microorganisms}
\usage{
top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,
  col_mo = NULL, ...)
}
\arguments{
\item{x}{a data frame containing microbial data}

\item{n}{an integer specifying the maximum number of unique values of the \code{property} to include in the output}

\item{property}{a character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation.}

\item{n_for_each}{an optional integer specifying the maximum number of rows to retain for each value of the selected property. If \code{NULL}, all rows within the top \emph{n} groups will be included.}

\item{col_mo}{A character string indicating the column in \code{x} that contains microorganism names or codes. Defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.}

\item{...}{Additional arguments passed on to \code{\link[=mo_property]{mo_property()}} when \code{property} is not \code{NULL}.}
}
\description{
This function filters a data set to include only the top \emph{n} microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera.
}
\details{
This function is useful for preprocessing data before creating \link[=antibiogram]{antibiograms} or other analyses that require focused subsets of microbial data. For example, it can filter a data set to only include isolates from the top 10 species.
}
\examples{
# filter to the top 3 species:
top_n_microorganisms(example_isolates,
                     n = 3)

# filter to any species in the top 5 genera:
top_n_microorganisms(example_isolates,
                     n = 5, property = "genus")

# filter to the top 3 species in each of the top 5 genera:
top_n_microorganisms(example_isolates,
                     n = 5, property = "genus", n_for_each = 3)
}
\seealso{
\code{\link[=mo_property]{mo_property()}}, \code{\link[=as.mo]{as.mo()}}, \code{\link[=antibiogram]{antibiogram()}}
}
% (v2.1.1.9126) implemented WISCA! Also added `top_n_microorganisms()` and fixed Python wrapper 2025-01-26 23:01:17 +01:00
% `% Generated by roxygen2: do not edit by hand`
% `% Please edit documentation in R/top_n_microorganisms.R`
% `\name{top_n_microorganisms}`
% `\alias{top_n_microorganisms}`
% `\title{Filter Top \emph{n} Microorganisms}`
% `\usage{`
% `top_n_microorganisms(x, n, property = "fullname", n_for_each = NULL,`
% `col_mo = NULL, ...)`
% `}`
% `\arguments{`
% `\item{x}{a data frame containing microbial data}`

% `\item{n}{an integer specifying the maximum number of unique values of the \code{property} to include in the output}`

% \item{property}{a character string indicating the microorganism property to use for filtering. Must be one of the column names of the \link{microorganisms} data set: "mo", "fullname", "status", "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies", "rank", "ref", "oxygen_tolerance", "source", "lpsn", "lpsn_parent", "lpsn_renamed_to", "mycobank", "mycobank_parent", "mycobank_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", or "snomed". If \code{NULL}, the raw values from \code{col_mo} will be used without transformation.}

% `\item{n_for_each}{an optional integer specifying the maximum number of rows to retain for each value of the selected property. If \code{NULL}, all rows within the top \emph{n} groups will be included.}`

% `\item{col_mo}{A character string indicating the column in \code{x} that contains microorganism names or codes. Defaults to the first column of class \code{\link{mo}}. Values will be coerced using \code{\link[=as.mo]{as.mo()}}.}`

% `\item{...}{Additional arguments passed on to \code{\link[=mo_property]{mo_property()}} when \code{property} is not \code{NULL}.}`
% `}`
% `\description{`
% `This function filters a data set to include only the top \emph{n} microorganisms based on a specified property, such as taxonomic family or genus. For example, it can filter a data set to the top 3 species, or to any species in the top 5 genera, or to the top 3 species in each of the top 5 genera.`
% `}`
% `\details{`
% `This function is useful for preprocessing data before creating \link[=antibiograms]{antibiograms} or other analyses that require focused subsets of microbial data. For example, it can filter a data set to only include isolates from the top 10 species.`
% `}`
% `\examples{`
% `# filter to the top 3 species:`
% `top_n_microorganisms(example_isolates,`
% `n = 3)`

% `# filter to any species in the top 5 genera:`
% `top_n_microorganisms(example_isolates,`
% `n = 5, property = "genus")`

% `# filter to the top 3 species in each of the top 5 genera:`
% `top_n_microorganisms(example_isolates,`
% `n = 5, property = "genus", n_for_each = 3)`
% `}`
% `\seealso{`
% `\code{\link[=mo_property]{mo_property()}}, \code{\link[=as.mo]{as.mo()}}, \code{\link[=antibiogram]{antibiogram()}}`
% `}`