- Added new function guess_bactid to determine the ID of a microorganism based on genus/species

- Renamed `ablist` to `antibiotics` - Added support for character vector in join functions - Altered `%like%` to make it case insensitive
2026-02-26 13:10:50 +01:00 · 2018-03-19 12:43:22 +01:00
parent 0fec64a240
commit 502a44eb25
23 changed files with 312 additions and 103 deletions
--- a/man/ablist.Rd
+++ b/man/ablist.Rd
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{ablist}
-\alias{ablist}
-\title{Dataset with 420 antibiotics}
-\format{A data.frame with 420 observations and 12 variables:
-\describe{
-  \item{\code{atc}}{ATC code, like \code{J01CR02}}
-  \item{\code{molis}}{MOLIS code, like \code{amcl}}
-  \item{\code{umcg}}{UMCG code, like \code{AMCL}}
-  \item{\code{official}}{Official name by the WHO, like \code{"amoxicillin and enzyme inhibitor"}}
-  \item{\code{official_nl}}{Official name in the Netherlands, like \code{"Amoxicilline met enzymremmer"}}
-  \item{\code{trivial_nl}}{Trivial name in Dutch, like \code{"Amoxicilline/clavulaanzuur"}}
-  \item{\code{oral_ddd}}{Defined Daily Dose (DDD) according to the WHO, oral treatment}
-  \item{\code{oral_units}}{Units of \code{ddd_units}}
-  \item{\code{iv_ddd}}{Defined Daily Dose (DDD) according to the WHO, parenteral treatment}
-  \item{\code{iv_units}}{Units of \code{iv_ddd}}
-  \item{\code{atc_group1}}{ATC group in Dutch, like \code{"Macroliden, lincosamiden en streptograminen"}}
-  \item{\code{atc_group2}}{Subgroup of \code{atc_group1} in Dutch, like \code{"Macroliden"}}
-}}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl} \cr \cr World Health Organization - \url{https://www.whocc.no/atc_ddd_index/}
-}
-\usage{
-ablist
-}
-\description{
-A dataset containing all antibiotics with a J0 code, with their DDD's.
-}
-\seealso{
-\code{\link{bactlist}}
-}
-\keyword{datasets}
--- a/man/abname.Rd
+++ b/man/abname.Rd
@@ -4,7 +4,7 @@
 \alias{abname}
 \title{Name of an antibiotic}
 \source{
-\code{\link{ablist}}
+\code{\link{antibiotics}}
 }
 \usage{
 abname(abcode, from = "umcg", to = "official", textbetween = " + ",
@@ -13,14 +13,14 @@ abname(abcode, from = "umcg", to = "official", textbetween = " + ",
 \arguments{
 \item{abcode}{a code or name, like \code{"AMOX"}, \code{"AMCL"} or \code{"J01CA04"}}

-\item{from, to}{type to transform from and to. See \code{\link{ablist}} for its column names.}
+\item{from, to}{type to transform from and to. See \code{\link{antibiotics}} for its column names.}

 \item{textbetween}{text to put between multiple returned texts}

 \item{tolower}{return output as lower case with function \code{\link{tolower}}.}
 }
 \description{
-Convert antibiotic codes (from a laboratory information system like MOLIS or GLIMS) to a (trivial) antibiotic name or ATC code, or vice versa. This uses the data from \code{\link{ablist}}.
+Convert antibiotic codes (from a laboratory information system like MOLIS or GLIMS) to a (trivial) antibiotic name or ATC code, or vice versa. This uses the data from \code{\link{antibiotics}}.
 }
 \examples{
 abname("AMCL")
--- a/man/antibiotics.Rd
+++ b/man/antibiotics.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{antibiotics}
+\alias{antibiotics}
+\title{Dataset with 420 antibiotics}
+\format{A data.frame with 420 observations and 16 variables:
+\describe{
+  \item{\code{atc}}{ATC code, like \code{J01CR02}}
+  \item{\code{molis}}{MOLIS code, like \code{amcl}}
+  \item{\code{umcg}}{UMCG code, like \code{AMCL}}
+  \item{\code{official}}{Official name by the WHO, like \code{"Amoxicillin and enzyme inhibitor"}}
+  \item{\code{official_nl}}{Official name in the Netherlands, like \code{"Amoxicilline met enzymremmer"}}
+  \item{\code{trivial_nl}}{Trivial name in Dutch, like \code{"Amoxicilline/clavulaanzuur"}}
+  \item{\code{oral_ddd}}{Defined Daily Dose (DDD), oral treatment}
+  \item{\code{oral_units}}{Units of \code{oral_ddd}}
+  \item{\code{iv_ddd}}{Defined Daily Dose (DDD), parenteral treatment}
+  \item{\code{iv_units}}{Units of \code{iv_ddd}}
+  \item{\code{atc_group1}}{ATC group, like \code{"Macrolides, lincosamides and streptogramins"}}
+  \item{\code{atc_group2}}{Subgroup of \code{atc_group1}, like \code{"Macrolides"}}
+  \item{\code{atc_group1_nl}}{ATC group in Dutch, like \code{"Macroliden, lincosamiden en streptograminen"}}
+  \item{\code{atc_group2_nl}}{Subgroup of \code{atc_group1} in Dutch, like \code{"Macroliden"}}
+  \item{\code{useful_gramnegative}}{\code{FALSE} if not useful according to EUCAST, \code{NA} otherwise (see Source)}
+  \item{\code{useful_grampositive}}{\code{FALSE} if not useful according to EUCAST, \code{NA} otherwise (see Source)}
+}}
+\source{
+- World Health Organization: \url{https://www.whocc.no/atc_ddd_index/} \cr - EUCAST - Expert rules intrinsic exceptional V3.1 \cr - MOLIS (LIS of Certe): \url{https://www.certe.nl} \cr - GLIMS (LIS of UMCG): \url{https://www.umcg.nl}
+}
+\usage{
+antibiotics
+}
+\description{
+A dataset containing all antibiotics with a J0 code, with their DDD's. Properties were downloaded from the WHO, see Source.
+}
+\seealso{
+\code{\link{bactlist}}
+}
+\keyword{datasets}
--- a/man/bactlist.Rd
+++ b/man/bactlist.Rd
@@ -4,7 +4,7 @@
 \name{bactlist}
 \alias{bactlist}
 \title{Dataset with ~2500 microorganisms}
-\format{A data.frame with 2507 observations and 10 variables:
+\format{A data.frame with 2507 observations and 12 variables:
 \describe{
  \item{\code{bactid}}{ID of microorganism}
  \item{\code{bactsys}}{Bactsyscode of microorganism}
@@ -13,9 +13,11 @@
  \item{\code{species}}{Species name of microorganism, like \code{"coli"}}
  \item{\code{subspecies}}{Subspecies name of bio-/serovar of microorganism, like \code{"EHEC"}}
  \item{\code{fullname}}{Full name, like \code{"Escherichia coli (EHEC)"}}
-  \item{\code{type}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
-  \item{\code{gramstain}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
-  \item{\code{aerobic}}{Type aerobe/anaerobe of bacteria}
+  \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungus/yeast"}}
+  \item{\code{gramstain}}{Gram of microorganism, like \code{"Negative rods"}}
+  \item{\code{aerobic}}{Logical indicating whether the bacteria are aerobic}
+  \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
+  \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
 }}
 \source{
 MOLIS (LIS of Certe) - \url{https://www.certe.nl}
@@ -27,6 +29,6 @@ bactlist
 A dataset containing all microorganisms of MOLIS. MO codes of the UMCG can be looked up using \code{\link{bactlist.umcg}}.
 }
 \seealso{
-\code{\link{ablist}} \code{\link{bactlist.umcg}}
+\code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{bactlist.umcg}}
 }
 \keyword{datasets}
--- a/man/figures/mic_example.png
+++ b/man/figures/mic_example.png
--- a/man/guess_bactid.Rd
+++ b/man/guess_bactid.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/first_isolates.R
+\name{guess_bactid}
+\alias{guess_bactid}
+\title{Find bacteria ID based on genus/species}
+\usage{
+guess_bactid(x)
+}
+\arguments{
+\item{x}{character vector to determine \code{bactid}}
+}
+\value{
+Character (vector).
+}
+\description{
+Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
+}
+\examples{
+# These examples all return "STAAUR", the ID of S. aureus:
+guess_bactid("stau")
+guess_bactid("STAU")
+guess_bactid("staaur")
+guess_bactid("S. aureus")
+guess_bactid("S aureus")
+guess_bactid("Staphylococcus aureus")
+guess_bactid("MRSA") # Methicillin-resistant S. aureus
+guess_bactid("VISA") # Vancomycin Intermediate S. aureus
+}
+\seealso{
+\code{\link{bactlist}} for the dataframe that is being used to determine ID's.
+}
--- a/man/join.Rd
+++ b/man/join.Rd
@@ -11,39 +11,42 @@
 \alias{anti_join_bactlist}
 \title{Join a table with \code{bactlist}}
 \usage{
-inner_join_bactlist(x, by = "bactid", ...)
+inner_join_bactlist(x, by = "bactid", suffix = c("2", ""), ...)

-left_join_bactlist(x, by = "bactid", ...)
+left_join_bactlist(x, by = "bactid", suffix = c("2", ""), ...)

-right_join_bactlist(x, by = "bactid", ...)
+right_join_bactlist(x, by = "bactid", suffix = c("2", ""), ...)

-full_join_bactlist(x, by = "bactid", ...)
+full_join_bactlist(x, by = "bactid", suffix = c("2", ""), ...)

 semi_join_bactlist(x, by = "bactid", ...)

 anti_join_bactlist(x, by = "bactid", ...)
 }
 \arguments{
-\item{x}{existing table to join}
+\item{x}{existing table to join, also supports character vectors}

 \item{by}{a variable to join by - could be a column name of \code{x} with values that exist in \code{bactlist$bactid} (like \code{by = "bacteria_id"}), or another column in \code{\link{bactlist}} (but then it should be named, like \code{by = c("my_genus_species" = "fullname")})}

+\item{suffix}{if there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.}
+
 \item{...}{other parameters to pass on to \code{dplyr::\link[dplyr]{join}}.}
 }
 \description{
 Join the list of microorganisms \code{\link{bactlist}} easily to an existing table.
 }
 \details{
-As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, at default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
+As opposed to the \code{\link[dplyr]{join}} functions of \code{dplyr}, character vectors are supported and by default existing columns will get a suffix \code{"2"} and the newly joined columns will not get a suffix. See \code{\link[dplyr]{join}} for more information.
 }
 \examples{
+left_join_bactlist("STAAUR")
+
 df <- data.frame(date = seq(from = as.Date("2018-01-01"),
                            to = as.Date("2018-01-07"),
                            by = 1),
                 bacteria_id = c("STAAUR", "STAAUR", "STAAUR", "STAAUR",
                                 "ESCCOL", "ESCCOL", "ESCCOL"),
                 stringsAsFactors = FALSE)
-                 
 colnames(df)
 df2 <- left_join_bactlist(df, "bacteria_id")
 colnames(df2)
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@@ -33,5 +33,5 @@ tbl$keyab <- key_antibiotics(tbl)
 }
 }
 \seealso{
-\code{\link{mo_property}} \code{\link{ablist}}
+\code{\link{mo_property}} \code{\link{antibiotics}}
 }
--- a/man/septic_patients.Rd
+++ b/man/septic_patients.Rd
@@ -15,7 +15,7 @@
  \item{\code{sex}}{sex of the patient}
  \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
  \item{\code{bactid}}{ID of microorganism, see \code{\link{bactlist}}}
-  \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}), these column names occur in \code{\link{ablist}} and can be translated with \code{\link{abname}}}
+  \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}), these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
 }}
 \source{
 MOLIS (LIS of Certe) - \url{https://www.certe.nl}