mirror of
https://github.com/msberends/AMR.git
synced 2025-04-17 19:10:38 +02:00
304 lines
8.4 KiB
Plaintext
304 lines
8.4 KiB
Plaintext
---
|
|
title: "Data sets for download / own use"
|
|
date: '`r format(Sys.Date(), "%d %B %Y")`'
|
|
output:
|
|
rmarkdown::html_vignette:
|
|
toc: true
|
|
toc_depth: 1
|
|
vignette: >
|
|
%\VignetteIndexEntry{Data sets for download / own use}
|
|
%\VignetteEncoding{UTF-8}
|
|
%\VignetteEngine{knitr::rmarkdown}
|
|
editor_options:
|
|
chunk_output_type: console
|
|
---
|
|
|
|
```{r setup, include = FALSE, results = "markup"}
|
|
# Chunk defaults applied to every code chunk in this vignette.
knitr::opts_chunk$set(
  warning = FALSE, collapse = TRUE, comment = "#",
  fig.width = 7.5, fig.height = 5
)

library(AMR)
library(dplyr)

# Let knitr::kable() render missing values as empty cells.
options(knitr.kable.NA = "")
|
|
|
|
# Describe the shape of a data set in one sentence.
#
# @param dataset A data set with rows, columns and column names.
# @return A character string stating the number of rows (formatted with a
#   space as big mark), the number of columns, and the column names as
#   formatted by AMR:::vector_or().
structure_txt <- function(dataset) {
  n_rows <- format(nrow(dataset), big.mark = " ")
  n_cols <- ncol(dataset)
  # Column names are kept in their original order (sort = FALSE).
  column_list <- AMR:::vector_or(
    colnames(dataset),
    quotes = "*",
    last_sep = " and ",
    sort = FALSE
  )
  paste0(
    "A data set with ", n_rows, " rows and ", n_cols,
    " columns, containing the following column names: \n",
    column_list, "."
  )
}
|
|
|
|
# Build the markdown text that introduces the downloads of one data set.
#
# @param filename Name of the data set without file extension, e.g.
#   "microorganisms". It is used to locate "../data/<filename>.rda" (for the
#   modification time) and the exported files in "../data-raw/datasets/".
# @return A single character string containing:
#   - the last-modified time (UTC) of the .rda file, if that file exists;
#   - a link to the reference page of the data set;
#   - a bullet list of direct download links, added only if at least one
#     exported file exists locally. Formats without a local file are listed
#     as unavailable.
download_txt <- function(filename) {
  # The "antivirals" data set links to the "antimicrobials" reference page.
  reference_page <- if (isTRUE(filename == "antivirals")) {
    "antimicrobials"
  } else {
    filename
  }

  # file.mtime() returns NA for a missing file; leave the sentence out in
  # that case instead of printing "last updated on NA".
  last_modified <- file.mtime(paste0("../data/", filename, ".rda"))
  updated_txt <- if (is.na(last_modified)) {
    ""
  } else {
    paste0(
      "It was last updated on ",
      trimws(format(last_modified, "%e %B %Y %H:%M:%S %Z", tz = "UTC")),
      ". "
    )
  }

  msg <- paste0(
    updated_txt,
    "Find more info about the contents, (scientific) source, and structure of this [data set here](https://msberends.github.io/AMR/reference/", reference_page, ".html).\n"
  )

  github_base <- "https://github.com/msberends/AMR/raw/main/data-raw/datasets/"

  # File extension -> description shown in the link text; the order here is
  # the order of the bullets in the output.
  formats <- c(
    rds = "original R Data Structure (RDS) file",
    txt = "tab-separated text file",
    xlsx = "Microsoft Excel workbook",
    feather = "Apache Feather file",
    parquet = "Apache Parquet file",
    sav = "IBM SPSS Statistics data file",
    dta = "Stata DTA file"
  )
  local_files <- paste0("../data-raw/datasets/", filename, ".", names(formats))
  available <- file.exists(local_files)

  if (any(available)) {
    links <- vapply(
      seq_along(formats),
      function(i) {
        if (available[i]) {
          paste0(
            "* Download as [", formats[[i]], "](", github_base, basename(local_files[i]), ") (",
            AMR:::formatted_filesize(local_files[i]), ") \n"
          )
        } else {
          paste0("* *(unavailable as ", formats[[i]], ")*\n")
        }
      },
      character(1)
    )
    msg <- c(msg, "\n**Direct download links:**\n\n", links)
  }

  paste0(msg, collapse = "")
}
|
|
|
|
# Render the first rows of a data set as a centred table.
#
# @param x A data set; it is converted with as.data.frame().
# @param rows Maximum number of rows to show (default 6).
# @return The result of knitr::kable(align = "c") on the shortened data, in
#   which every list column has been collapsed to a character column.
print_df <- function(x, rows = 6) {
  # Collapse one cell of a list column to a single string: at most the first
  # three values followed by ", ...", or "" when empty or all NA.
  collapse_cell <- function(cell) {
    if (length(cell) > 3) {
      paste0(paste(cell[1:3], collapse = ", "), ", ...")
    } else if (length(cell) == 0 || all(is.na(cell))) {
      ""
    } else {
      paste(cell, collapse = ", ")
    }
  }

  x %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    head(n = rows) %>%
    # across() replaces the superseded mutate_all(); vapply() guarantees a
    # character result even for a list column without rows.
    mutate(across(everything(), function(column) {
      if (is.list(column)) {
        vapply(column, collapse_cell, character(1))
      } else {
        column
      }
    })) %>%
    knitr::kable(align = "c")
}
|
|
```
|
|
|
|
All reference data (about microorganisms, antimicrobials, SIR interpretation, EUCAST rules, etc.) in this `AMR` package are reliable, up-to-date and freely available. We continually export our data sets to formats for use in R, MS Excel, Apache Feather, Apache Parquet, SPSS, and Stata. We also provide tab-separated text files that are machine-readable and suitable for input in any software program, such as laboratory information systems.
|
|
|
|
> If you are working in Python, be sure to use our [AMR for Python](https://msberends.github.io/AMR/articles/AMR_for_Python.html) package. It allows all relevant AMR data sets to be natively available in Python.
|
|
|
|
## `microorganisms`: Full Microbial Taxonomy
|
|
|
|
`r structure_txt(microorganisms)`
|
|
|
|
This data set is available in R as `microorganisms` after you load the `AMR` package.
|
|
|
|
`r download_txt("microorganisms")`
|
|
|
|
**NOTE: The exported files for SPSS and Stata contain only the first 50 SNOMED codes per record, as their file size would otherwise exceed 100 MB, the file size limit of GitHub.** The file structures and compression techniques of these formats are very inefficient. Advice? Use R instead. It's free and much better in many ways.
|
|
|
|
The tab-separated text file and Microsoft Excel workbook both contain all SNOMED codes as comma-separated values.
|
|
|
|
**Example content**
|
|
|
|
Included (sub)species per taxonomic kingdom:
|
|
|
|
```{r, echo = FALSE}
|
|
# Number of rows per kingdom, with a space as thousands separator.
print_df(
  setNames(
    mutate(count(microorganisms, kingdom), n = format(n, big.mark = " ")),
    c("Kingdom", "Number of (sub)species")
  )
)
|
|
```
|
|
|
|
First 6 rows when filtering on genus *Escherichia*:
|
|
|
|
```{r, echo = FALSE}
|
|
# Only the rows of genus Escherichia; print_df() shows the first 6.
print_df(filter(microorganisms, genus == "Escherichia"))
|
|
```
|
|
|
|
|
|
## `antimicrobials`: Antibiotic and Antifungal Drugs
|
|
|
|
`r structure_txt(antimicrobials)`
|
|
|
|
This data set is available in R as `antimicrobials` after you load the `AMR` package.
|
|
|
|
`r download_txt("antimicrobials")`
|
|
|
|
The tab-separated text, Microsoft Excel, SPSS, and Stata files all contain the ATC codes, common abbreviations, trade names and LOINC codes as comma-separated values.
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# Only the drugs whose code is also a column name of example_isolates.
print_df(filter(antimicrobials, ab %in% colnames(example_isolates)))
|
|
```
|
|
|
|
|
|
## `clinical_breakpoints`: Interpretation from MIC values & disk diameters to SIR
|
|
|
|
`r structure_txt(clinical_breakpoints)`
|
|
|
|
This data set is available in R as `clinical_breakpoints` after you load the `AMR` package.
|
|
|
|
`r download_txt("clinical_breakpoints")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# Add the names next to the `mo` and `ab` columns; language = NULL is passed
# to both name look-ups.
print_df(
  mutate(
    mutate(
      clinical_breakpoints,
      mo_name = mo_name(mo, language = NULL),
      .after = mo
    ),
    ab_name = ab_name(ab, language = NULL),
    .after = ab
  )
)
|
|
```
|
|
|
|
|
|
## `microorganisms.groups`: Species Groups and Microbiological Complexes
|
|
|
|
`r structure_txt(microorganisms.groups)`
|
|
|
|
This data set is available in R as `microorganisms.groups` after you load the `AMR` package.
|
|
|
|
`r download_txt("microorganisms.groups")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(microorganisms.groups)
|
|
```
|
|
|
|
|
|
## `intrinsic_resistant`: Intrinsic Bacterial Resistance
|
|
|
|
`r structure_txt(intrinsic_resistant)`
|
|
|
|
This data set is available in R as `intrinsic_resistant` after you load the `AMR` package.
|
|
|
|
`r download_txt("intrinsic_resistant")`
|
|
|
|
**Example content**
|
|
|
|
Example rows when filtering on *Enterobacter cloacae*:
|
|
|
|
```{r, echo = FALSE}
|
|
# All rows for Enterobacter cloacae, sorted by drug name.
# language = NULL is passed to both name look-ups, as in the
# clinical_breakpoints example of this file, so that the English literal in
# filter() does not depend on the locale of the machine building the vignette.
intrinsic_resistant %>%
  transmute(
    microorganism = mo_name(mo, language = NULL),
    antibiotic = ab_name(ab, language = NULL)
  ) %>%
  filter(microorganism == "Enterobacter cloacae") %>%
  arrange(antibiotic) %>%
  print_df(rows = Inf)
|
|
```
|
|
|
|
|
|
## `dosage`: Dosage Guidelines from EUCAST
|
|
|
|
`r structure_txt(dosage)`
|
|
|
|
This data set is available in R as `dosage` after you load the `AMR` package.
|
|
|
|
`r download_txt("dosage")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(dosage)
|
|
```
|
|
|
|
|
|
## `example_isolates`: Example Data for Practice
|
|
|
|
`r structure_txt(example_isolates)`
|
|
|
|
This data set is available in R as `example_isolates` after you load the `AMR` package.
|
|
|
|
`r download_txt("example_isolates")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(example_isolates)
|
|
```
|
|
|
|
## `example_isolates_unclean`: Unclean Example Data for Practice
|
|
|
|
`r structure_txt(example_isolates_unclean)`
|
|
|
|
This data set is available in R as `example_isolates_unclean` after you load the `AMR` package.
|
|
|
|
`r download_txt("example_isolates_unclean")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(example_isolates_unclean)
|
|
```
|
|
|
|
|
|
## `microorganisms.codes`: Common Laboratory Codes
|
|
|
|
`r structure_txt(microorganisms.codes)`
|
|
|
|
This data set is available in R as `microorganisms.codes` after you load the `AMR` package.
|
|
|
|
`r download_txt("microorganisms.codes")`
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(microorganisms.codes)
|
|
```
|
|
|
|
|
|
## `antivirals`: Antiviral Drugs
|
|
|
|
`r structure_txt(antivirals)`
|
|
|
|
This data set is available in R as `antivirals` after you load the `AMR` package.
|
|
|
|
`r download_txt("antivirals")`
|
|
|
|
The tab-separated text, Microsoft Excel, SPSS, and Stata files all contain the trade names and LOINC codes as comma-separated values.
|
|
|
|
**Example content**
|
|
|
|
```{r, echo = FALSE}
|
|
# First 6 rows of the data set.
print_df(antivirals)
|
|
```
|