AMR/data-raw/reproduction_of_example_iso...

147 lines
5.0 KiB
R
Raw Normal View History

# ==================================================================== #
# TITLE:                                                               #
# AMR: An R Package for Working with Antimicrobial Resistance Data     #
#                                                                      #
# SOURCE CODE:                                                         #
# https://github.com/msberends/AMR                                     #
#                                                                      #
# PLEASE CITE THIS SOFTWARE AS:                                        #
# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
# Journal of Statistical Software, 104(3), 1-31.                       #
# https://doi.org/10.18637/jss.v104.i03                                #
#                                                                      #
# Developed at the University of Groningen and the University Medical  #
# Center Groningen in The Netherlands, in collaboration with many      #
# colleagues from around the world, see our website.                   #
#                                                                      #
# This R package is free software; you can freely use and distribute   #
# it for both personal and commercial purposes under the terms of the  #
# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
# the Free Software Foundation.                                        #
# We created this package for both routine data analysis and academic  #
# research and it was publicly released in the hope that it will be    #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
#                                                                      #
# Visit our website for the full manual and a complete tutorial about  #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
# ==================================================================== #
# ---- Lookup vectors used to simulate the isolates --------------------------

# 260 patient IDs: every capital letter combined with the numbers 1 to 10,
# ordered "A1", "A2", ..., "A10", "B1", ..., "Z10".
patients <- paste0(rep(LETTERS, each = 10), 1:10)

# One row per patient ID; the first 135 IDs are labelled "M", the remaining
# 125 are labelled "F" (135 + 125 = 260, the number of patient IDs).
patients_table <- data.frame(
  patient_id = patients,
  gender = rep(c("M", "F"), times = c(135, 125))
)

# Every calendar day from 2011-01-01 up to and including 2020-01-01.
dates <- seq(as.Date("2011-01-01"), as.Date("2020-01-01"), by = "day")

# Three label sets of four organisms each. They appear to be the same four
# species written as abbreviated names, six-letter codes and full names; the
# positions line up, so one probability vector can be reused for all three.
bacteria_a <- c("E. coli", "S. aureus", "S. pneumoniae", "K. pneumoniae")
bacteria_b <- c("esccol", "staaur", "strpne", "klepne")
bacteria_c <- c(
  "Escherichia coli", "Staphylococcus aureus",
  "Streptococcus pneumoniae", "Klebsiella pneumoniae"
)

# Interpretation labels. The "messy" set is ordered differently ("R" first)
# and writes the "S" label as "< 0.5 S".
ab_interpretations <- c("S", "I", "R")
ab_interpretations_messy <- c("R", "< 0.5 S", "I")

# Number of rows to draw wherever this value is passed as `size` to sample().
sample_size <- 1000
# ---- Simulated isolates per hospital ---------------------------------------

# NOTE(review): no set.seed() is called before the random draws below, so
# every run of this script yields a different data set. Consider seeding the
# RNG if the stored data set must be reproducible.

#' Simulate the isolates of a single hospital
#'
#' Draws `n` rows with replacement. The random draws happen in column order
#' (date, bacteria, AMX, AMC, CIP, GEN).
#'
#' @param hospital Length-one value stored in the `hospital` column.
#' @param bacteria_labels Vector of four organism labels, sampled with
#'   probabilities 0.50, 0.25, 0.15 and 0.10 (matched by position).
#' @param interpretation_labels Vector of three interpretation labels. The
#'   per-antibiotic probabilities are matched by position, not by label.
#' @param n Number of rows to draw; defaults to the script-level `sample_size`.
#' @param date_pool Vector of dates to draw from with equal probability;
#'   defaults to the script-level `dates`.
#' @return A data.frame with `n` rows and the columns `date`, `hospital`,
#'   `bacteria`, `AMX`, `AMC`, `CIP` and `GEN`.
simulate_hospital <- function(hospital, bacteria_labels, interpretation_labels,
                              n = sample_size, date_pool = dates) {
  # All columns are drawn the same way; only the values and weights differ.
  draw <- function(values, prob = NULL) {
    sample(values, size = n, replace = TRUE, prob = prob)
  }

  data.frame(
    date = draw(date_pool),
    hospital = hospital,
    bacteria = draw(bacteria_labels, prob = c(0.50, 0.25, 0.15, 0.10)),
    AMX = draw(interpretation_labels, prob = c(0.60, 0.05, 0.35)),
    AMC = draw(interpretation_labels, prob = c(0.75, 0.10, 0.15)),
    # A weight of 0 means the second label is never drawn for CIP and GEN.
    CIP = draw(interpretation_labels, prob = c(0.80, 0.00, 0.20)),
    GEN = draw(interpretation_labels, prob = c(0.92, 0.00, 0.08))
  )
}

data_a <- simulate_hospital("A", bacteria_a, ab_interpretations)

# NOTE(review): `ab_interpretations_messy` is ordered "R", "< 0.5 S", "I",
# whereas `ab_interpretations` is ordered "S", "I", "R". Because the weights
# are positional, hospital B draws "R" with the weight that hospitals A and C
# give to "S" (e.g. 92% "R" for GEN). This matches the original script and is
# kept as is; confirm whether it is intended.
data_b <- simulate_hospital("B", bacteria_b, ab_interpretations_messy)

data_c <- simulate_hospital("C", bacteria_c, ab_interpretations)
# ---- Combine the hospitals and store the data set --------------------------

# dplyr functions are called with an explicit namespace (like usethis below),
# so this part does not depend on dplyr or magrittr being attached.

# Stack the three hospital data sets in the order A, B, C.
example_isolates_unclean <- dplyr::bind_rows(data_a, data_b, data_c)

# Assign a random patient ID to every row; patients can occur more than once.
example_isolates_unclean$patient_id <- sample(
  patients,
  size = nrow(example_isolates_unclean),
  replace = TRUE
)

# Put the identifying columns first and keep all other columns after them.
example_isolates_unclean <- dplyr::select(
  example_isolates_unclean,
  patient_id, hospital, date, bacteria, dplyr::everything()
)

# dataset_UTF8_to_ASCII() is not defined in this file and must be available
# in the session; presumably it converts non-ASCII text to ASCII - TODO
# confirm against its definition.
example_isolates_unclean <- dataset_UTF8_to_ASCII(example_isolates_unclean)

# Save as exported (non-internal) package data, overwriting an existing file.
usethis::use_data(example_isolates_unclean, overwrite = TRUE, internal = FALSE, version = 2, compress = "xz")