AMR/R/translate.R

268 lines
11 KiB
R
Raw Normal View History

2018-11-05 13:20:32 +01:00
# ==================================================================== #
# TITLE #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2018-11-05 13:20:32 +01:00
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-11-05 13:20:32 +01:00
# #
2022-10-05 09:12:22 +02:00
# CITE AS #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
# doi:10.18637/jss.v104.i03 #
# #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
2022-08-28 10:31:50 +02:00
# Diagnostics & Advice, and University Medical Center Groningen. #
2018-11-05 13:20:32 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2018-11-05 13:20:32 +01:00
# ==================================================================== #
2022-08-19 12:33:14 +02:00
#' Translate Strings from the AMR Package
2018-11-05 13:20:32 +01:00
#'
2020-05-16 13:05:47 +02:00
#' For language-dependent output of AMR functions, like [mo_name()], [mo_gramstain()], [mo_type()] and [ab_name()].
2022-08-19 12:33:14 +02:00
#' @param x text to translate
2022-10-05 09:12:22 +02:00
#' @param language language to choose. Use one of these supported language names or ISO-639-1 codes: `r vector_or(paste0(sapply(LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), " (" , LANGUAGES_SUPPORTED, ")"), quotes = FALSE, sort = FALSE)`.
#' @details The currently `r length(LANGUAGES_SUPPORTED)` supported languages are `r vector_or(paste0(sapply(LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]), " (" , LANGUAGES_SUPPORTED, ")"), quotes = FALSE, sort = FALSE)`. All these languages have translations available for all antimicrobial agents and colloquial microorganism names.
#'
#' **To silence language notes when this package loads** on a non-English operating system, you can set the option `AMR_locale` in your `.Rprofile` file like this:
#'
#' ```r
#' # Open .Rprofile file
#' utils::file.edit("~/.Rprofile")
#'
#' # Add e.g. Italian support to that file using:
#' options(AMR_locale = "Italian")
#' # or using:
#' AMR::set_AMR_locale("Italian")
#'
#' # And save the file!
#' ```
2019-05-10 16:44:59 +02:00
#'
2022-08-21 16:37:20 +02:00
#' Please read about adding or updating a language in [our Wiki](https://github.com/msberends/AMR/wiki/).
2019-05-10 16:44:59 +02:00
#'
#' ## Changing the Default Language
2022-08-27 20:49:37 +02:00
#' The system language will be used at default (as returned by `Sys.getenv("LANG")` or, if `LANG` is not set, [`Sys.getlocale("LC_COLLATE")`][Sys.getlocale()]), if that language is supported. But the language to be used can be overwritten in two ways and will be checked in this order:
2022-08-28 10:31:50 +02:00
#'
2022-10-05 09:12:22 +02:00
#' 1. Setting the R option `AMR_locale`, either by using e.g. `set_AMR_locale("German")` or by running e.g. `options(AMR_locale = "German")`.
2022-08-28 10:31:50 +02:00
#'
2022-10-05 09:12:22 +02:00
#' Note that setting an \R option only works in the same session. Save the command `options(AMR_locale = "(your language)")` to your `.Rprofile` file to apply it for every session. Run `utils::file.edit("~/.Rprofile")` to edit your `.Rprofile` file.
2022-08-19 12:33:14 +02:00
#' 2. Setting the system variable `LANGUAGE` or `LANG`, e.g. by adding `LANGUAGE="de_DE.utf8"` to your `.Renviron` file in your home directory.
2022-08-28 10:31:50 +02:00
#'
#' Thus, if the R option `AMR_locale` is set, the system variables `LANGUAGE` and `LANG` will be ignored.
2019-05-10 16:44:59 +02:00
#' @rdname translate
#' @name translate
2018-11-05 13:20:32 +01:00
#' @export
2019-05-10 16:44:59 +02:00
#' @examples
2022-08-20 20:17:14 +02:00
#' # Current settings (based on system language)
2022-08-19 12:33:14 +02:00
#' ab_name("Ciprofloxacin")
2022-10-05 09:12:22 +02:00
#' mo_name("Coagulase-negative Staphylococcus (CoNS)")
2019-05-10 16:44:59 +02:00
#'
2022-08-19 12:33:14 +02:00
#' # setting another language
2022-10-05 09:12:22 +02:00
#' set_AMR_locale("Spanish")
#' ab_name("Ciprofloxacin")
#' mo_name("Coagulase-negative Staphylococcus (CoNS)")
#'
#' # setting yet another language
2022-08-19 12:33:14 +02:00
#' set_AMR_locale("Greek")
#' ab_name("Ciprofloxacin")
2022-10-05 09:12:22 +02:00
#' mo_name("Coagulase-negative Staphylococcus (CoNS)")
2022-08-28 10:31:50 +02:00
#'
2022-10-05 09:12:22 +02:00
#' # setting yet another language
#' set_AMR_locale("Ukrainian")
2022-08-19 12:33:14 +02:00
#' ab_name("Ciprofloxacin")
2022-10-05 09:12:22 +02:00
#' mo_name("Coagulase-negative Staphylococcus (CoNS)")
2022-08-28 10:31:50 +02:00
#'
2022-08-20 20:17:14 +02:00
#' # set_AMR_locale() understands endonyms, English exonyms, and ISO-639-1:
#' set_AMR_locale("Deutsch")
#' set_AMR_locale("German")
#' set_AMR_locale("de")
2019-05-10 16:44:59 +02:00
#'
2022-08-20 20:17:14 +02:00
#' # reset to system default
2022-08-19 12:33:14 +02:00
#' reset_AMR_locale()
get_AMR_locale <- function() {
2022-08-19 12:33:14 +02:00
if (!is.null(getOption("AMR_locale", default = NULL))) {
return(validate_language(getOption("AMR_locale"), extra_txt = "set with `options(AMR_locale = ...)`"))
}
2022-08-28 10:31:50 +02:00
2022-08-19 12:33:14 +02:00
lang <- ""
# now check the LANGUAGE system variable - return it if set
if (!identical("", Sys.getenv("LANGUAGE"))) {
lang <- Sys.getenv("LANGUAGE")
}
if (!identical("", Sys.getenv("LANG"))) {
lang <- Sys.getenv("LANG")
}
if (lang == "") {
lang <- Sys.getlocale("LC_COLLATE")
2018-11-05 13:20:32 +01:00
}
2022-08-28 10:31:50 +02:00
2022-08-19 12:33:14 +02:00
lang <- find_language(lang)
if (lang != "en" && interactive() && message_not_thrown_before("get_AMR_locale", entire_session = TRUE)) {
2022-08-28 10:31:50 +02:00
message_(
"Assuming the ", LANGUAGES_SUPPORTED_NAMES[[lang]]$exonym, " language (",
LANGUAGES_SUPPORTED_NAMES[[lang]]$endonym, ") for the AMR package. Change this with `set_AMR_locale()`. ",
2022-10-05 09:12:22 +02:00
"This note will be shown once per session but can be silenced, see `?set_AMR_locale()`."
2022-08-28 10:31:50 +02:00
)
}
2022-08-19 12:33:14 +02:00
lang
2020-09-14 12:21:23 +02:00
}
2022-08-19 12:33:14 +02:00
#' @rdname translate
#' @export
2022-08-27 20:49:37 +02:00
set_AMR_locale <- function(language) {
language <- validate_language(language)
options(AMR_locale = language)
2022-10-05 09:12:22 +02:00
if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
# show which language to use now
message_(
"Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
ifelse(language != "en",
paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
""
),
" for the AMR package for this session."
)
}
2022-08-19 12:33:14 +02:00
}
#' @rdname translate
#' @export
reset_AMR_locale <- function() {
options(AMR_locale = NULL)
2022-10-05 09:12:22 +02:00
if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
# show which language to use now
language <- suppressMessages(get_AMR_locale())
message_("Using the ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym, " language (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ") for the AMR package for this session.")
}
2022-08-19 12:33:14 +02:00
}
#' @rdname translate
#' @export
translate_AMR <- function(x, language = get_AMR_locale()) {
translate_into_language(x, language = language)
}
2022-08-27 20:49:37 +02:00
2022-08-19 12:33:14 +02:00
validate_language <- function(language, extra_txt = character(0)) {
2022-10-05 09:12:22 +02:00
if (isTRUE(trimws2(tolower(language[1])) %in% c("en", "english", "", "false", NA)) || length(language) == 0) {
2022-08-20 20:17:14 +02:00
return("en")
}
2022-10-05 09:12:22 +02:00
lang <- find_language(language[1], fallback = FALSE)
2022-08-20 20:17:14 +02:00
stop_ifnot(length(lang) > 0 && lang %in% LANGUAGES_SUPPORTED,
2022-08-28 10:31:50 +02:00
"unsupported language for AMR package", extra_txt, ": \"", language, "\". Use one of these language names or ISO-639-1 codes: ",
paste0('"', vapply(FUN.VALUE = character(1), LANGUAGES_SUPPORTED_NAMES, function(x) x[[1]]),
'" ("', LANGUAGES_SUPPORTED, '")',
collapse = ", "
),
call = FALSE
)
2022-08-20 20:17:14 +02:00
lang
2022-08-19 12:33:14 +02:00
}
2022-08-27 20:49:37 +02:00
find_language <- function(language, fallback = TRUE) {
2022-10-05 09:12:22 +02:00
language <- Map(LANGUAGES_SUPPORTED_NAMES,
LANGUAGES_SUPPORTED,
f = function(l, n, check = language) {
grepl(paste0(
"^(", l[1], "|", l[2], "|",
n, "(_|$)|", toupper(n), "(_|$))"
),
check,
ignore.case = TRUE,
perl = TRUE,
useBytes = FALSE
)
},
USE.NAMES = TRUE
2022-08-28 10:31:50 +02:00
)
2022-08-27 20:49:37 +02:00
language <- names(which(language == TRUE))
if (isTRUE(fallback) && length(language) == 0) {
2018-11-15 12:42:35 +01:00
# other language -> set to English
2022-08-27 20:49:37 +02:00
language <- "en"
2018-11-05 13:20:32 +01:00
}
2022-08-27 20:49:37 +02:00
language
2018-11-05 13:20:32 +01:00
}
# translate strings based on inst/translations.tsv
2022-08-19 12:33:14 +02:00
translate_into_language <- function(from,
2022-08-28 10:31:50 +02:00
language = get_AMR_locale(),
2022-08-19 12:33:14 +02:00
only_unknown = FALSE,
only_affect_ab_names = FALSE,
only_affect_mo_names = FALSE) {
2022-10-05 09:12:22 +02:00
if (is.null(language) || language[1] %in% c("en", "", NA)) {
return(from)
}
2022-08-28 10:31:50 +02:00
df_trans <- TRANSLATIONS # internal data file
from.bak <- from
from_unique <- unique(from)
from_unique_translated <- from_unique
2022-08-28 10:31:50 +02:00
2022-08-20 20:17:14 +02:00
# get ISO-639-1 of language
lang <- validate_language(language)
# only keep lines where translation is available for this language
2022-08-20 20:17:14 +02:00
df_trans <- df_trans[which(!is.na(df_trans[, lang, drop = TRUE])), , drop = FALSE]
2021-05-17 11:26:12 +02:00
# and where the original string is not equal to the string in the target language
2022-08-20 20:17:14 +02:00
df_trans <- df_trans[which(df_trans[, "pattern", drop = TRUE] != df_trans[, lang, drop = TRUE]), , drop = FALSE]
if (only_unknown == TRUE) {
2020-07-08 14:48:06 +02:00
df_trans <- subset(df_trans, pattern %like% "unknown")
}
2021-05-17 11:26:12 +02:00
if (only_affect_ab_names == TRUE) {
df_trans <- subset(df_trans, affect_ab_name == TRUE)
}
if (only_affect_mo_names == TRUE) {
2021-02-18 23:23:14 +01:00
df_trans <- subset(df_trans, affect_mo_name == TRUE)
}
2021-05-17 11:26:12 +02:00
if (NROW(df_trans) == 0) {
return(from)
}
2022-08-28 10:31:50 +02:00
# default: case sensitive if value if 'case_sensitive' is missing:
df_trans$case_sensitive[is.na(df_trans$case_sensitive)] <- TRUE
# default: not using regular expressions if 'regular_expr' is missing:
df_trans$regular_expr[is.na(df_trans$regular_expr)] <- FALSE
2022-08-28 10:31:50 +02:00
# check if text to look for is in one of the patterns
2021-04-07 08:37:42 +02:00
any_form_in_patterns <- tryCatch(
any(from_unique %like% paste0("(", paste(gsub(" +\\(.*", "", df_trans$pattern), collapse = "|"), ")")),
error = function(e) {
warning_("Translation not possible. Please open an issue on GitHub (https://github.com/msberends/AMR/issues).")
2021-04-07 08:37:42 +02:00
return(FALSE)
2022-08-28 10:31:50 +02:00
}
)
if (NROW(df_trans) == 0 | !any_form_in_patterns) {
return(from)
}
2022-08-28 10:31:50 +02:00
lapply(
seq_len(nrow(df_trans)),
function(i) {
from_unique_translated <<- gsub(
pattern = df_trans$pattern[i],
replacement = df_trans[i, lang, drop = TRUE],
x = from_unique_translated,
ignore.case = !df_trans$case_sensitive[i] & df_trans$regular_expr[i],
fixed = !df_trans$regular_expr[i],
perl = df_trans$regular_expr[i]
)
}
)
# force UTF-8 for diacritics
from_unique_translated <- enc2utf8(from_unique_translated)
2022-08-28 10:31:50 +02:00
# a kind of left join to get all results back
from_unique_translated[match(from.bak, from_unique)]
}