2018-11-05 13:20:32 +01:00
# ==================================================================== #
# TITLE #
2021-02-02 23:57:35 +01:00
# Antimicrobial Resistance (AMR) Data Analysis for R #
2018-11-05 13:20:32 +01:00
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-11-05 13:20:32 +01:00
# #
# LICENCE #
2020-12-27 00:30:28 +01:00
# (c) 2018-2021 Berends MS, Luz CF et al. #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
2018-11-05 13:20:32 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2018-11-05 13:20:32 +01:00
# ==================================================================== #
2021-01-18 16:57:56 +01:00
#' Translate Strings from AMR Package
2018-11-05 13:20:32 +01:00
#'
2020-05-16 13:05:47 +02:00
#' For language-dependent output of AMR functions, like [mo_name()], [mo_gramstain()], [mo_type()] and [ab_name()].
2021-01-18 16:57:56 +01:00
#' @inheritSection lifecycle Stable Lifecycle
2021-10-06 13:23:57 +02:00
#' @details Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: <https://github.com/msberends/AMR/blob/main/data-raw/translations.tsv>. This file will be read by all functions where a translated output can be desired, like all [`mo_*`][mo_property()] functions (such as [mo_name()], [mo_gramstain()], [mo_type()], etc.) and [`ab_*`][ab_property()] functions (such as [ab_name()], [ab_group()], etc.).
2019-05-10 16:44:59 +02:00
#'
2021-12-12 09:42:03 +01:00
#' Currently supported languages are: `r vector_and(names(LANGUAGES_SUPPORTED), quotes = FALSE)`. All these languages have translations available for all antimicrobial agents and colloquial microorganism names.
2019-08-11 19:07:26 +02:00
#'
2020-07-08 14:48:06 +02:00
#' Please suggest your own translations [by creating a new issue on our repository](https://github.com/msberends/AMR/issues/new?title=Translations).
2019-05-10 16:44:59 +02:00
#'
2021-01-18 16:57:56 +01:00
#' ## Changing the Default Language
2020-09-14 19:41:48 +02:00
#' The system language will be used at default (as returned by `Sys.getenv("LANG")` or, if `LANG` is not set, [Sys.getlocale()]), if that language is supported. But the language to be used can be overwritten in two ways and will be checked in this order:
2020-09-14 12:21:23 +02:00
#'
#' 1. Setting the R option `AMR_locale`, e.g. by running `options(AMR_locale = "de")`
#' 2. Setting the system variable `LANGUAGE` or `LANG`, e.g. by adding `LANGUAGE="de_DE.utf8"` to your `.Renviron` file in your home directory
#'
2021-12-12 09:42:03 +01:00
#' Thus, if the R option `AMR_locale` is set, the system variables `LANGUAGE` and `LANG` will be ignored.
2021-01-18 16:57:56 +01:00
#' @inheritSection AMR Read more on Our Website!
2019-05-10 16:44:59 +02:00
#' @rdname translate
#' @name translate
2018-11-05 13:20:32 +01:00
#' @export
2019-05-10 16:44:59 +02:00
#' @examples
2020-12-22 00:51:17 +01:00
#' # The 'language' argument of below functions
2019-05-10 16:44:59 +02:00
#' # will be set automatically to your system language
2021-12-12 09:42:03 +01:00
#' # with get_AMR_locale()
2019-05-10 16:44:59 +02:00
#'
#' # English
2019-08-11 19:07:26 +02:00
#' mo_name("CoNS", language = "en")
2019-05-10 16:44:59 +02:00
#' #> "Coagulase-negative Staphylococcus (CoNS)"
#'
2021-09-29 12:12:35 +02:00
#' # Danish
2021-12-12 09:42:03 +01:00
#' mo_name("CoNS", language = "da")
#' #> "Koagulase-negative stafylokokker (KNS)"
2021-09-29 12:12:35 +02:00
#'
2019-05-10 16:44:59 +02:00
#' # Dutch
2019-08-11 19:07:26 +02:00
#' mo_name("CoNS", language = "nl")
2019-05-10 16:44:59 +02:00
#' #> "Coagulase-negatieve Staphylococcus (CNS)"
#'
2021-09-29 12:12:35 +02:00
#' # German
#' mo_name("CoNS", language = "de")
#' #> "Koagulase-negative Staphylococcus (KNS)"
2019-05-10 16:44:59 +02:00
#'
#' # Italian
2019-08-11 19:07:26 +02:00
#' mo_name("CoNS", language = "it")
2019-05-10 16:44:59 +02:00
#' #> "Staphylococcus negativo coagulasi (CoNS)"
#'
#' # Portuguese
2019-08-11 19:07:26 +02:00
#' mo_name("CoNS", language = "pt")
2019-05-10 16:44:59 +02:00
#' #> "Staphylococcus coagulase negativo (CoNS)"
2021-09-29 12:12:35 +02:00
#'
#' # Spanish
#' mo_name("CoNS", language = "es")
#' #> "Staphylococcus coagulasa negativo (SCN)"
2021-12-12 09:42:03 +01:00
get_AMR_locale <- function ( ) {
2020-10-19 17:09:19 +02:00
# AMR versions 1.3.0 and prior used the environmental variable:
2020-09-12 08:49:01 +02:00
if ( ! identical ( " " , Sys.getenv ( " AMR_locale" ) ) ) {
options ( AMR_locale = Sys.getenv ( " AMR_locale" ) )
}
2019-09-12 15:08:53 +02:00
if ( ! is.null ( getOption ( " AMR_locale" , default = NULL ) ) ) {
2020-09-12 08:49:01 +02:00
lang <- getOption ( " AMR_locale" )
if ( lang %in% LANGUAGES_SUPPORTED ) {
return ( lang )
2020-09-03 12:31:48 +02:00
} else {
2021-02-04 16:48:16 +01:00
stop_ ( " unsupported language set as option 'AMR_locale': \"" , lang , " \" - use either " ,
2021-12-12 09:42:03 +01:00
vector_or ( paste0 ( ' "' , LANGUAGES_SUPPORTED , ' " (' , names ( LANGUAGES_SUPPORTED ) , " )" ) , quotes = FALSE ) )
2020-09-03 12:31:48 +02:00
}
2020-09-14 12:21:23 +02:00
} else {
2021-12-12 09:42:03 +01:00
# now check the LANGUAGE system variable - return it if set
2020-09-14 12:21:23 +02:00
if ( ! identical ( " " , Sys.getenv ( " LANGUAGE" ) ) ) {
return ( coerce_language_setting ( Sys.getenv ( " LANGUAGE" ) ) )
}
if ( ! identical ( " " , Sys.getenv ( " LANG" ) ) ) {
return ( coerce_language_setting ( Sys.getenv ( " LANG" ) ) )
}
2018-11-05 13:20:32 +01:00
}
2020-05-16 13:05:47 +02:00
2021-12-12 09:42:03 +01:00
# fallback - automatic determination based on LC_COLLATE
if ( interactive ( ) && message_not_thrown_before ( " get_AMR_locale" , entire_session = TRUE ) ) {
lang <- coerce_language_setting ( Sys.getlocale ( " LC_COLLATE" ) )
if ( lang != " en" ) {
message_ ( " Assuming the " , names ( LANGUAGES_SUPPORTED ) [LANGUAGES_SUPPORTED == lang ] ,
" language for the AMR package. Change this with `options(AMR_locale = \"...\")` or see `?get_AMR_locale()`. " ,
" Supported languages are " , vector_and ( names ( LANGUAGES_SUPPORTED ) , quotes = FALSE ) ,
" . This note will be shown once per session." )
}
return ( lang )
}
2020-12-17 16:22:25 +01:00
coerce_language_setting ( Sys.getlocale ( " LC_COLLATE" ) )
2020-09-14 12:21:23 +02:00
}
coerce_language_setting <- function ( lang ) {
2021-12-12 09:42:03 +01:00
# grepl() with ignore.case = FALSE is 8x faster than %like_case%
2020-10-19 17:09:19 +02:00
if ( grepl ( " ^(English|en_|EN_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-12 15:07:43 +01:00
# as first option to optimise speed
2018-11-05 13:20:32 +01:00
" en"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(German|Deutsch|de_|DE_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" de"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(Dutch|Nederlands|nl_|NL_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" nl"
2021-09-29 12:12:35 +02:00
} else if ( grepl ( " ^(Danish|Dansk|da_|DA_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
" da"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(Spanish|Espa.+ol|es_|ES_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" es"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(Italian|Italiano|it_|IT_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" it"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(French|Fran.+ais|fr_|FR_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" fr"
2020-10-19 17:09:19 +02:00
} else if ( grepl ( " ^(Portuguese|Portugu.+s|pt_|PT_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
2018-11-05 13:20:32 +01:00
" pt"
2021-12-12 09:42:03 +01:00
} else if ( grepl ( " ^(Russian|р у с с |ru_|RU_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
" ru"
} else if ( grepl ( " ^(Swedish|Svenskt|sv_|SV_)" , lang , ignore.case = FALSE , perl = TRUE ) ) {
" sv"
2018-11-12 15:07:43 +01:00
} else {
2018-11-15 12:42:35 +01:00
# other language -> set to English
2018-11-12 15:07:43 +01:00
" en"
2018-11-05 13:20:32 +01:00
}
}
2019-09-12 15:08:53 +02:00
# translate strings based on inst/translations.tsv
2021-05-17 11:26:12 +02:00
translate_AMR <- function ( from ,
2021-12-12 09:42:03 +01:00
language = get_AMR_locale ( ) ,
2021-05-17 11:26:12 +02:00
only_unknown = FALSE ,
only_affect_ab_names = FALSE ,
only_affect_mo_names = FALSE ) {
2020-05-16 13:05:47 +02:00
2019-09-12 15:08:53 +02:00
if ( is.null ( language ) ) {
return ( from )
}
if ( language %in% c ( " en" , " " , NA ) ) {
return ( from )
}
2021-06-04 21:07:55 +02:00
df_trans <- TRANSLATIONS # internal data file
2020-09-03 12:31:48 +02:00
from.bak <- from
from_unique <- unique ( from )
from_unique_translated <- from_unique
2019-09-12 15:08:53 +02:00
2020-09-03 12:31:48 +02:00
stop_ifnot ( language %in% LANGUAGES_SUPPORTED ,
2021-02-04 16:48:16 +01:00
" unsupported language: \"" , language , " \" - use either " ,
vector_or ( LANGUAGES_SUPPORTED , quotes = TRUE ) ,
2020-06-22 11:18:40 +02:00
call = FALSE )
2019-09-12 15:08:53 +02:00
2021-03-04 23:28:32 +01:00
# only keep lines where translation is available for this language
df_trans <- df_trans [which ( ! is.na ( df_trans [ , language , drop = TRUE ] ) ) , , drop = FALSE ]
2021-05-17 11:26:12 +02:00
# and where the original string is not equal to the string in the target language
df_trans <- df_trans [which ( df_trans [ , " pattern" , drop = TRUE ] != df_trans [ , language , drop = TRUE ] ) , , drop = FALSE ]
2019-09-12 15:08:53 +02:00
if ( only_unknown == TRUE ) {
2020-07-08 14:48:06 +02:00
df_trans <- subset ( df_trans , pattern %like% " unknown" )
2019-09-12 15:08:53 +02:00
}
2021-05-17 11:26:12 +02:00
if ( only_affect_ab_names == TRUE ) {
df_trans <- subset ( df_trans , affect_ab_name == TRUE )
}
if ( only_affect_mo_names == TRUE ) {
2021-02-18 23:23:14 +01:00
df_trans <- subset ( df_trans , affect_mo_name == TRUE )
}
2021-05-17 11:26:12 +02:00
if ( NROW ( df_trans ) == 0 ) {
return ( from )
}
2019-09-12 15:08:53 +02:00
2021-03-04 23:28:32 +01:00
# default: case sensitive if value if 'case_sensitive' is missing:
df_trans $ case_sensitive [is.na ( df_trans $ case_sensitive ) ] <- TRUE
# default: not using regular expressions if 'regular_expr' is missing:
df_trans $ regular_expr [is.na ( df_trans $ regular_expr ) ] <- FALSE
2019-09-12 15:08:53 +02:00
# check if text to look for is in one of the patterns
2021-04-07 08:37:42 +02:00
any_form_in_patterns <- tryCatch (
any ( from_unique %like% paste0 ( " (" , paste ( gsub ( " +\\(.*" , " " , df_trans $ pattern ) , collapse = " |" ) , " )" ) ) ,
error = function ( e ) {
warning_ ( " Translation not possible. Please open an issue on GitHub (https://github.com/msberends/AMR/issues)." , call = FALSE )
return ( FALSE )
} )
2019-09-12 15:08:53 +02:00
if ( NROW ( df_trans ) == 0 | ! any_form_in_patterns ) {
return ( from )
}
2020-09-03 12:31:48 +02:00
lapply ( seq_len ( nrow ( df_trans ) ) ,
function ( i ) from_unique_translated <<- gsub ( pattern = df_trans $ pattern [i ] ,
2021-03-04 23:28:32 +01:00
replacement = df_trans [i , language , drop = TRUE ] ,
2020-09-03 12:31:48 +02:00
x = from_unique_translated ,
2021-04-07 08:37:42 +02:00
ignore.case = ! df_trans $ case_sensitive [i ] & df_trans $ regular_expr [i ] ,
2021-03-04 23:28:32 +01:00
fixed = ! df_trans $ regular_expr [i ] ,
perl = df_trans $ regular_expr [i ] ) )
2019-09-12 15:08:53 +02:00
# force UTF-8 for diacritics
2020-09-03 12:31:48 +02:00
from_unique_translated <- enc2utf8 ( from_unique_translated )
# a kind of left join to get all results back
from_unique_translated [match ( from.bak , from_unique ) ]
2019-09-12 15:08:53 +02:00
}