From 7df28bce28ee2ebddd659aadfa06ccb69276d911 Mon Sep 17 00:00:00 2001
From: Matthijs Berends
Date: Thu, 8 Jan 2026 10:21:48 +0100
Subject: [PATCH] (v3.0.1.9015) fix translations

---
 DESCRIPTION   |  4 ++--
 NEWS.md       |  2 +-
 R/translate.R | 48 +++++++++++++++---------------------------------
 3 files changed, 18 insertions(+), 36 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c7adbf117..1b548fe35 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 3.0.1.9014
-Date: 2026-01-07
+Version: 3.0.1.9015
+Date: 2026-01-08
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
     data analysis and to work with microbial and antimicrobial properties by
diff --git a/NEWS.md b/NEWS.md
index a410da661..a1fd2232f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 3.0.1.9014
+# AMR 3.0.1.9015
 
 ### New
 * Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes`
diff --git a/R/translate.R b/R/translate.R
index 60e244def..8cf706247 100755
--- a/R/translate.R
+++ b/R/translate.R
@@ -264,44 +264,26 @@ translate_into_language <- function(from,
   }
 
   # non-regex part
-  translate_tokens <- function(tokens) {
-    patterns <- df_trans$pattern[df_trans$regular_expr == FALSE]
-    replacements <- df_trans[[lang]][df_trans$regular_expr == FALSE]
-    matches <- match(tokens, patterns)
-    tokens[!is.na(matches)] <- replacements[matches[!is.na(matches)]]
-    tokens
+  translate_exec <- function(term) {
+    # sort trans file on length of pattern
+    trns <- df_trans[order(nchar(df_trans$pattern), decreasing = TRUE), ]
+    for (i in seq_len(nrow(trns))) {
+      term <- gsub(
+        pattern = trns$pattern[i],
+        replacement = trns[i, lang, drop = TRUE],
+        x = term,
+        ignore.case = !trns$case_sensitive[i] & trns$regular_expr[i],
+        fixed = !trns$regular_expr[i],
+        perl = trns$regular_expr[i]
+      )
+    }
+    term
   }
   from_unique_translated[order(nchar(from_unique_translated), decreasing = TRUE)] <- vapply(
     FUN.VALUE = character(1),
     USE.NAMES = FALSE,
     from_unique_translated[order(nchar(from_unique_translated), decreasing = TRUE)],
-    function(x) {
-      delimiters <- "[ /()]"
-      split_regex <- paste0("(?<=", delimiters, ")|(?=", delimiters, ")")
-      tokens <- strsplit(x, split_regex, perl = TRUE)[[1]]
-      tokens <- translate_tokens(tokens)
-      out <- paste(tokens, collapse = "")
-      # also try with those tokens
-      out <- translate_tokens(out)
-      out
-    }
-  )
-
-  df_trans_regex <- df_trans[which(df_trans$regular_expr == TRUE), ]
-  # regex part
-  lapply(
-    # starting with longest pattern, since more general translations are shorter, such as 'Group'
-    order(nchar(df_trans_regex$pattern), decreasing = TRUE),
-    function(i) {
-      from_unique_translated <<- gsub(
-        pattern = df_trans_regex$pattern[i],
-        replacement = df_trans_regex[i, lang, drop = TRUE],
-        x = from_unique_translated,
-        ignore.case = !df_trans_regex$case_sensitive[i],
-        fixed = FALSE,
-        perl = TRUE
-      )
-    }
+    translate_exec
   )
 
   # force UTF-8 for diacritics
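
Note (illustration, not part of the commit): the patch replaces the separate token-based and regex translation passes with a single `translate_exec()` loop that applies every pattern from longest to shortest, so specific entries win over general ones such as "Group". The sketch below mimics that strategy with a made-up translation table and a hypothetical `translate_term()` helper; the package itself reads its real patterns from its internal translation file.

```r
# Toy data only: this data.frame and translate_term() are illustrative,
# not the package's internal translation file or API.
trans <- data.frame(
  pattern          = c("Group", "Group A", "no growth"),
  de               = c("Gruppe", "Gruppe A", "kein Wachstum"),
  regular_expr     = c(FALSE, FALSE, FALSE),
  case_sensitive   = c(FALSE, FALSE, FALSE),
  stringsAsFactors = FALSE
)

translate_term <- function(term, trans, lang = "de") {
  # longest pattern first, so "Group A" is handled before the more general "Group"
  trans <- trans[order(nchar(trans$pattern), decreasing = TRUE), ]
  for (i in seq_len(nrow(trans))) {
    term <- gsub(
      pattern     = trans$pattern[i],
      replacement = trans[i, lang, drop = TRUE],
      x           = term,
      # ignore case only for regex patterns, mirroring the patched code
      ignore.case = !trans$case_sensitive[i] & trans$regular_expr[i],
      fixed       = !trans$regular_expr[i],
      perl        = trans$regular_expr[i]
    )
  }
  term
}

translate_term("Streptococcus Group A (no growth)", trans)
#> [1] "Streptococcus Gruppe A (kein Wachstum)"
```

Setting `ignore.case = !case_sensitive & regular_expr` keeps `ignore.case` at `FALSE` whenever `fixed = TRUE`, presumably to avoid the warning `gsub()` emits when `ignore.case = TRUE` is combined with `fixed = TRUE`.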