diff --git a/DESCRIPTION b/DESCRIPTION index 95d01d65e..c7adbf117 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: AMR -Version: 3.0.1.9013 +Version: 3.0.1.9014 Date: 2026-01-07 Title: Antimicrobial Resistance Data Analysis Description: Functions to simplify and standardise antimicrobial resistance (AMR) diff --git a/NEWS.md b/NEWS.md index 46bdb3904..a410da661 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 3.0.1.9013 +# AMR 3.0.1.9014 ### New * Integration with the **tidymodels** framework to allow seamless use of SIR, MIC and disk data in modelling pipelines via `recipes` diff --git a/R/translate.R b/R/translate.R index cb5da72d1..60e244def 100755 --- a/R/translate.R +++ b/R/translate.R @@ -263,6 +263,30 @@ translate_into_language <- function(from, df_trans$pattern[df_trans$regular_expr == TRUE] <- gsub("$$", "$", df_trans$pattern[df_trans$regular_expr == TRUE], fixed = TRUE) } + # non-regex part + translate_tokens <- function(tokens) { + patterns <- df_trans$pattern[df_trans$regular_expr == FALSE] + replacements <- df_trans[[lang]][df_trans$regular_expr == FALSE] + matches <- match(tokens, patterns) + tokens[!is.na(matches)] <- replacements[matches[!is.na(matches)]] + tokens + } + from_unique_translated[order(nchar(from_unique_translated), decreasing = TRUE)] <- vapply( + FUN.VALUE = character(1), + USE.NAMES = FALSE, + from_unique_translated[order(nchar(from_unique_translated), decreasing = TRUE)], + function(x) { + delimiters <- "[ /()]" + split_regex <- paste0("(?<=", delimiters, ")|(?=", delimiters, ")") + tokens <- strsplit(x, split_regex, perl = TRUE)[[1]] + tokens <- translate_tokens(tokens) + out <- paste(tokens, collapse = "") + # also try with those tokens + out <- translate_tokens(out) + out + } + ) + df_trans_regex <- df_trans[which(df_trans$regular_expr == TRUE), ] # regex part lapply( @@ -279,26 +303,6 @@ translate_into_language <- function(from, ) } ) - # non-regex part - translate_tokens <- function(tokens) { - patterns <- df_trans$pattern[df_trans$regular_expr == FALSE] - replacements <- df_trans[[lang]][df_trans$regular_expr == FALSE] - matches <- match(tokens, patterns) - tokens[!is.na(matches)] <- replacements[matches[!is.na(matches)]] - tokens - } - from_unique_translated <- vapply( - FUN.VALUE = character(1), - USE.NAMES = FALSE, - from_unique_translated, - function(x) { - delimiters <- "[ /()]" - split_regex <- paste0("(?<=", delimiters, ")|(?=", delimiters, ")") - tokens <- strsplit(x, split_regex, perl = TRUE)[[1]] - tokens <- translate_tokens(tokens) - paste(tokens, collapse = "") - } - ) # force UTF-8 for diacritics from_unique_translated <- enc2utf8(from_unique_translated)