From cb0d74a4f008f3fdade89cca65dbf3ddb87aa37d Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Mon, 10 Sep 2018 15:45:25 +0200
Subject: [PATCH] support for French and Italian, added quote to freq

---
 NEWS.md                           |  5 +--
 R/freq.R                          |  6 ++++
 R/mo.R                            |  2 +-
 R/mo_property.R                   | 51 +++++++++++++++++++++++++++++--
 README.md                         |  2 +-
 man/freq.Rd                       |  6 ++--
 tests/testthat/test-freq.R        |  1 +
 tests/testthat/test-mo_property.R | 22 ++++++++-----
 vignettes/AMR.Rmd                 |  4 +--
 9 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 6af7a6fa..b0879032 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,7 +10,7 @@
   * Column names of datasets `microorganisms` and `septic_patients`
   * All old syntaxes will still work with this version, but will throw warnings
 * Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`.
-* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. They also come with support for German, Dutch, Spanish and Portuguese, and it defaults to the systems locale:
+* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and default to the system's locale:
   ```r
   mo_gramstain("E. coli")
   # [1] "Negative rods"
@@ -55,7 +55,8 @@
 * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded
 * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences
 * Fix for joins, where predefined suffices would not be honoured
-* Support for types list and matrix for `freq`
+* Added parameter `quote` to the `freq` function
+* Support for types (classes) list and matrix for `freq`
   ```r
   my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
   freq(my_matrix)
diff --git a/R/freq.R b/R/freq.R
index 0c44dec0..978fb47d 100755
--- a/R/freq.R
+++ b/R/freq.R
@@ -27,6 +27,7 @@
 #' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)}
 #' @param markdown print table in markdown format (this forces \code{nmax = NA})
 #' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})
+#' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes
 #' @param sep a character string to separate the terms when selecting multiple columns
 #' @param f a frequency table
 #' @param n number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties.
@@ -148,6 +149,7 @@ frequency_tbl <- function(x,
                           row.names = TRUE,
                           markdown = FALSE,
                           digits = 2,
+                          quote = FALSE,
                           sep = " ") {
 
   mult.columns <- 0
@@ -429,6 +431,10 @@ frequency_tbl <- function(x,
     }
   }
 
+  if (quote == TRUE) {
+    df$item <- paste0('"', df$item, '"')
+  }
+
   df <- as.data.frame(df, stringsAsFactors = FALSE)
 
   df$percent <- df$count / base::sum(df$count, na.rm = TRUE)
diff --git a/R/mo.R b/R/mo.R
index f0a071e0..ddf2a6dd 100644
--- a/R/mo.R
+++ b/R/mo.R
@@ -127,7 +127,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) {
 
   x_backup <- x
   # translate to English for supported languages of mo_property
-  x <- gsub("(Gruppe|gruppe|groep|grupo)", "group", x)
+  x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x)
   # remove 'empty' genus and species values
   x <- gsub("(no MO)", "", x, fixed = TRUE)
   # remove dots and other non-text in case of "E. coli" except spaces
diff --git a/R/mo_property.R b/R/mo_property.R
index f4c9034a..b6e17279 100644
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -214,9 +214,9 @@ mo_translate <- function(x, language) {
     return(x)
   }
 
-  supported <- c("en", "de", "nl", "es", "pt")
+  supported <- c("en", "de", "nl", "es", "pt", "it", "fr")
   if (!language %in% supported) {
-    stop("Unsupported language: '", language, "' - use one of ", paste0("'", sort(supported), "'", collapse = ", "), call. = FALSE)
+    stop("Unsupported language: '", language, "' - use one of: ", paste0("'", sort(supported), "'", collapse = ", "), call. = FALSE)
   }
 
   case_when(
@@ -302,7 +302,50 @@ mo_translate <- function(x, language) {
       gsub("biotype",          "bi\u00f3tipo", ., fixed = TRUE) %>%
       gsub("vegetative",       "vegetativo", ., fixed = TRUE) %>%
       gsub("([([ ]*?)group",   "\\1grupo", .) %>%
-      gsub("([([ ]*?)Group",   "\\1Grupo", .)
+      gsub("([([ ]*?)Group",   "\\1Grupo", .),
+
+    # Italian
+    language == "it" ~ x %>%
+      gsub("Coagulase Negative Staphylococcus","Staphylococcus negativo coagulasi", ., fixed = TRUE) %>%
+      gsub("Coagulase Positive Staphylococcus","Staphylococcus positivo coagulasi", ., fixed = TRUE) %>%
+      gsub("Beta-haemolytic Streptococcus",    "Streptococcus Beta-emolitico", ., fixed = TRUE) %>%
+      gsub("(no MO)",          "(non MO)", ., fixed = TRUE) %>%
+      gsub("Negative rods",    "Bastoncini Gram-negativi", ., fixed = TRUE) %>%
+      gsub("Negative cocci",   "Cocchi Gram-negativi", ., fixed = TRUE) %>%
+      gsub("Positive rods",    "Bastoncini Gram-positivi", ., fixed = TRUE) %>%
+      gsub("Positive cocci",   "Cocchi Gram-positivi", ., fixed = TRUE) %>%
+      gsub("Parasites",        "Parassiti", ., fixed = TRUE) %>%
+      gsub("Fungi and yeasts", "Funghi e lieviti", ., fixed = TRUE) %>%
+      gsub("Bacteria",         "Batterio", ., fixed = TRUE) %>%
+      gsub("Fungus/yeast",     "Fungo/lievito", ., fixed = TRUE) %>%
+      gsub("Parasite",         "Parassita", ., fixed = TRUE) %>%
+      gsub("biogroup",         "biogruppo", ., fixed = TRUE) %>%
+      gsub("biotype",          "biotipo", ., fixed = TRUE) %>%
+      gsub("vegetative",       "vegetativo", ., fixed = TRUE) %>%
+      gsub("([([ ]*?)group",   "\\1gruppo", .) %>%
+      gsub("([([ ]*?)Group",   "\\1Gruppo", .),
+
+    # French
+    language == "fr" ~ x %>%
+      gsub("Coagulase Negative Staphylococcus","Staphylococcus \u00e0 coagulase n\u00e9gative", ., fixed = TRUE) %>%
+      gsub("Coagulase Positive Staphylococcus","Staphylococcus \u00e0 coagulase positif", ., fixed = TRUE) %>%
+      gsub("Beta-haemolytic Streptococcus",    "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>%
+      gsub("(no MO)",          "(pas MO)", ., fixed = TRUE) %>%
+      gsub("Negative rods",    "Bacilles n\u00e9gatif", ., fixed = TRUE) %>%
+      gsub("Negative cocci",   "Cocci n\u00e9gatif", ., fixed = TRUE) %>%
+      gsub("Positive rods",    "Bacilles positif", ., fixed = TRUE) %>%
+      gsub("Positive cocci",   "Cocci positif", ., fixed = TRUE) %>%
+      # gsub("Parasites",        "Parasites", ., fixed = TRUE) %>%
+      gsub("Fungi and yeasts", "Champignons et levures", ., fixed = TRUE) %>%
+      gsub("Bacteria",         "Bact\u00e9rie", ., fixed = TRUE) %>%
+      gsub("Fungus/yeast",     "Champignon/levure", ., fixed = TRUE) %>%
+      # gsub("Parasite",         "Parasite", ., fixed = TRUE) %>%
+      gsub("biogroup",         "biogroupe", ., fixed = TRUE) %>%
+      # gsub("biotype",          "biotype", ., fixed = TRUE) %>%
+      gsub("vegetative",       "v\u00e9g\u00e9tatif", ., fixed = TRUE) %>%
+      gsub("([([ ]*?)group",   "\\1groupe", .) %>%
+      gsub("([([ ]*?)Group",   "\\1Groupe", .)
+
   )
 
 }
@@ -314,7 +357,9 @@ mo_getlangcode <- function() {
     sys %like% '(Deutsch|German|de_)'       ~ "de",
     sys %like% '(Nederlands|Dutch|nl_)'     ~ "nl",
     sys %like% '(Espa.ol|Spanish|es_)'      ~ "es",
+    sys %like% '(Fran.ais|French|fr_)'      ~ "fr",
     sys %like% '(Portugu.s|Portuguese|pt_)' ~ "pt",
+    sys %like% '(Italiano|Italian|it_)'     ~ "it",
     TRUE                                    ~ "en"
   )
 }
diff --git a/README.md b/README.md
index aba1ff4c..a75579e4 100755
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ This `AMR` package basically does four important things:
    * Use `first_isolate` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute).
      * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them.
    * Use `MDRO` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported.
-   * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish and Portuguese. These functions can be used to add new variables to your data.
+   * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data.
    * The data set `antibiotics` contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like `ab_official` and `ab_tradenames` to look up values. As the `mo_*` functions use `as.mo` internally, the `ab_*` functions use `as.atc` internally so it uses AI to guess your expected result. For example, `ab_official("Fluclox")`, `ab_official("Floxapen")` and `ab_official("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data.
 
 3. It **analyses the data** with convenient functions that use well-known methods.
diff --git a/man/freq.Rd b/man/freq.Rd
index 0a1c399a..781ef25d 100755
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -9,11 +9,11 @@
 \usage{
 frequency_tbl(x, ..., sort.count = TRUE,
   nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE,
-  markdown = FALSE, digits = 2, sep = " ")
+  markdown = FALSE, digits = 2, quote = FALSE, sep = " ")
 
 freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
   na.rm = TRUE, row.names = TRUE, markdown = FALSE, digits = 2,
-  sep = " ")
+  quote = FALSE, sep = " ")
 
 top_freq(f, n)
 
@@ -37,6 +37,8 @@ top_freq(f, n)
 
 \item{digits}{how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})}
 
+\item{quote}{a logical value indicating whether or not strings should be printed with surrounding quotes}
+
 \item{sep}{a character string to separate the terms when selecting multiple columns}
 
 \item{f}{a frequency table}
diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R
index 787b17a5..4e747011 100755
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -20,6 +20,7 @@ test_that("frequency table works", {
   expect_output(print(freq(septic_patients$age, markdown = TRUE), markdown = FALSE))
   expect_output(print(freq(septic_patients$age, markdown = TRUE), markdown = TRUE))
   expect_output(print(freq(septic_patients$age[0])))
+  expect_output(print(freq(septic_patients$age, quote = TRUE)))
 
   # character
   expect_output(print(freq(septic_patients$mo)))
diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R
index de467241..391e73ce 100644
--- a/tests/testthat/test-mo_property.R
+++ b/tests/testthat/test-mo_property.R
@@ -16,13 +16,6 @@ test_that("mo_property works", {
   expect_equal(mo_shortname("S. aga"), "S. agalactiae")
   expect_equal(mo_shortname("S. aga", Lancefield = TRUE), "GBS")
 
-  expect_equal(mo_type("E. coli", language = "de"), "Bakterium")
-
-  expect_equal(mo_type("E. coli", language = "nl"), "Bacterie")
-  expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven")
-
-  expect_error(mo_type("E. coli", language = "INVALID"))
-
   # test integrity
   library(dplyr)
   MOs <- AMR::microorganisms %>% filter(!is.na(mo))
@@ -45,4 +38,19 @@ test_that("mo_property works", {
   expect_gt(sum(tb$c) / nrow(tb), 0.9) # more than 90% of MO code should be identical
   expect_identical(sum(tb$f), nrow(tb)) # all shortnames should be identical
 
+  # check languages
+  expect_equal(mo_type("E. coli", language = "de"), "Bakterium")
+  expect_equal(mo_type("E. coli", language = "nl"), "Bacterie")
+  expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven")
+
+  expect_output(print(mo_gramstain("E. coli", language = "en")))
+  expect_output(print(mo_gramstain("E. coli", language = "de")))
+  expect_output(print(mo_gramstain("E. coli", language = "nl")))
+  expect_output(print(mo_gramstain("E. coli", language = "es")))
+  expect_output(print(mo_gramstain("E. coli", language = "pt")))
+  expect_output(print(mo_gramstain("E. coli", language = "it")))
+  expect_output(print(mo_gramstain("E. coli", language = "fr")))
+
+  expect_error(mo_gramstain("E. coli", language = "UNKNOWN"))
+
 })
diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd
index bbd4d17e..e6dedaf1 100755
--- a/vignettes/AMR.Rmd
+++ b/vignettes/AMR.Rmd
@@ -34,9 +34,9 @@ This `AMR` package basically does four important things:
    * Use `first_isolate` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute).
      * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them.
    * Use `MDRO` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported.
-   * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish and Portuguese. These functions can be used to add new variables to your data.
+   * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data.
    * The data set `antibiotics` contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like `ab_official` and `ab_tradenames` to look up values. As the `mo_*` functions use `as.mo` internally, the `ab_*` functions use `as.atc` internally so it uses AI to guess your expected result. For example, `ab_official("Fluclox")`, `ab_official("Floxapen")` and `ab_official("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data.
-
+   
 3. It **analyses the data** with convenient functions that use well-known methods.
 
    * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *amount* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))