From cb0d74a4f008f3fdade89cca65dbf3ddb87aa37d Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 10 Sep 2018 15:45:25 +0200 Subject: [PATCH] support for French and Italian, added quote to freq --- NEWS.md | 5 +-- R/freq.R | 6 ++++ R/mo.R | 2 +- R/mo_property.R | 51 +++++++++++++++++++++++++++++-- README.md | 2 +- man/freq.Rd | 6 ++-- tests/testthat/test-freq.R | 1 + tests/testthat/test-mo_property.R | 22 ++++++++----- vignettes/AMR.Rmd | 4 +-- 9 files changed, 81 insertions(+), 18 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6af7a6fa..b0879032 100755 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ * Column names of datasets `microorganisms` and `septic_patients` * All old syntaxes will still work with this version, but will throw warnings * Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`. -* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. They also come with support for German, Dutch, Spanish and Portuguese, and it defaults to the systems locale: +* Aliases for existing function `mo_property`: `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies`, `mo_fullname`, `mo_shortname`, `mo_aerobic`, `mo_type` and `mo_gramstain`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and they default to the system's locale: ```r mo_gramstain("E. 
coli") # [1] "Negative rods" @@ -55,7 +55,8 @@ * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences * Fix for joins, where predefined suffices would not be honoured -* Support for types list and matrix for `freq` +* Added parameter `quote` to the `freq` function +* Support for types (classes) list and matrix for `freq` ```r my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2)) freq(my_matrix) diff --git a/R/freq.R b/R/freq.R index 0c44dec0..978fb47d 100755 --- a/R/freq.R +++ b/R/freq.R @@ -27,6 +27,7 @@ #' @param row.names a logical value indicating whether row indices should be printed as \code{1:nrow(x)} #' @param markdown print table in markdown format (this forces \code{nmax = NA}) #' @param digits how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")}) +#' @param quote a logical value indicating whether or not strings should be printed with surrounding quotes #' @param sep a character string to separate the terms when selecting multiple columns #' @param f a frequency table #' @param n number of top \emph{n} items to return, use -n for the bottom \emph{n} items. It will include more than \code{n} rows if there are ties. 
@@ -148,6 +149,7 @@ frequency_tbl <- function(x, row.names = TRUE, markdown = FALSE, digits = 2, + quote = FALSE, sep = " ") { mult.columns <- 0 @@ -429,6 +431,10 @@ frequency_tbl <- function(x, } } + if (quote == TRUE) { + df$item <- paste0('"', df$item, '"') + } + df <- as.data.frame(df, stringsAsFactors = FALSE) df$percent <- df$count / base::sum(df$count, na.rm = TRUE) diff --git a/R/mo.R b/R/mo.R index f0a071e0..ddf2a6dd 100644 --- a/R/mo.R +++ b/R/mo.R @@ -127,7 +127,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x_backup <- x # translate to English for supported languages of mo_property - x <- gsub("(Gruppe|gruppe|groep|grupo)", "group", x) + x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x) # remove 'empty' genus and species values x <- gsub("(no MO)", "", x, fixed = TRUE) # remove dots and other non-text in case of "E. coli" except spaces diff --git a/R/mo_property.R b/R/mo_property.R index f4c9034a..b6e17279 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -214,9 +214,9 @@ mo_translate <- function(x, language) { return(x) } - supported <- c("en", "de", "nl", "es", "pt") + supported <- c("en", "de", "nl", "es", "pt", "it", "fr") if (!language %in% supported) { - stop("Unsupported language: '", language, "' - use one of ", paste0("'", sort(supported), "'", collapse = ", "), call. = FALSE) + stop("Unsupported language: '", language, "' - use one of: ", paste0("'", sort(supported), "'", collapse = ", "), call. = FALSE) } case_when( @@ -302,7 +302,50 @@ mo_translate <- function(x, language) { gsub("biotype", "bi\u00f3tipo", ., fixed = TRUE) %>% gsub("vegetative", "vegetativo", ., fixed = TRUE) %>% gsub("([([ ]*?)group", "\\1grupo", .) %>% - gsub("([([ ]*?)Group", "\\1Grupo", .) 
+ gsub("([([ ]*?)Group", "\\1Grupo", .), + + # Italian + language == "it" ~ x %>% + gsub("Coagulase Negative Staphylococcus","Staphylococcus negativo coagulasi", ., fixed = TRUE) %>% + gsub("Coagulase Positive Staphylococcus","Staphylococcus positivo coagulasi", ., fixed = TRUE) %>% + gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-emolitico", ., fixed = TRUE) %>% + gsub("(no MO)", "(non MO)", ., fixed = TRUE) %>% + gsub("Negative rods", "Bastoncini Gram-negativi", ., fixed = TRUE) %>% + gsub("Negative cocci", "Cocchi Gram-negativi", ., fixed = TRUE) %>% + gsub("Positive rods", "Bastoncini Gram-positivi", ., fixed = TRUE) %>% + gsub("Positive cocci", "Cocchi Gram-positivi", ., fixed = TRUE) %>% + gsub("Parasites", "Parassiti", ., fixed = TRUE) %>% + gsub("Fungi and yeasts", "Funghi e lieviti", ., fixed = TRUE) %>% + gsub("Bacteria", "Batterio", ., fixed = TRUE) %>% + gsub("Fungus/yeast", "Fungo/lievito", ., fixed = TRUE) %>% + gsub("Parasite", "Parassita", ., fixed = TRUE) %>% + gsub("biogroup", "biogruppo", ., fixed = TRUE) %>% + gsub("biotype", "biotipo", ., fixed = TRUE) %>% + gsub("vegetative", "vegetativo", ., fixed = TRUE) %>% + gsub("([([ ]*?)group", "\\1gruppo", .) 
%>% + gsub("([([ ]*?)Group", "\\1Gruppo", .), + + # French + language == "fr" ~ x %>% + gsub("Coagulase Negative Staphylococcus","Staphylococcus \u00e0 coagulase n\u00e9gative", ., fixed = TRUE) %>% + gsub("Coagulase Positive Staphylococcus","Staphylococcus \u00e0 coagulase positif", ., fixed = TRUE) %>% + gsub("Beta-haemolytic Streptococcus", "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>% + gsub("(no MO)", "(pas MO)", ., fixed = TRUE) %>% + gsub("Negative rods", "Bacilles n\u00e9gatif", ., fixed = TRUE) %>% + gsub("Negative cocci", "Cocci n\u00e9gatif", ., fixed = TRUE) %>% + gsub("Positive rods", "Bacilles positif", ., fixed = TRUE) %>% + gsub("Positive cocci", "Cocci positif", ., fixed = TRUE) %>% + # gsub("Parasites", "Parasites", ., fixed = TRUE) %>% + gsub("Fungi and yeasts", "Champignons et levures", ., fixed = TRUE) %>% + gsub("Bacteria", "Bact\u00e9rie", ., fixed = TRUE) %>% + gsub("Fungus/yeast", "Champignon/levure", ., fixed = TRUE) %>% + # gsub("Parasite", "Parasite", ., fixed = TRUE) %>% + gsub("biogroup", "biogroupe", ., fixed = TRUE) %>% + # gsub("biotype", "biotype", ., fixed = TRUE) %>% + gsub("vegetative", "v\u00e9g\u00e9tatif", ., fixed = TRUE) %>% + gsub("([([ ]*?)group", "\\1groupe", .) %>% + gsub("([([ ]*?)Group", "\\1Groupe", .) 
+ ) } @@ -314,7 +357,9 @@ mo_getlangcode <- function() { sys %like% '(Deutsch|German|de_)' ~ "de", sys %like% '(Nederlands|Dutch|nl_)' ~ "nl", sys %like% '(Espa.ol|Spanish|es_)' ~ "es", + sys %like% '(Fran.ais|French|fr_)' ~ "fr", sys %like% '(Portugu.s|Portuguese|pt_)' ~ "pt", + sys %like% '(Italiano|Italian|it_)' ~ "it", TRUE ~ "en" ) } diff --git a/README.md b/README.md index aba1ff4c..a75579e4 100755 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ This `AMR` package basically does four important things: * Use `first_isolate` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute). * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. * Use `MDRO` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported. - * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish and Portuguese. These functions can be used to add new variables to your data. 
+ * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data. * The data set `antibiotics` contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like `ab_official` and `ab_tradenames` to look up values. As the `mo_*` functions use `as.mo` internally, the `ab_*` functions use `as.atc` internally so it uses AI to guess your expected result. For example, `ab_official("Fluclox")`, `ab_official("Floxapen")` and `ab_official("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data. 3. It **analyses the data** with convenient functions that use well-known methods. 
diff --git a/man/freq.Rd b/man/freq.Rd index 0a1c399a..781ef25d 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -9,11 +9,11 @@ \usage{ frequency_tbl(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, - markdown = FALSE, digits = 2, sep = " ") + markdown = FALSE, digits = 2, quote = FALSE, sep = " ") freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = FALSE, digits = 2, - sep = " ") + quote = FALSE, sep = " ") top_freq(f, n) @@ -37,6 +37,8 @@ top_freq(f, n) \item{digits}{how many significant digits are to be used for numeric values in the header (not for the items themselves, that depends on \code{\link{getOption}("digits")})} +\item{quote}{a logical value indicating whether or not strings should be printed with surrounding quotes} + \item{sep}{a character string to separate the terms when selecting multiple columns} \item{f}{a frequency table} diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 787b17a5..4e747011 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -20,6 +20,7 @@ test_that("frequency table works", { expect_output(print(freq(septic_patients$age, markdown = TRUE), markdown = FALSE)) expect_output(print(freq(septic_patients$age, markdown = TRUE), markdown = TRUE)) expect_output(print(freq(septic_patients$age[0]))) + expect_output(print(freq(septic_patients$age, quote = TRUE))) # character expect_output(print(freq(septic_patients$mo))) diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index de467241..391e73ce 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -16,13 +16,6 @@ test_that("mo_property works", { expect_equal(mo_shortname("S. aga"), "S. agalactiae") expect_equal(mo_shortname("S. aga", Lancefield = TRUE), "GBS") - expect_equal(mo_type("E. coli", language = "de"), "Bakterium") - - expect_equal(mo_type("E. 
coli", language = "nl"), "Bacterie") - expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven") - - expect_error(mo_type("E. coli", language = "INVALID")) - # test integrity library(dplyr) MOs <- AMR::microorganisms %>% filter(!is.na(mo)) @@ -45,4 +38,19 @@ test_that("mo_property works", { expect_gt(sum(tb$c) / nrow(tb), 0.9) # more than 90% of MO code should be identical expect_identical(sum(tb$f), nrow(tb)) # all shortnames should be identical + # check languages + expect_equal(mo_type("E. coli", language = "de"), "Bakterium") + expect_equal(mo_type("E. coli", language = "nl"), "Bacterie") + expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven") + + expect_output(print(mo_gramstain("E. coli", language = "en"))) + expect_output(print(mo_gramstain("E. coli", language = "de"))) + expect_output(print(mo_gramstain("E. coli", language = "nl"))) + expect_output(print(mo_gramstain("E. coli", language = "es"))) + expect_output(print(mo_gramstain("E. coli", language = "pt"))) + expect_output(print(mo_gramstain("E. coli", language = "it"))) + expect_output(print(mo_gramstain("E. coli", language = "fr"))) + + expect_error(mo_gramstain("E. coli", language = "UNKNOWN")) + }) diff --git a/vignettes/AMR.Rmd b/vignettes/AMR.Rmd index bbd4d17e..e6dedaf1 100755 --- a/vignettes/AMR.Rmd +++ b/vignettes/AMR.Rmd @@ -34,9 +34,9 @@ This `AMR` package basically does four important things: * Use `first_isolate` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute). * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. * Use `MDRO` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. 
Currently, national guidelines for Germany and the Netherlands are supported. - * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, Spanish and Portuguese. These functions can be used to add new variables to your data. + * The data set `microorganisms` contains the family, genus, species, subspecies, colloquial name and Gram stain of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). This enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family` or `mo_gramstain`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data. * The data set `antibiotics` contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like `ab_official` and `ab_tradenames` to look up values. As the `mo_*` functions use `as.mo` internally, the `ab_*` functions use `as.atc` internally so it uses AI to guess your expected result. 
For example, `ab_official("Fluclox")`, `ab_official("Floxapen")` and `ab_official("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data. - + 3. It **analyses the data** with convenient functions that use well-known methods. * Calculate the resistance (and even co-resistance) of microbial isolates with the `portion_R`, `portion_IR`, `portion_I`, `portion_SI` and `portion_S` functions. Similarly, the *amount* of isolates can be determined with the `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` functions. All these functions can be used [with the `dplyr` package](https://dplyr.tidyverse.org/#usage) (e.g. in conjunction with [`summarise`](https://dplyr.tidyverse.org/reference/summarise.html))