new g.test() and edited freq()

2025-07-08 10:31:53 +02:00 · 2018-07-01 21:40:37 +02:00
parent f7af8a81da
commit 3527894b49
18 changed files with 893 additions and 296 deletions
--- a/vignettes/freq.R
+++ b/vignettes/freq.R
@ -10,20 +10,23 @@ library(AMR)
 # just using base R
 freq(septic_patients$sex)

-# using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe from the dplyr package
 septic_patients$sex %>% freq()

-# do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function from the dplyr package
 septic_patients %>%
  select(sex) %>%
  freq()

+# or the preferred way: using a pipe to pass the variable on to the freq function
+septic_patients %>% freq(sex) # this also shows 'sex' in the title
+
+
 ## ---- echo = TRUE--------------------------------------------------------
 freq(septic_patients$sex)

 ## ---- echo = TRUE, results = 'hide'--------------------------------------
-my_patients <- septic_patients %>% 
-  left_join_microorganisms()
+my_patients <- septic_patients %>% left_join_microorganisms()

 ## ---- echo = TRUE--------------------------------------------------------
 colnames(microorganisms)
@ -33,26 +36,21 @@ dim(septic_patients)
 dim(my_patients)

 ## ---- echo = TRUE--------------------------------------------------------
-my_patients %>%
-  select(genus, species) %>%
-  freq()
+my_patients %>% freq(genus, species)

 ## ---- echo = TRUE--------------------------------------------------------
 # # get age distribution of unique patients
 septic_patients %>% 
  distinct(patient_id, .keep_all = TRUE) %>% 
-  select(age) %>% 
-  freq(nmax = 5)
+  freq(age, nmax = 5)

 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq()
+  freq(hospital_id)

 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq(sort.count = TRUE)
+  freq(hospital_id, sort.count = TRUE)

 ## ---- echo = TRUE--------------------------------------------------------
 septic_patients %>%
@ -65,29 +63,24 @@ septic_patients %>%
  freq(nmax = 5)

 ## ---- echo = TRUE--------------------------------------------------------
-septic_patients %>%
-  select(amox) %>% 
-  freq(na.rm = FALSE)
-
-## ---- echo = TRUE--------------------------------------------------------
-septic_patients %>%
-  select(hospital_id) %>% 
-  freq(row.names = FALSE)
-
-## ---- echo = TRUE--------------------------------------------------------
-septic_patients %>%
-  select(hospital_id) %>% 
-  freq(markdown = TRUE)
-
-## ---- echo = TRUE--------------------------------------------------------
-my_df <- septic_patients %>%
-  select(hospital_id) %>% 
-  freq(as.data.frame = TRUE)
-
-my_df
-
+my_df <- septic_patients %>% freq(age)
 class(my_df)

+## ---- echo = TRUE--------------------------------------------------------
+dim(my_df)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(amox, na.rm = FALSE)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id, row.names = FALSE)
+
+## ---- echo = TRUE--------------------------------------------------------
+septic_patients %>%
+  freq(hospital_id, markdown = TRUE)
+
 ## ---- echo = FALSE-------------------------------------------------------
 # this will print "2018" in 2018, and "2018-yyyy" after 2018.
 yrs <- c(2018:format(Sys.Date(), "%Y"))
--- a/vignettes/freq.Rmd
+++ b/vignettes/freq.Rmd
@ -30,13 +30,17 @@ To only show and quickly review the content of one variable, you can just select
 # just using base R
 freq(septic_patients$sex)

-# using base R to select the variable and pass it on with a pipe
+# using base R to select the variable and pass it on with a pipe from the dplyr package
 septic_patients$sex %>% freq()

-# do it all with pipes, using the `select` function of the dplyr package
+# do it all with pipes, using the `select` function from the dplyr package
 septic_patients %>%
  select(sex) %>%
  freq()
+
+# or the preferred way: using a pipe to pass the variable on to the freq function
+septic_patients %>% freq(sex) # this also shows 'sex' in the title
+
 ```
 This will all lead to the following table:
 ```{r, echo = TRUE}
@ -50,8 +54,7 @@ Multiple variables will be pasted into one variable to review individual cases,

 For illustration, we could add some more variables to the `septic_patients` dataset to learn about bacterial properties:
 ```{r, echo = TRUE, results = 'hide'}
-my_patients <- septic_patients %>% 
-  left_join_microorganisms()
+my_patients <- septic_patients %>% left_join_microorganisms()
 ```
 Now all variables of the `microorganisms` dataset have been joined to the `septic_patients` dataset. The `microorganisms` dataset consists of the following variables:
 ```{r, echo = TRUE}
@ -66,9 +69,7 @@ dim(my_patients)

 So now the `genus` and `species` variables are available. A frequency table of these combined variables can be created like this:
 ```{r, echo = TRUE}
-my_patients %>%
-  select(genus, species) %>%
-  freq()
+my_patients %>% freq(genus, species)
 ```

 ## Frequencies of numeric values
@ -81,8 +82,7 @@ In case of numeric values (like integers, doubles, etc.) additional descriptive
 # # get age distribution of unique patients
 septic_patients %>% 
  distinct(patient_id, .keep_all = TRUE) %>% 
-  select(age) %>% 
-  freq(nmax = 5)
+  freq(age, nmax = 5)
 ```

 So the following properties are determined, where `NA` values are always ignored:
@ -109,16 +109,14 @@ Frequencies of factors will be sorted on factor level instead of item count by d

 ```{r, echo = TRUE}
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq()
+  freq(hospital_id)
 ```

 ... with this, where items are now sorted on count:

 ```{r, echo = TRUE}
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq(sort.count = TRUE)
+  freq(hospital_id, sort.count = TRUE)
 ```

 All classes will be printed into the header. Variables with the new `rsi` class of this AMR package are actually ordered factors and have three classes (look at `Class` in the header):
@ -139,6 +137,21 @@ septic_patients %>%
  freq(nmax = 5)
 ```

+## Assigning a frequency table to an object
+
+A frequency table is actually a regular `data.frame`, with the exception that it contains an additional class.
+
+```{r, echo = TRUE}
+my_df <- septic_patients %>% freq(age)
+class(my_df)
+```
+
+Because of this additional class, a frequency table prints like the examples above. But the object itself contains the complete table without a row limitation:
+
+```{r, echo = TRUE}
+dim(my_df)
+```
+
 ## Additional parameters

 ### Parameter `na.rm`
@ -146,8 +159,7 @@ With the `na.rm` parameter (defaults to `TRUE`, but they will always be shown in

 ```{r, echo = TRUE}
 septic_patients %>%
-  select(amox) %>% 
-  freq(na.rm = FALSE)
+  freq(amox, na.rm = FALSE)
 ```

 ### Parameter `row.names`
@ -155,8 +167,7 @@ The default frequency tables shows row indices. To remove them, use `row.names =

 ```{r, echo = TRUE}
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq(row.names = FALSE)
+  freq(hospital_id, row.names = FALSE)
 ```

 ### Parameter `markdown`
@ -164,21 +175,7 @@ The `markdown` parameter can be used in reports created with R Markdown. This wi

 ```{r, echo = TRUE}
 septic_patients %>%
-  select(hospital_id) %>% 
-  freq(markdown = TRUE)
-```
-
-### Parameter `as.data.frame`
-With the `as.data.frame` parameter you can assign the frequency table to an object, or just print it as a `data.frame` to the console:
-
-```{r, echo = TRUE}
-my_df <- septic_patients %>%
-  select(hospital_id) %>% 
-  freq(as.data.frame = TRUE)
-
-my_df
-
-class(my_df)
+  freq(hospital_id, markdown = TRUE)
 ```

 ----