Skip to content

Commit 9d36b70

Browse files
committed
Add back in built vignettes
1 parent 1917c89 commit 9d36b70

15 files changed

+5298
-0
lines changed

inst/doc/chisq_test.R

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
## ----include=FALSE-------------------------------------------------------
2+
# Set the default figure width and height for the chunks in this document.
knitr::opts_chunk$set(fig.width = 8, fig.height = 3)
3+
4+
## ----message=FALSE, warning=FALSE----------------------------------------
5+
library(nycflights13)
6+
library(dplyr)
7+
library(ggplot2)
8+
library(stringr)
9+
library(infer)
10+
# Fix the RNG seed so the 500-row sample drawn below is reproducible.
set.seed(2017)
11+
fli_small <- flights %>%
12+
na.omit() %>%  # drop every row that contains a missing value
13+
sample_n(size = 500) %>%  # random sample of 500 flights
14+
mutate(season = case_when(
15+
month %in% c(10:12, 1:3) ~ "winter",  # October through March
16+
month %in% c(4:9) ~ "summer"  # April through September
17+
)) %>%
18+
mutate(day_hour = case_when(
19+
between(hour, 1, 12) ~ "morning",
20+
between(hour, 13, 24) ~ "not morning"  # rows matching neither branch (e.g. hour 0) get NA
21+
)) %>%
22+
select(arr_delay, dep_delay, season,  # keep only these six columns
23+
day_hour, origin, carrier)
24+
25+
## ------------------------------------------------------------------------
26+
# Observed "Chisq" statistic for origin vs. season via specify() + calculate().
obs_chisq <- fli_small %>%
27+
specify(origin ~ season) %>% # alt: response = origin, explanatory = season
28+
calculate(stat = "Chisq")
29+
30+
## ------------------------------------------------------------------------
31+
# Alternative route through the chisq_test() wrapper: only the `statistic`
# column is kept, and the result overwrites the earlier `obs_chisq`.
obs_chisq <- fli_small %>%
32+
chisq_test(formula = origin ~ season) %>%
33+
dplyr::select(statistic)
34+
35+
## ------------------------------------------------------------------------
36+
# Third route: the chisq_stat() shortcut. This is the last assignment to
# `obs_chisq`, so it is the value the later chunks pass as `obs_stat`.
obs_chisq <- fli_small %>%
37+
chisq_stat(formula = origin ~ season)
38+
39+
## ------------------------------------------------------------------------
40+
# Build the null distribution: under the "independence" null, generate 1000
# permuted replicates and calculate the "Chisq" statistic on them.
chisq_null_distn <- fli_small %>%
41+
specify(origin ~ season) %>% # alt: response = origin, explanatory = season
42+
hypothesize(null = "independence") %>%
43+
generate(reps = 1000, type = "permute") %>%
44+
calculate(stat = "Chisq")
45+
# Plot the null distribution together with the observed statistic.
chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater")
46+
47+
## ------------------------------------------------------------------------
48+
# Randomization-based p-value of the observed statistic against the
# permutation null distribution, in the "greater" direction.
chisq_null_distn %>%
49+
get_pvalue(obs_stat = obs_chisq, direction = "greater")
50+
51+
## ------------------------------------------------------------------------
52+
# Theoretical approach: the generate() step is skipped and visualize() is
# called with method = "theoretical" plus the observed statistic.
fli_small %>%
53+
specify(origin ~ season) %>%
54+
hypothesize(null = "independence") %>%
55+
# generate() ## Not used for theoretical
56+
calculate(stat = "Chisq") %>%
57+
visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right")
58+
59+
## ----eval=FALSE----------------------------------------------------------
60+
# fli_small %>%
61+
# specify(origin ~ season) %>% # alt: response = origin, explanatory = season
62+
# hypothesize(null = "independence") %>%
63+
# generate(reps = 1000, type = "permute") %>%
64+
# calculate(stat = "Chisq") %>%
65+
# visualize(method = "both", obs_stat = obs_chisq, direction = "right")
66+
67+
## ----echo=FALSE----------------------------------------------------------
68+
# Reuse the permutation null distribution computed above (chisq_null_distn)
# instead of generating a new one; visualize() is called with method = "both".
69+
chisq_null_distn %>%
70+
visualize(method = "both", obs_stat = obs_chisq, direction = "right")
71+
72+
## ------------------------------------------------------------------------
73+
# Theoretical p-value: run chisq_test(), keep only the p_value column, and
# pull it out of the data frame as a plain vector.
fli_small %>%
74+
chisq_test(formula = origin ~ season) %>%
75+
dplyr::select(p_value) %>%
76+
dplyr::pull()
77+

inst/doc/chisq_test.Rmd

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
---
2+
title: "Chi-squared test example using `nycflights13` `flights` data"
3+
author: "Chester Ismay"
4+
date: "`r Sys.Date()`"
5+
output:
6+
rmarkdown::html_vignette:
7+
df_print: kable
8+
vignette: |
9+
%\VignetteIndexEntry{Chi-squared test flights example}
10+
%\VignetteEngine{knitr::rmarkdown}
11+
%\VignetteEncoding{UTF-8}
12+
---
13+
14+
```{r include=FALSE}
15+
# Set the default figure width and height for the chunks in this document.
knitr::opts_chunk$set(fig.width = 8, fig.height = 3)
16+
```
17+
18+
**Note**: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and
19+
`hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
20+
21+
## Data preparation
22+
23+
```{r message=FALSE, warning=FALSE}
24+
library(nycflights13)
25+
library(dplyr)
26+
library(ggplot2)
27+
library(stringr)
28+
library(infer)
29+
# Fix the RNG seed so the 500-row sample drawn below is reproducible.
set.seed(2017)
30+
fli_small <- flights %>%
31+
na.omit() %>%  # drop every row that contains a missing value
32+
sample_n(size = 500) %>%  # random sample of 500 flights
33+
mutate(season = case_when(
34+
month %in% c(10:12, 1:3) ~ "winter",  # October through March
35+
month %in% c(4:9) ~ "summer"  # April through September
36+
)) %>%
37+
mutate(day_hour = case_when(
38+
between(hour, 1, 12) ~ "morning",
39+
between(hour, 13, 24) ~ "not morning"  # rows matching neither branch (e.g. hour 0) get NA
40+
)) %>%
41+
select(arr_delay, dep_delay, season,  # keep only these six columns
42+
day_hour, origin, carrier)
43+
```
44+
45+
* Two numeric - `arr_delay`, `dep_delay`
46+
* Two categories
47+
- `season` (`"winter"`, `"summer"`),
48+
- `day_hour` (`"morning"`, `"not morning"`)
49+
* Three categories - `origin` (`"EWR"`, `"JFK"`, `"LGA"`)
50+
* Sixteen categories - `carrier`
51+
52+
***
53+
54+
# Two categorical variables (3 levels and 2 levels)
55+
56+
## Calculate observed statistic
57+
58+
The recommended approach is to use `specify() %>% calculate()`:
59+
60+
```{r}
61+
# Observed "Chisq" statistic for origin vs. season via specify() + calculate().
obs_chisq <- fli_small %>%
62+
specify(origin ~ season) %>% # alt: response = origin, explanatory = season
63+
calculate(stat = "Chisq")
64+
```
65+
66+
The observed $\chi^2$ statistic is `r obs_chisq`.
67+
68+
Or using `chisq_test()` in `infer`:
69+
70+
```{r}
71+
# Alternative route through the chisq_test() wrapper: only the `statistic`
# column is kept, and the result overwrites the earlier `obs_chisq`.
obs_chisq <- fli_small %>%
72+
chisq_test(formula = origin ~ season) %>%
73+
dplyr::select(statistic)
74+
```
75+
76+
Again, the observed $\chi^2$ statistic is `r obs_chisq`.
77+
78+
Or using another shortcut function in `infer`:
79+
80+
```{r}
81+
# Third route: the chisq_stat() shortcut. This is the last assignment to
# `obs_chisq`, so it is the value the later chunks pass as `obs_stat`.
obs_chisq <- fli_small %>%
82+
chisq_stat(formula = origin ~ season)
83+
```
84+
85+
Lastly, the observed $\chi^2$ statistic is `r obs_chisq`.
86+
87+
## Randomization approach to $\chi^2$-statistic
88+
89+
```{r}
90+
# Build the null distribution: under the "independence" null, generate 1000
# permuted replicates and calculate the "Chisq" statistic on them.
chisq_null_distn <- fli_small %>%
91+
specify(origin ~ season) %>% # alt: response = origin, explanatory = season
92+
hypothesize(null = "independence") %>%
93+
generate(reps = 1000, type = "permute") %>%
94+
calculate(stat = "Chisq")
95+
# Plot the null distribution together with the observed statistic.
chisq_null_distn %>% visualize(obs_stat = obs_chisq, direction = "greater")
96+
```
97+
98+
## Calculate the randomization-based $p$-value
99+
100+
```{r}
101+
# Randomization-based p-value of the observed statistic against the
# permutation null distribution, in the "greater" direction.
chisq_null_distn %>%
102+
get_pvalue(obs_stat = obs_chisq, direction = "greater")
103+
```
104+
105+
106+
## Theoretical distribution
107+
108+
```{r }
109+
# Theoretical approach: the generate() step is skipped and visualize() is
# called with method = "theoretical" plus the observed statistic.
fli_small %>%
110+
specify(origin ~ season) %>%
111+
hypothesize(null = "independence") %>%
112+
# generate() ## Not used for theoretical
113+
calculate(stat = "Chisq") %>%
114+
visualize(method = "theoretical", obs_stat = obs_chisq, direction = "right")
115+
```
116+
117+
## Overlay appropriate $\chi^2$ distribution on top of permuted statistics
118+
119+
```{r eval=FALSE}
120+
# Same permutation pipeline as above, ending in visualize(method = "both") so
# the theoretical chi-squared curve is overlaid on the permuted statistics.
fli_small %>%
121+
specify(origin ~ season) %>% # alt: response = origin, explanatory = season
122+
hypothesize(null = "independence") %>%
123+
generate(reps = 1000, type = "permute") %>%
124+
calculate(stat = "Chisq") %>%
125+
visualize(method = "both", obs_stat = obs_chisq, direction = "right")
126+
```
127+
128+
```{r echo=FALSE}
129+
# Reuse the permutation null distribution computed above (chisq_null_distn)
# instead of generating a new one; visualize() is called with method = "both".
130+
chisq_null_distn %>%
131+
visualize(method = "both", obs_stat = obs_chisq, direction = "right")
132+
```
133+
134+
135+
## Compute theoretical p-value
136+
137+
```{r}
138+
# Theoretical p-value: run chisq_test(), keep only the p_value column, and
# pull it out of the data frame as a plain vector.
fli_small %>%
139+
chisq_test(formula = origin ~ season) %>%
140+
dplyr::select(p_value) %>%
141+
dplyr::pull()
142+
```
143+

0 commit comments

Comments
 (0)