Skip to content

Commit 6021338

Browse files
authored
Merge pull request #251 from tidymodels/develop
Develop
2 parents d48bacb + 1b7f4aa commit 6021338

File tree

91 files changed

+8230
-932
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+8230
-932
lines changed

.Rbuildignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
^docs*
1111
^CONDUCT\.md$
1212
^README\.md$
13-
^NEWS\.md$
1413
^cran-comments\.md$
1514
^_build\.sh$
1615
^appveyor\.yml$

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ latex: false
1111
env:
1212
global:
1313
- CRAN: http://cran.rstudio.com
14+
- VDIFFR_RUN_TESTS: false
1415

1516
notifications:
1617
email:

DESCRIPTION

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: infer
22
Type: Package
33
Title: Tidy Statistical Inference
4-
Version: 0.4.1
4+
Version: 0.5.0
55
Authors@R: c(
66
person("Andrew", "Bray", email = "[email protected]", role = c("aut", "cre")),
77
person("Chester", "Ismay", email = "[email protected]", role = "aut"),
@@ -28,7 +28,8 @@ Imports:
2828
ggplot2,
2929
magrittr,
3030
glue (>= 1.3.0),
31-
grDevices
31+
grDevices,
32+
purrr
3233
Depends:
3334
R (>= 3.1.2)
3435
Suggests:
@@ -39,7 +40,8 @@ Suggests:
3940
nycflights13,
4041
stringr,
4142
testthat,
42-
covr
43+
covr,
44+
vdiffr
4345
URL: https://github.com/tidymodels/infer
4446
BugReports: https://github.com/tidymodels/infer/issues
4547
Roxygen: list(markdown = TRUE)

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,17 @@ importFrom(ggplot2,ylab)
4646
importFrom(glue,glue_collapse)
4747
importFrom(magrittr,"%>%")
4848
importFrom(methods,hasArg)
49+
importFrom(purrr,compact)
4950
importFrom(rlang,"!!")
5051
importFrom(rlang,":=")
5152
importFrom(rlang,enquo)
5253
importFrom(rlang,eval_tidy)
5354
importFrom(rlang,f_lhs)
5455
importFrom(rlang,f_rhs)
56+
importFrom(rlang,get_expr)
5557
importFrom(rlang,quo)
5658
importFrom(rlang,sym)
59+
importFrom(stats,as.formula)
5760
importFrom(stats,dchisq)
5861
importFrom(stats,df)
5962
importFrom(stats,dnorm)

NEWS.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
# infer 0.5.0
2+
3+
## Breaking changes
4+
5+
- `shade_confidence_interval()` now plots vertical lines starting from zero (previously - from the bottom of a plot) (#234).
6+
- `shade_p_value()` now uses "area under the curve" approach to shading (#229).
7+
8+
## Other
9+
10+
- Updated `chisq_test()` to take arguments in a response/explanatory format, perform goodness of fit tests, and default to the approximation approach (#241).
11+
- Updated `chisq_stat()` to do goodness of fit (#241).
12+
- Make interface to `hypothesize()` clearer by adding the options for the point null parameters to the function signature (#242).
13+
- Manage `infer` class more systematically (#219).
14+
- Use `vdiffr` for plot testing (#221).
15+
116
# infer 0.4.1
217

318
- Added Evgeni Chasnovski as author for his incredible work on refactoring the package and providing excellent support.

R/calculate.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ calculate <- function(x,
108108
)
109109
}
110110
# else {
111-
# class(result) <- append("infer", class(result))
111+
# result <- append_infer_class(result)
112112
# }
113113

114114
result <- copy_attrs(to = result, from = x)
@@ -232,12 +232,12 @@ calc_impl.Chisq <- function(type, x, order, ...) {
232232
p_levels <- get_par_levels(x)
233233
x %>%
234234
dplyr::summarize(
235-
stat = stats::chisq.test(
235+
stat = suppressWarnings(stats::chisq.test(
236236
# Ensure correct ordering of parameters
237237
table(!!(attr(x, "response")))[p_levels],
238238
p = attr(x, "params")
239239
)$stat
240-
)
240+
))
241241
} else {
242242
# Straight from `specify()`
243243
stop_glue(

R/generate.R

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,7 @@ bootstrap <- function(x, reps = 1, ...) {
145145
result <- rep_sample_n(x, size = nrow(x), replace = TRUE, reps = reps)
146146
result <- copy_attrs(to = result, from = x)
147147

148-
class(result) <- append("infer", class(result))
149-
150-
result
148+
append_infer_class(result)
151149
}
152150

153151
#' @importFrom dplyr bind_rows group_by
@@ -159,9 +157,7 @@ permute <- function(x, reps = 1, ...) {
159157

160158
df_out <- copy_attrs(to = df_out, from = x)
161159

162-
class(df_out) <- append("infer", class(df_out))
163-
164-
df_out
160+
append_infer_class(df_out)
165161
}
166162

167163
permute_once <- function(x, ...) {
@@ -195,7 +191,7 @@ simulate <- function(x, reps = 1, ...) {
195191

196192
rep_tbl <- copy_attrs(to = rep_tbl, from = x)
197193

198-
class(rep_tbl) <- append("infer", class(rep_tbl))
199-
200-
dplyr::group_by(rep_tbl, replicate)
194+
rep_tbl <- dplyr::group_by(rep_tbl, replicate)
195+
196+
append_infer_class(rep_tbl)
201197
}

R/hypothesize.R

Lines changed: 38 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33
#' @param x A data frame that can be coerced into a [tibble][tibble::tibble].
44
#' @param null The null hypothesis. Options include `"independence"` and
55
#' `"point"`.
6-
#' @param ... Arguments passed to downstream functions.
6+
#' @param p The true proportion of successes (a number between 0 and 1). To be used with point null hypotheses when the specified response
7+
#' variable is categorical.
8+
#' @param mu The true mean (any numerical value). To be used with point null
9+
#' hypotheses when the specified response variable is continuous.
10+
#' @param med The true median (any numerical value). To be used with point null
11+
#' hypotheses when the specified response variable is continuous.
12+
#' @param sigma The true standard deviation (any numerical value). To be used with
13+
#' point null hypotheses.
714
#'
815
#' @return A tibble containing the response (and explanatory, if specified)
916
#' variable data with parameter information stored as well.
@@ -17,71 +24,43 @@
1724
#' generate(reps = 100, type = "permute") %>%
1825
#' calculate(stat = "F")
1926
#'
27+
#' @importFrom purrr compact
2028
#' @export
21-
hypothesize <- function(x, null, ...) {
22-
hypothesize_checks(x, null)
29+
hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) {
2330

31+
# Custom logic, because using match.arg() would give a default value when
32+
# the user didn't specify anything.
33+
null <- match_null_hypothesis(null)
2434
attr(x, "null") <- null
2535

26-
dots <- list(...)
27-
28-
if ((null == "point") && (length(dots) == 0)) {
29-
stop_glue(
30-
"Provide a parameter and a value to check such as `mu = 30` for the ",
31-
"point hypothesis."
32-
)
33-
}
34-
35-
if ((null == "independence") && (length(dots) > 0)) {
36-
warning_glue(
37-
"Parameter values are not specified when testing that two variables are ",
38-
"independent."
39-
)
40-
}
41-
42-
if ((length(dots) > 0) && (null == "point")) {
43-
params <- parse_params(dots, x)
44-
attr(x, "params") <- params
45-
46-
if (any(grepl("p.", attr(attr(x, "params"), "names")))) {
47-
# simulate instead of bootstrap based on the value of `p` provided
48-
attr(x, "type") <- "simulate"
49-
} else {
50-
attr(x, "type") <- "bootstrap"
51-
}
36+
hypothesize_checks(x, null)
5237

53-
}
38+
dots <- compact(list(p = p, mu = mu, med = med, sigma = sigma))
5439

55-
if (!is.null(null) && (null == "independence")) {
56-
attr(x, "type") <- "permute"
57-
}
40+
switch(
41+
null,
42+
independence = {
43+
params <- sanitize_hypothesis_params_independence(dots)
44+
attr(x, "type") <- "permute"
45+
},
46+
point = {
47+
params <- sanitize_hypothesis_params_point(dots, x)
48+
attr(x, "params") <- unlist(params)
5849

59-
# Check one proportion test set up correctly
60-
if (null == "point") {
61-
if (is.factor(response_variable(x))) {
62-
if (!any(grepl("p", attr(attr(x, "params"), "names")))) {
63-
stop_glue(
64-
'Testing one categorical variable requires `p` to be used as a ',
65-
'parameter.'
66-
)
50+
if (!is.null(params$p)) {
51+
# simulate instead of bootstrap based on the value of `p` provided
52+
attr(x, "type") <- "simulate"
53+
} else {
54+
# Check one proportion test set up correctly
55+
if (is.factor(response_variable(x))) {
56+
stop_glue(
57+
'Testing one categorical variable requires `p` to be used as a ',
58+
'parameter.'
59+
)
60+
}
61+
attr(x, "type") <- "bootstrap"
6762
}
6863
}
69-
}
70-
71-
# Check one numeric test set up correctly
72-
## Not currently able to reach in testing as other checks
73-
## already produce errors
74-
# if (null == "point") {
75-
# if (
76-
# !is.factor(response_variable(x))
77-
# & !any(grepl("mu|med|sigma", attr(attr(x, "params"), "names")))
78-
# ) {
79-
# stop_glue(
80-
# 'Testing one numerical variable requires one of ',
81-
# '`mu`, `med`, or `sd` to be used as a parameter.'
82-
# )
83-
# }
84-
# }
85-
86-
tibble::as_tibble(x)
64+
)
65+
append_infer_class(tibble::as_tibble(x))
8766
}

R/infer.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ NULL
1717
if (getRversion() >= "2.15.1") {
1818
utils::globalVariables(
1919
c(
20-
"prop", "stat", "value", "x", "..density..", "statistic", ".",
20+
"prop", "stat", "value", "x", "y", "..density..", "statistic", ".",
2121
"parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density",
2222
"denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat",
2323
"total_suc", "explan", "probs", "conf.low", "conf.high"

R/shade_confidence_interval.R

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#' Add information about confidence interval
2+
#'
3+
#' `shade_confidence_interval()` plots confidence interval region on top of the
4+
#' [visualize()] output. It should be used as \\{ggplot2\\} layer function (see
5+
#' examples). `shade_ci()` is its alias.
6+
#'
7+
#' @param endpoints A 2 element vector or a 1 x 2 data frame containing the
8+
#' lower and upper values to be plotted. Most useful for visualizing
9+
#' conference intervals.
10+
#' @param color A character or hex string specifying the color of the
11+
#' end points as a vertical lines on the plot.
12+
#' @param fill A character or hex string specifying the color to shade the
13+
#' confidence interval. If `NULL` then no shading is actually done.
14+
#' @param ... Other arguments passed along to \\{ggplot2\\} functions.
15+
#' @return A list of \\{ggplot2\\} objects to be added to the `visualize()`
16+
#' output.
17+
#'
18+
#' @seealso [shade_p_value()] to add information about p-value region.
19+
#'
20+
#' @examples
21+
#' viz_plot <- mtcars %>%
22+
#' dplyr::mutate(am = factor(am)) %>%
23+
#' specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am
24+
#' hypothesize(null = "independence") %>%
25+
#' generate(reps = 100, type = "permute") %>%
26+
#' calculate(stat = "t", order = c("1", "0")) %>%
27+
#' visualize(method = "both")
28+
#'
29+
#' viz_plot + shade_confidence_interval(c(-1.5, 1.5))
30+
#' viz_plot + shade_confidence_interval(c(-1.5, 1.5), fill = NULL)
31+
#'
32+
#' @name shade_confidence_interval
33+
NULL
34+
35+
#' @rdname shade_confidence_interval
36+
#' @export
37+
shade_confidence_interval <- function(endpoints, color = "mediumaquamarine",
38+
fill = "turquoise", ...) {
39+
endpoints <- impute_endpoints(endpoints)
40+
check_shade_confidence_interval_args(color, fill)
41+
42+
res <- list()
43+
if (is.null(endpoints)) {
44+
return(res)
45+
}
46+
47+
if (!is.null(fill)) {
48+
res <- c(
49+
res, list(
50+
ggplot2::geom_rect(
51+
data = data.frame(endpoints[1]),
52+
fill = fill, alpha = 0.6,
53+
aes(xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf),
54+
inherit.aes = FALSE,
55+
...
56+
)
57+
)
58+
)
59+
}
60+
61+
c(
62+
res,
63+
list(
64+
ggplot2::geom_segment(
65+
data = data.frame(x = endpoints),
66+
aes(x = x, xend = x, y = 0, yend = Inf),
67+
colour = color, size = 2,
68+
inherit.aes = FALSE
69+
)
70+
)
71+
)
72+
}
73+
74+
#' @rdname shade_confidence_interval
75+
#' @export
76+
shade_ci <- shade_confidence_interval

0 commit comments

Comments
 (0)