From 64e0d3aec872039fb572fe92fa2eef8a4d6acb68 Mon Sep 17 00:00:00 2001
From: Jules <54960783+juleswg23@users.noreply.github.com>
Date: Thu, 3 Jul 2025 09:45:41 -0400
Subject: [PATCH] one way to remove scipy dependency

Replace the scipy.stats helpers used by gt_plt_conf_int with stdlib math.
Note that this removes the public `ci` argument: computed intervals are
always two-sided 95% intervals.

Critical values come from a complete df = 1..30 table and, for larger df,
from a series expansion around the normal quantile. An earlier draft fell
back to a critical value of 20 for df values missing from the table, which
inflated the interval roughly tenfold; with the table complete the existing
test expectations (">2.4", ">4") should hold unchanged, so the test file is
no longer touched.

Entries with fewer than two values now yield no interval instead of
raising ZeroDivisionError.
---
 gt_extras/plotting.py | 49 ++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/gt_extras/plotting.py b/gt_extras/plotting.py
index c6906bb2..02f410ff 100644
--- a/gt_extras/plotting.py
+++ b/gt_extras/plotting.py
@@ -20,7 +20,7 @@
 
 from svg import SVG, Line, Rect, Text
 
-from scipy.stats import t, sem, tmean
+import math
 
 
 __all__ = [
@@ -396,7 +396,6 @@ def gt_plt_conf_int(
     gt: GT,
     column: SelectExpr,
     ci_columns: SelectExpr | None = None,
-    ci: float = 0.95,
     # or min_width? see: https://github.com/posit-dev/gt-extras/issues/53
     width: float = 100,
     height: float = 30,
@@ -431,10 +430,8 @@ def gt_plt_conf_int(
 
     ci_columns
         Optional columns representing the left/right confidence intervals of your sample. If `None`,
-        the confidence interval will be computed from the data in `column` using a t-distribution.
-
-    ci
-        The confidence level to use when computing the interval (if `ci_columns` is `None`).
+        the confidence interval will be computed from the data in `column` using a t-distribution
+        at a fixed confidence level of `0.95`.
 
     width
         The width of the confidence interval plot in pixels.
@@ -623,17 +620,39 @@ def _make_conf_int_html(
                 "since ci_columns were not given."
             )
 
         def _compute_mean_and_conf_int(val):
-            if val is None or not isinstance(val, list) or len(val) == 0:
+            # A sample variance (and hence an interval) needs at least two values.
+            if val is None or not isinstance(val, list) or len(val) < 2:
                 return (None, None, None)
-            mean = tmean(val)
-            conf_int = t.interval(
-                ci,
-                len(val) - 1,
-                loc=mean,
-                scale=sem(val),
-            )
-            return (mean, conf_int[0], conf_int[1])
+
+            n = len(val)
+            mean = sum(val) / n
+
+            # Standard error of the mean from the unbiased (n - 1) sample variance.
+            variance = sum((x - mean) ** 2 for x in val) / (n - 1)
+            std_err = math.sqrt(variance / n)
+
+            # Half-width of the two-sided 95% interval.
+            margin = _compute_95_t_critical(n - 1) * std_err
+            return (mean, mean - margin, mean + margin)
+
+        def _compute_95_t_critical(df):
+            """Return the two-sided 95% critical value of Student's t for `df` >= 1."""
+            # 0.975 quantiles for df = 1..30, from standard t tables.
+            t_table = (
+                12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228,
+                2.201, 2.179, 2.160, 2.145, 2.131, 2.120, 2.110, 2.101, 2.093, 2.086,
+                2.080, 2.074, 2.069, 2.064, 2.060, 2.056, 2.052, 2.048, 2.045, 2.042,
+            )
+            if df <= len(t_table):
+                return t_table[df - 1]
+
+            # Beyond the table, expand around the normal quantile z in powers of
+            # 1/df (Abramowitz & Stegun 26.7.5); a bare 1.96 is ~4% too small at df=31.
+            z = 1.959964
+            g1 = (z**3 + z) / 4
+            g2 = (5 * z**5 + 16 * z**3 + 3 * z) / 96
+            return z + g1 / df + g2 / df**2
 
         stats = list(map(_compute_mean_and_conf_int, data_vals))
         means, c1_vals, c2_vals = zip(*stats) if stats else ([], [], [])