From de9a3a3ff3752a08552b6bfc530a8a8ebefc4083 Mon Sep 17 00:00:00 2001 From: jbbqqf Date: Sat, 9 May 2026 20:31:33 +0200 Subject: [PATCH] frequent_patterns: reject min_support outside (0, 1] (#864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `min_support` is documented as a fraction in the half-open interval `(0, 1]`. The validation in apriori / fpgrowth / fpmax / hmine only checked `<= 0`, so values like `min_support=2` would pass through and return an empty DataFrame silently — confusing for users who picked the wrong scale (counts vs fractions, percent vs fraction). Tightens each validator to also reject `> 1.0` with the same message that already advertises the `(0, 1]` interval. Adds a single `test_output4_min_support_above_one_issue_864` to the shared `FPTestEx3All` base class so it runs against all four algorithms. Co-Authored-By: Claude Code --- docs/sources/CHANGELOG.md | 2 ++ mlxtend/frequent_patterns/apriori.py | 6 +++++- mlxtend/frequent_patterns/fpgrowth.py | 5 ++++- mlxtend/frequent_patterns/fpmax.py | 5 ++++- mlxtend/frequent_patterns/hmine.py | 4 +++- mlxtend/frequent_patterns/tests/test_fpbase.py | 13 +++++++++++++ 6 files changed, 31 insertions(+), 4 deletions(-) diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index c661b5f47..d8a783584 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -23,6 +23,8 @@ The CHANGELOG for the current development version is available at - Fixes an edge-case bug where decision regions plots didn't have unique colors ([#1157](https://github.com/rasbt/mlxtend/issues/1157) via [mariam851](https://github.com/mariam851)) +- Reject `min_support` values outside the documented `(0, 1]` interval in `apriori`, `fpgrowth`, `fpmax`, and `hmine`. The previous check only caught `<= 0`, so passing e.g. `min_support=2` silently returned an empty result ([#864](https://github.com/rasbt/mlxtend/issues/864) via [jbbqqf](https://github.com/jbbqqf)) + ### Version 0.24.0 (13 Dec 2025) diff --git a/mlxtend/frequent_patterns/apriori.py b/mlxtend/frequent_patterns/apriori.py index b92c946a6..666c5d21b 100644 --- a/mlxtend/frequent_patterns/apriori.py +++ b/mlxtend/frequent_patterns/apriori.py @@ -244,7 +244,11 @@ def _support(_x, _n_rows, _is_sparse): out = np.sum(_x, axis=0) / _n_rows return np.array(out).reshape(-1) - if min_support <= 0.0: + # Reject min_support outside (0, 1]. The check used to only catch + # `<= 0`, which let callers pass values such as `min_support=2` + # silently — apriori would then run and return an empty DataFrame + # because no itemset can have a fractional support > 1 (issue #864). + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/fpgrowth.py b/mlxtend/frequent_patterns/fpgrowth.py index 5ebe4e56f..457fe8539 100644 --- a/mlxtend/frequent_patterns/fpgrowth.py +++ b/mlxtend/frequent_patterns/fpgrowth.py @@ -77,7 +77,10 @@ def fpgrowth( """ fpc.valid_input_check(df, null_values) - if min_support <= 0.0: + # See issue #864: reject min_support outside (0, 1] up front. Without + # the upper bound, callers passing e.g. `min_support=2` would get an + # empty result silently because no fractional support can exceed 1. + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/fpmax.py b/mlxtend/frequent_patterns/fpmax.py index 859aa63e3..27d2f3fe7 100644 --- a/mlxtend/frequent_patterns/fpmax.py +++ b/mlxtend/frequent_patterns/fpmax.py @@ -79,7 +79,10 @@ def fpmax( """ fpc.valid_input_check(df, null_values) - if min_support <= 0.0: + # See issue #864: reject min_support outside (0, 1] up front (mirrors + # the same check in apriori / fpgrowth). The fractional support + # interpretation makes any value > 1 unreachable. + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/hmine.py b/mlxtend/frequent_patterns/hmine.py index 73f2ecf9c..c673aed4c 100644 --- a/mlxtend/frequent_patterns/hmine.py +++ b/mlxtend/frequent_patterns/hmine.py @@ -77,7 +77,9 @@ def hmine( """ fpc.valid_input_check(df) - if min_support <= 0.0: + # See issue #864: reject min_support outside (0, 1] up front (mirrors + # apriori / fpgrowth / fpmax). Fractional support cannot exceed 1. + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/tests/test_fpbase.py b/mlxtend/frequent_patterns/tests/test_fpbase.py index 3d9bedd30..6699063af 100644 --- a/mlxtend/frequent_patterns/tests/test_fpbase.py +++ b/mlxtend/frequent_patterns/tests/test_fpbase.py @@ -326,6 +326,19 @@ def test_output3(self): min_support=0.0, ) + def test_output4_min_support_above_one_issue_864(self): + # Regression test for #864: min_support is a fraction in (0, 1]. + # Values > 1 used to pass silently, returning an empty result; + # they now raise the same ValueError as min_support <= 0. + assert_raises( + ValueError, + "`min_support` must be a positive " + "number within the interval `(0, 1]`. Got 2.", + self.fpalgo, + self.df, + min_support=2, + ) + def compare_dataframes(df1, df2): itemsets1 = [sorted(list(i)) for i in df1["itemsets"]]