diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md index b02420d27..26280f545 100755 --- a/docs/sources/CHANGELOG.md +++ b/docs/sources/CHANGELOG.md @@ -23,6 +23,8 @@ The CHANGELOG for the current development version is available at - Fixes an edge-case bug where decision regions plots didn't have unique colors ([#1157](https://github.com/rasbt/mlxtend/issues/1157) via [mariam851](https://github.com/mariam851)) +- Reject `min_support` values outside the documented `(0, 1]` interval in `apriori`, `fpgrowth`, `fpmax`, and `hmine`. The previous check only caught `<= 0`, so passing e.g. `min_support=2` silently returned an empty result ([#864](https://github.com/rasbt/mlxtend/issues/864) via [jbbqqf](https://github.com/jbbqqf)) + - Add a `top_k` argument to `ExhaustiveFeatureSelector.get_metric_dict()` so callers can request only the highest-scoring subsets before converting the result to a DataFrame ([#610](https://github.com/rasbt/mlxtend/issues/610) via [jbbqqf](https://github.com/jbbqqf)) - `minmax_scaling` no longer returns silent NaNs for constant columns; constant columns are now collapsed to `min_val`, mirroring the existing contract of `standardize`. ([#1167](https://github.com/rasbt/mlxtend/issues/1167) via [jbbqqf](https://github.com/jbbqqf)) diff --git a/mlxtend/frequent_patterns/apriori.py b/mlxtend/frequent_patterns/apriori.py index 0cfd00f30..6a8c14776 100644 --- a/mlxtend/frequent_patterns/apriori.py +++ b/mlxtend/frequent_patterns/apriori.py @@ -258,7 +258,7 @@ def _support(_x, _n_rows, _is_sparse): out = np.sum(_x, axis=0) / _n_rows return np.array(out).reshape(-1) - if min_support <= 0.0: + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/fpgrowth.py b/mlxtend/frequent_patterns/fpgrowth.py index 5ebe4e56f..85bf8449d 100644 --- a/mlxtend/frequent_patterns/fpgrowth.py +++ b/mlxtend/frequent_patterns/fpgrowth.py @@ -77,7 +77,7 @@ def fpgrowth( """ fpc.valid_input_check(df, null_values) - if min_support <= 0.0: + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/fpmax.py b/mlxtend/frequent_patterns/fpmax.py index 859aa63e3..2516ec290 100644 --- a/mlxtend/frequent_patterns/fpmax.py +++ b/mlxtend/frequent_patterns/fpmax.py @@ -79,7 +79,7 @@ def fpmax( """ fpc.valid_input_check(df, null_values) - if min_support <= 0.0: + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/hmine.py b/mlxtend/frequent_patterns/hmine.py index 73f2ecf9c..f58f862c0 100644 --- a/mlxtend/frequent_patterns/hmine.py +++ b/mlxtend/frequent_patterns/hmine.py @@ -77,7 +77,7 @@ def hmine( """ fpc.valid_input_check(df) - if min_support <= 0.0: + if min_support <= 0.0 or min_support > 1.0: raise ValueError( "`min_support` must be a positive " "number within the interval `(0, 1]`. " diff --git a/mlxtend/frequent_patterns/tests/test_fpbase.py b/mlxtend/frequent_patterns/tests/test_fpbase.py index 3d9bedd30..6699063af 100644 --- a/mlxtend/frequent_patterns/tests/test_fpbase.py +++ b/mlxtend/frequent_patterns/tests/test_fpbase.py @@ -326,6 +326,19 @@ def test_output3(self): min_support=0.0, ) + def test_output4_min_support_above_one_issue_864(self): + # Regression test for #864: min_support is a fraction in (0, 1]. + # Values > 1 used to pass silently, returning an empty result; + # they now raise the same ValueError as min_support <= 0. + assert_raises( + ValueError, + "`min_support` must be a positive " + "number within the interval `(0, 1]`. Got 2.", + self.fpalgo, + self.df, + min_support=2, + ) + def compare_dataframes(df1, df2): itemsets1 = [sorted(list(i)) for i in df1["itemsets"]]