rasbt · rasbt · Jun 6, 2026 · May 9, 2026 · Jun 6, 2026
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -23,6 +23,8 @@ The CHANGELOG for the current development version is available at
 
 - Fixes an edge-case bug where decision regions plots didn't have unique colors ([#1157](https://github.com/rasbt/mlxtend/issues/1157) via [mariam851](https://github.com/mariam851))
 
+- Adds a `top_k` argument to `ExhaustiveFeatureSelector.get_metric_dict()` so callers can request only the highest-scoring subsets before converting the result to a DataFrame ([#610](https://github.com/rasbt/mlxtend/issues/610) via [jbbqqf](https://github.com/jbbqqf))
+
 - `minmax_scaling` no longer returns silent NaNs for constant columns; constant columns are now collapsed to `min_val`, mirroring the existing contract of `standardize`. ([#1167](https://github.com/rasbt/mlxtend/issues/1167) via [jbbqqf](https://github.com/jbbqqf))
 
 - `bias_variance_decomp` now accepts pandas DataFrames and Series as input, in addition to NumPy arrays. ([#1070](https://github.com/rasbt/mlxtend/issues/1070) via [berns722](https://github.com/berns722))

diff --git a/mlxtend/feature_selection/exhaustive_feature_selector.py b/mlxtend/feature_selection/exhaustive_feature_selector.py
@@ -556,14 +556,25 @@ def fit_transform(self, X, y, groups=None, **fit_params):
         self.fit(X, y, groups=groups, **fit_params)
         return self.transform(X)
 
-    def get_metric_dict(self, confidence_interval=0.95):
+    def get_metric_dict(self, confidence_interval=0.95, top_k=None):
         """Return metric dictionary
 
         Parameters
         ----------
         confidence_interval : float (default: 0.95)
             A positive float between 0.0 and 1.0 to compute the confidence
             interval bounds of the CV score averages.
+        top_k : int or None (default: None)
+            If a positive integer, restrict the returned dictionary to the
+            top-`top_k` feature subsets ranked by `avg_score` descending
+            (all subsets if fewer were evaluated); other non-None values
+            raise a ``ValueError``. ExhaustiveFeatureSelector can produce a
+            very large number of evaluated subsets, and downstream consumers
+            (notably ``pd.DataFrame.from_dict(..., orient='index')``) often
+            only need the highest-scoring entries. ``top_k`` lets callers cap
+            the returned dictionary before such conversions without
+            re-implementing the ranking themselves (issue #610). ``None``
+            (default) preserves the historical behaviour and returns all subsets.
 
         Returns
         ----------
@@ -580,7 +591,22 @@ def get_metric_dict(self, confidence_interval=0.95):
 
         """
         self._check_fitted()
-        fdict = deepcopy(self.subsets_)
+        if top_k is not None:
+            if not isinstance(top_k, (int, np.integer)) or top_k <= 0:
+                raise ValueError(
+                    "`top_k` must be a positive integer or None. " "Got %r." % (top_k,)
+                )
+            # Keep the original `subsets_` keys (no renumbering by rank) so
+            # callers can cross-reference `subsets_`; ties keep insertion order.
+            subset_keys = sorted(
+                self.subsets_,
+                key=lambda k: self.subsets_[k]["avg_score"],
+                reverse=True,
+            )[:top_k]
+            fdict = {k: deepcopy(self.subsets_[k]) for k in subset_keys}
+        else:
+            fdict = deepcopy(self.subsets_)
+
         for k in fdict:
             std_dev = np.std(self.subsets_[k]["cv_scores"])
             bound, std_err = self._calc_confidence(

diff --git a/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py b/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
@@ -843,3 +843,121 @@ def test_logistic_regression_compatibility():
 
     assert efs.best_idx_ == (3,)
     assert efs.best_score_ > 0.90
+
+
+def test_get_metric_dict_top_k_returns_top_subsets_issue_610():
+    # Regression test for #610: top_k must restrict the returned dict to
+    # the top-K subsets ranked by avg_score (descending).
+    knn = KNeighborsClassifier(n_neighbors=4)
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    efs = EFS(
+        knn,
+        min_features=1,
+        max_features=3,
+        scoring="accuracy",
+        cv=3,
+        clone_estimator=False,
+        print_progress=False,
+        n_jobs=1,
+    )
+    efs.fit(X, y)
+
+    full = efs.get_metric_dict()
+    assert len(full) > 3, "test setup expects more than 3 subsets evaluated"
+
+    top3 = efs.get_metric_dict(top_k=3)
+    assert len(top3) == 3, "top_k=3 should return exactly 3 entries"
+
+    # The returned subsets are exactly the 3 highest-scoring ones from `full`.
+    expected_top = sorted(
+        full.keys(), key=lambda k: full[k]["avg_score"], reverse=True
+    )[:3]
+    assert set(top3.keys()) == set(
+        expected_top
+    ), "top_k did not return the highest-scoring subsets"
+
+    # The fields compared below (feature_idx and avg_score) must match the
+    # corresponding entries in the full dict; the remaining metrics
+    # (ci_bound, std_dev, std_err, feature_names if present) are not compared.
+    for k in top3:
+        assert top3[k]["feature_idx"] == full[k]["feature_idx"]
+        assert top3[k]["avg_score"] == full[k]["avg_score"]
+
+
+def test_get_metric_dict_top_k_none_preserves_default_behavior_issue_610():
+    knn = KNeighborsClassifier(n_neighbors=4)
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    efs = EFS(
+        knn,
+        min_features=1,
+        max_features=2,
+        scoring="accuracy",
+        cv=2,
+        clone_estimator=False,
+        print_progress=False,
+        n_jobs=1,
+    )
+    efs.fit(X, y)
+
+    default = efs.get_metric_dict()
+    explicit_none = efs.get_metric_dict(top_k=None)
+    assert default.keys() == explicit_none.keys()
+
+
+def test_get_metric_dict_top_k_invalid_raises_issue_610():
+    knn = KNeighborsClassifier(n_neighbors=4)
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    efs = EFS(
+        knn,
+        min_features=1,
+        max_features=2,
+        scoring="accuracy",
+        cv=2,
+        clone_estimator=False,
+        print_progress=False,
+        n_jobs=1,
+    )
+    efs.fit(X, y)
+    assert_raises(
+        ValueError,
+        "`top_k` must be a positive integer or None",
+        efs.get_metric_dict,
+        top_k=0,
+    )
+    assert_raises(
+        ValueError,
+        "`top_k` must be a positive integer or None",
+        efs.get_metric_dict,
+        top_k=-2,
+    )
+    assert_raises(
+        ValueError,
+        "`top_k` must be a positive integer or None",
+        efs.get_metric_dict,
+        top_k=1.5,
+    )
+
+
+def test_get_metric_dict_top_k_larger_than_total_returns_all_issue_610():
+    # Boundary: if top_k exceeds the number of evaluated subsets, all
+    # entries are returned (not an error).
+    knn = KNeighborsClassifier(n_neighbors=4)
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    efs = EFS(
+        knn,
+        min_features=1,
+        max_features=2,
+        scoring="accuracy",
+        cv=2,
+        clone_estimator=False,
+        print_progress=False,
+        n_jobs=1,
+    )
+    efs.fit(X, y)
+    full = efs.get_metric_dict()
+    out = efs.get_metric_dict(top_k=10**6)
+    assert out.keys() == full.keys()