rasbt · rasbt · Jun 6, 2026 · May 9, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -23,6 +23,8 @@ The CHANGELOG for the current development version is available at
 
 - Fixes an edge-case bug where decision regions plots didn't have unique colors ([#1157](https://github.com/rasbt/mlxtend/issues/1157) via [mariam851](https://github.com/mariam851))
 
+- Fix `preprocessing.standardize` so a constant column is mapped to all-zeros (as the docstring promises) instead of `-mean(column)` ([#1058](https://github.com/rasbt/mlxtend/issues/1058) via [jbbqqf](https://github.com/jbbqqf))
+
 - Reject `min_support` values outside the documented `(0, 1]` interval in `apriori`, `fpgrowth`, `fpmax`, and `hmine`. The previous check only caught `<= 0`, so passing e.g. `min_support=2` silently returned an empty result ([#864](https://github.com/rasbt/mlxtend/issues/864) via [jbbqqf](https://github.com/jbbqqf))
 
 - Add a `top_k` argument to `ExhaustiveFeatureSelector.get_metric_dict()` so callers can request only the highest-scoring subsets before converting the result to a DataFrame ([#610](https://github.com/rasbt/mlxtend/issues/610) via [jbbqqf](https://github.com/jbbqqf))

diff --git a/mlxtend/preprocessing/scaling.py b/mlxtend/preprocessing/scaling.py
@@ -139,9 +139,16 @@ def standardize(array, columns=None, ddof=0, return_params=False, params=None):
         }
     are_constant = np.all(ary_newt[:, columns] == ary_newt[0, columns], axis=0)
 
+    # For constant columns the standard deviation is 0 (or NaN with some ddof
+    # values), so dividing by it would propagate NaNs / Infs. Forcing std to
+    # 1.0 leaves (col - mean) / 1.0, which is 0.0 up to floating-point
+    # rounding in the mean (e.g. the mean of three 0.1 values is not exactly
+    # 0.1), per the "Notes" section ("If all values in a given column are
+    # the same, these values are all set to 0.0"). The previous version also
+    # pre-zeroed the column first, which turned (0 - mean) / 1 into -mean
+    # instead of 0 -- see issue #1058.
     for c, b in zip(columns, are_constant):
         if b:
-            ary_newt[:, c] = np.zeros(dim[0])
             parameters["stds"][c] = 1.0
 
     ary_newt[:, columns] = (ary_newt[:, columns] - parameters["avgs"]) / parameters[

diff --git a/mlxtend/preprocessing/tests/test__scaling__standardizing.py b/mlxtend/preprocessing/tests/test__scaling__standardizing.py
@@ -164,3 +164,37 @@ def test_standardize_all_columns_pandas():
         ]
     )
     np.testing.assert_allclose(df_out1.values, ary_out1, rtol=1e-03)
+
+
+def test_standardize_constant_column_numpy_issue_1058():
+    # Regression test for #1058: a constant column was being mapped to
+    # `-mean(column)` instead of `0.0`. The "Notes" docstring promises that
+    # constant columns are set to 0.0.
+    ary = np.array([[0, 1, 2, 5], [1, 2, 3, 5], [3, 1, 2, 5]], dtype=float)
+    ary_actu = standardize(ary)
+    # The 4th column is the constant one and must be all-zero.
+    np.testing.assert_allclose(ary_actu[:, 3], np.zeros(3))
+    # Sanity check: the non-constant columns stay close to z-score scale
+    # (mean 0, std 1 with ddof=0). We only assert mean ~= 0 to avoid
+    # depending on the exact std implementation.
+    np.testing.assert_allclose(ary_actu[:, :3].mean(axis=0), np.zeros(3), atol=1e-9)
+
+
+def test_standardize_constant_column_pandas_issue_1058():
+    # Same regression as the numpy variant, exercised through the pandas
+    # branch of standardize().
+    df = pd.DataFrame(
+        {"x": [0.0, 1.0, 3.0], "y": [1.0, 2.0, 1.0], "k": [5.0, 5.0, 5.0]}
+    )
+    df_actu = standardize(df, ["x", "y", "k"])
+    np.testing.assert_allclose(df_actu["k"].values, np.zeros(3))
+
+
+def test_standardize_constant_column_returns_unit_std_param_issue_1058():
+    # The contract from the "Notes" docstring is that the std for a constant
+    # column ends up as 1.0 in the returned `params` dict, so those params
+    # can be reused by a subsequent call without dividing by zero.
+    ary = np.array([[5.0, 1.0], [5.0, 2.0], [5.0, 3.0]])
+    out, params = standardize(ary, return_params=True)
+    np.testing.assert_allclose(out[:, 0], np.zeros(3))
+    assert params["stds"][0] == 1.0