From c7ae5722204b80187039d96550abd3037db8bdc1 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 25 Mar 2026 16:10:15 +0000 Subject: [PATCH 1/3] Handle normalised lambda and lambda_grad metadata entries. --- src/BioSimSpace/FreeEnergy/_relative.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/BioSimSpace/FreeEnergy/_relative.py b/src/BioSimSpace/FreeEnergy/_relative.py index 98801998..4e52a0f7 100644 --- a/src/BioSimSpace/FreeEnergy/_relative.py +++ b/src/BioSimSpace/FreeEnergy/_relative.py @@ -1059,15 +1059,20 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): raise ValueError("Parquet metadata does not contain 'lambda'.") if not is_mbar: try: - lambda_grad = metadata["lambda_grad"] + # Normalise to :.5f strings to match sire energy trajectory column names. + lambda_grad = [f"{float(v):.5f}" for v in metadata["lambda_grad"]] except: raise ValueError("Parquet metadata does not contain 'lambda grad'") else: try: - lambda_grad = metadata["lambda_grad"] + # Normalise to :.5f strings to match sire energy trajectory column names. + lambda_grad = [f"{float(v):.5f}" for v in metadata["lambda_grad"]] except: lambda_grad = [] + # Key used to index the simulated lambda column in the dataframe. + lam_key = f"{lam:.5f}" + # Make sure that the temperature is correct. if not T == temperature: raise ValueError( @@ -1083,7 +1088,7 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): df = df[[x for x in df.columns if x not in lambda_grad]] # Subtract the potential at the simulated lambda. - df = df.subtract(df[lam], axis=0) + df = df.subtract(df[lam_key], axis=0) # Apply the existing attributes. df.attrs = attrs @@ -1096,19 +1101,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): lam_delta = lambda_grad[0] # Forward difference. - if lam_delta > lam: - incr = lam_delta - lam - grad = (df[lam_delta] - df[lam]) / incr + if float(lam_delta) > lam: + incr = float(lam_delta) - lam + grad = (df[lam_delta] - df[lam_key]) / incr # Backward difference. else: - incr = lam - lam_delta - grad = (df[lam] - df[lam_delta]) / incr + incr = lam - float(lam_delta) + grad = (df[lam_key] - df[lam_delta]) / incr # Central difference. else: lam_below, lam_above = lambda_grad - double_incr = lam_above - lam_below + double_incr = float(lam_above) - float(lam_below) grad = (df[lam_above] - df[lam_below]) / double_incr # Create a DataFrame with the multi-index From 337de4a4ab650fddef358222420584697c2e037c Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 25 Mar 2026 16:16:54 +0000 Subject: [PATCH 2/3] Handle normalisation in a backwards compatible way. --- src/BioSimSpace/FreeEnergy/_relative.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/BioSimSpace/FreeEnergy/_relative.py b/src/BioSimSpace/FreeEnergy/_relative.py index 4e52a0f7..6557acf2 100644 --- a/src/BioSimSpace/FreeEnergy/_relative.py +++ b/src/BioSimSpace/FreeEnergy/_relative.py @@ -1083,6 +1083,20 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): # Convert to a pandas dataframe. df = table.to_pandas() + # Normalise column names to :.5f string format so that comparisons are + # consistent regardless of whether the parquet was written with float keys + # (old sire) or formatted string keys (new sire). + df.columns = [ + f"{float(c):.5f}" + if isinstance(c, (int, float)) + or ( + isinstance(c, str) + and c.replace(".", "", 1).replace("-", "", 1).isdigit() + ) + else c + for c in df.columns + ] + if is_mbar: # Extract all columns other than those used for the gradient. df = df[[x for x in df.columns if x not in lambda_grad]] From d10713811879b7aaa0c5d5e98e0afd4239d26af7 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 25 Mar 2026 16:20:10 +0000 Subject: [PATCH 3/3] Normalise to floats for simplicity. --- src/BioSimSpace/FreeEnergy/_relative.py | 44 ++++++++++++------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/BioSimSpace/FreeEnergy/_relative.py b/src/BioSimSpace/FreeEnergy/_relative.py index 6557acf2..19bc9e08 100644 --- a/src/BioSimSpace/FreeEnergy/_relative.py +++ b/src/BioSimSpace/FreeEnergy/_relative.py @@ -1059,20 +1059,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): raise ValueError("Parquet metadata does not contain 'lambda'.") if not is_mbar: try: - # Normalise to :.5f strings to match sire energy trajectory column names. - lambda_grad = [f"{float(v):.5f}" for v in metadata["lambda_grad"]] + # Normalise to floats to match the DataFrame column type expected + # by alchemlyb (handles both old float and new string metadata). + lambda_grad = [float(v) for v in metadata["lambda_grad"]] except: raise ValueError("Parquet metadata does not contain 'lambda grad'") else: try: - # Normalise to :.5f strings to match sire energy trajectory column names. - lambda_grad = [f"{float(v):.5f}" for v in metadata["lambda_grad"]] + # Normalise to floats to match the DataFrame column type expected + # by alchemlyb (handles both old float and new string metadata). + lambda_grad = [float(v) for v in metadata["lambda_grad"]] except: lambda_grad = [] - # Key used to index the simulated lambda column in the dataframe. - lam_key = f"{lam:.5f}" - # Make sure that the temperature is correct. if not T == temperature: raise ValueError( @@ -1083,16 +1082,15 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): # Convert to a pandas dataframe. df = table.to_pandas() - # Normalise column names to :.5f string format so that comparisons are - # consistent regardless of whether the parquet was written with float keys - # (old sire) or formatted string keys (new sire). + # Normalise column names to floats so that comparisons are consistent + # regardless of whether the parquet was written with float keys (old + # sire) or formatted string keys (new sire). float("0.10000") and + # float("0.1") give the same IEEE754 value, so old and new files are + # handled identically and the alchemlyb index check passes. df.columns = [ - f"{float(c):.5f}" - if isinstance(c, (int, float)) - or ( - isinstance(c, str) - and c.replace(".", "", 1).replace("-", "", 1).isdigit() - ) + float(c) + if isinstance(c, str) + and c.replace(".", "", 1).replace("-", "", 1).isdigit() else c for c in df.columns ] @@ -1102,7 +1100,7 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): df = df[[x for x in df.columns if x not in lambda_grad]] # Subtract the potential at the simulated lambda. - df = df.subtract(df[lam_key], axis=0) + df = df.subtract(df[lam], axis=0) # Apply the existing attributes. df.attrs = attrs @@ -1115,19 +1113,19 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"): lam_delta = lambda_grad[0] # Forward difference. - if float(lam_delta) > lam: - incr = float(lam_delta) - lam - grad = (df[lam_delta] - df[lam_key]) / incr + if lam_delta > lam: + incr = lam_delta - lam + grad = (df[lam_delta] - df[lam]) / incr # Backward difference. else: - incr = lam - float(lam_delta) - grad = (df[lam_key] - df[lam_delta]) / incr + incr = lam - lam_delta + grad = (df[lam] - df[lam_delta]) / incr # Central difference. else: lam_below, lam_above = lambda_grad - double_incr = float(lam_above) - float(lam_below) + double_incr = lam_above - lam_below grad = (df[lam_above] - df[lam_below]) / double_incr # Create a DataFrame with the multi-index