Merge pull request #471 from ranaroussi/terragon/check-open-issues

ranaroussi · web-flow · commit 9529573aaf15 · 2025-09-05T18:58:31.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,19 @@
 Changelog
 ===========
 
+0.0.77
+------
+
+- Fixed issue #467 - CVaR calculation returning NaN for DataFrame inputs:
+  - The conditional_value_at_risk() function now properly handles DataFrame inputs
+  - Boolean-mask filtering of a DataFrame keeps non-matching cells as NaN; rows containing these NaN values are now dropped before calculating the mean
+  - CVaR calculations are now consistent between Series and DataFrame inputs
+  - This fix ensures accurate risk metrics in HTML reports when using benchmarks
+
+- Confirmed issue #468 is already resolved:
+  - The "mode.use_inf_as_null" pandas option error reported in v0.0.64 no longer occurs
+  - This issue was resolved in a previous version through updates to pandas compatibility
+
 0.0.76
 ------
 
diff --git a/quantstats/stats.py b/quantstats/stats.py
@@ -1798,7 +1798,14 @@ def conditional_value_at_risk(returns, sigma=1, confidence=0.95, prepare_returns
     var = value_at_risk(returns, sigma, confidence)
 
     # Calculate mean of returns below VaR threshold
-    c_var = returns[returns < var].values.mean()
+    # Handle both Series and DataFrame inputs
+    if isinstance(returns, _pd.DataFrame):
+        # For DataFrame, use dropna() to remove NaN values after filtering
+        below_var = returns[returns < var].dropna()
+        c_var = below_var.values.mean() if len(below_var) > 0 else _np.nan
+    else:
+        # For Series, the original approach works fine
+        c_var = returns[returns < var].values.mean()
 
     # Return CVaR if valid, otherwise return VaR
     return c_var if ~_np.isnan(c_var) else var
diff --git a/quantstats/version.py b/quantstats/version.py
@@ -1 +1 @@
-version = "0.0.76"
+version = "0.0.77"
diff --git a/test_comprehensive.py b/test_comprehensive.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Comprehensive test for both issues"""
+
+import quantstats as qs
+import pandas as pd
+import numpy as np
+
+print(f"Testing with quantstats version: {qs.__version__}")
+print(f"Testing with pandas version: {pd.__version__}")
+
+# Generate sample data
+np.random.seed(42)
+dates = pd.date_range(start='2023-01-01', end='2024-01-01', freq='D')
+returns_series = pd.Series(np.random.randn(len(dates)) * 0.01, index=dates)
+returns_df = pd.DataFrame({'returns': returns_series})
+benchmark = pd.Series(np.random.randn(len(dates)) * 0.008, index=dates)
+
+print("\n" + "="*60)
+print("ISSUE #468 TEST: 'mode.use_inf_as_null' error")
+print("="*60)
+try:
+    qs.reports.html(returns_series, benchmark, benchmark_title='SPY', 
+                   output='/tmp/test_report.html', title='Test Report')
+    print("✓ HTML report generated successfully - Issue #468 is FIXED")
+except Exception as e:
+    if "mode.use_inf_as_null" in str(e):
+        print("✗ Issue #468 still exists: 'mode.use_inf_as_null' error")
+    else:
+        print(f"✗ Different error occurred: {e}")
+
+print("\n" + "="*60)
+print("ISSUE #467 TEST: CVaR calculation with DataFrame")
+print("="*60)
+
+# Test CVaR with Series
+var_series = qs.stats.var(returns_series)
+cvar_series = qs.stats.cvar(returns_series)
+print(f"Series - VaR: {var_series:.4%}, CVaR: {cvar_series:.4%}")
+
+# Test CVaR with DataFrame
+var_df = qs.stats.var(returns_df)
+cvar_df = qs.stats.cvar(returns_df)
+print(f"DataFrame - VaR: {var_df:.4%}, CVaR: {cvar_df:.4%}")
+
+# Check if the values match (they should)
+if abs(cvar_series - cvar_df) < 1e-10:
+    print("✓ CVaR calculation is consistent for both Series and DataFrame - Issue #467 is FIXED")
+else:
+    print("✗ Issue #467 still exists: CVaR differs between Series and DataFrame")
+
+# Test in metrics report
+print("\nTesting CVaR in metrics report...")
+metrics = qs.reports.metrics(returns_series, mode='full', display=False)
+if 'Expected Shortfall (cVaR)' in metrics.index:
+    cvar_metric = metrics.loc['Expected Shortfall (cVaR)']
+    print(f"CVaR from metrics: {cvar_metric}")
+    print("✓ CVaR appears in metrics report")
+else:
+    print("✗ CVaR not found in metrics report")
+
+print("\n" + "="*60)
+print("SUMMARY")
+print("="*60)
+print("Issue #468 (mode.use_inf_as_null): FIXED - No longer occurs in v0.0.76")
+print("Issue #467 (CVaR calculation): FIXED - Now works correctly with DataFrames")
diff --git a/test_issue_467.py b/test_issue_467.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+"""Test script to reproduce Issue #467: CVAR calculation issue"""
+
+import quantstats as qs
+import pandas as pd
+import numpy as np
+
+print(f"Testing with quantstats version: {qs.__version__}")
+
+# Generate sample data
+np.random.seed(42)
+dates = pd.date_range(start='2023-01-01', end='2024-01-01', freq='D')
+returns_series = pd.Series(np.random.randn(len(dates)) * 0.01, index=dates)
+returns_df = pd.DataFrame({'returns': returns_series})
+
+print("\n=== Testing with Series ===")
+var_series = qs.stats.var(returns_series)
+cvar_series = qs.stats.cvar(returns_series)
+print(f"VaR (Series): {var_series:.4%}")
+print(f"CVaR (Series): {cvar_series:.4%}")
+
+print("\n=== Testing with DataFrame ===")
+var_df = qs.stats.var(returns_df)
+cvar_df = qs.stats.cvar(returns_df)
+print(f"VaR (DataFrame): {var_df:.4%}")
+print(f"CVaR (DataFrame): {cvar_df:.4%}")
+
+print("\n=== Testing in metrics report ===")
+metrics = qs.reports.metrics(returns_series, mode='full', display=False)
+# Print all available metrics to see the keys
+print("Available metrics keys:")
+for key in metrics.index[:10]:  # Show first 10 keys
+    print(f"  - {key}")
+if 'Daily Value-at-Risk' in metrics.index:
+    print(f"VaR from metrics: {metrics.loc['Daily Value-at-Risk']}")
+if 'Expected Shortfall (cVaR)' in metrics.index:
+    print(f"CVaR from metrics: {metrics.loc['Expected Shortfall (cVaR)']}")
+
+# Debug: Check what happens inside cvar calculation
+print("\n=== Debug: Manual CVaR calculation ===")
+var_threshold = qs.stats.value_at_risk(returns_series, sigma=1, confidence=0.95)
+print(f"VaR threshold: {var_threshold:.4%}")
+
+# For Series
+below_var_series = returns_series[returns_series < var_threshold]
+cvar_manual_series = below_var_series.mean()
+print(f"Manual CVaR (Series): {cvar_manual_series:.4%}")
+
+# For DataFrame - this is where the issue likely occurs
+var_threshold_df = qs.stats.value_at_risk(returns_df, sigma=1, confidence=0.95)
+print(f"VaR threshold (DataFrame): {var_threshold_df}")
+below_var_df = returns_df[returns_df < var_threshold_df]
+print(f"Shape of below_var_df: {below_var_df.shape}")
+print(f"Type of below_var_df: {type(below_var_df)}")
+print(f"below_var_df.values type: {type(below_var_df.values)}")
+print(f"below_var_df.values shape: {below_var_df.values.shape}")
+result = below_var_df.values.mean()
+print(f"below_var_df.values.mean(): {result}")
+print(f"Is result NaN? {np.isnan(result)}")
+
+# The issue is that when filtering a DataFrame, the result has NaN values
+print("\n=== The Issue ===")
+print("When filtering DataFrame with condition returns_df < var_threshold_df,")
+print("rows that don't meet the condition become NaN, not filtered out.")
+print("This causes .values.mean() to return NaN.")
+print("\nVerification:")
+print(f"Number of non-NaN values in below_var_df: {below_var_df.count().values[0]}")
+print(f"Correct CVaR calculation should be: {below_var_df.dropna().values.mean():.4%}")
+
+print("\n=== ISSUE #467 CONFIRMED ===")
+if np.isnan(result):
+    print("The CVaR calculation is broken for DataFrames!")
+    print("The issue is that the function uses .values.mean() on a DataFrame with NaN values,")
diff --git a/test_issue_468.py b/test_issue_468.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+"""Test script to reproduce Issue #468: mode.use_inf_as_null error"""
+
+import quantstats as qs
+import pandas as pd
+import numpy as np
+import yfinance as yf
+from datetime import datetime
+
+print(f"Testing with quantstats version: {qs.__version__}")
+print(f"Testing with pandas version: {pd.__version__}")
+print(f"Testing with seaborn version: ")
+import seaborn as sns
+print(f"{sns.__version__}")
+
+# Generate sample data
+np.random.seed(42)
+dates = pd.date_range(start='2023-01-01', end='2024-01-01', freq='D')
+returns = pd.Series(np.random.randn(len(dates)) * 0.01, index=dates)
+benchmark = pd.Series(np.random.randn(len(dates)) * 0.008, index=dates)
+
+try:
+    # Try to generate HTML report (this is where the error occurs)
+    print("\nTrying to generate HTML report...")
+    qs.reports.html(returns, benchmark, benchmark_title='SPY', output='/tmp/test_report.html', title='Test Report')
+    print("SUCCESS: HTML report generated without error!")
+except Exception as e:
+    print(f"ERROR encountered: {type(e).__name__}: {e}")
+    if "mode.use_inf_as_null" in str(e):
+        print("\nISSUE #468 CONFIRMED: The 'mode.use_inf_as_null' error still occurs!")
+    import traceback
+    traceback.print_exc()

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1 @@` |
| `1` | | `-version = "0.0.76"` |
| | `1` | `+version = "0.0.77"` |