#!/usr/bin/env python3
"""Test script to reproduce Issue #467: CVAR calculation issue.

Builds a seeded series of random daily returns, computes VaR and CVaR
through quantstats for both a Series and a single-column DataFrame, prints
the corresponding rows of ``qs.reports.metrics``, and then replays the
filter-then-mean steps by hand to show where NaN enters for DataFrames.
"""

import quantstats as qs
import pandas as pd
import numpy as np


def _format_pct(value):
    """Return *value* rendered with the ``.4%`` format, element by element.

    A scalar produces exactly what ``f"{value:.4%}"`` would. Array-like
    values (ndarray, Series, DataFrame) reject a numeric format spec with
    ``TypeError``, so they are flattened and each element is formatted
    separately, joined by ", ".
    """
    flat = np.asarray(value, dtype=float).ravel()
    return ", ".join(f"{item:.4%}" for item in flat)


print(f"Testing with quantstats version: {qs.__version__} ")

# Generate sample data (fixed seed so every run prints the same numbers)
np.random.seed(42)
dates = pd.date_range(start='2023-01-01', end='2024-01-01', freq='D')
returns_series = pd.Series(np.random.randn(len(dates)) * 0.01, index=dates)
returns_df = pd.DataFrame({'returns': returns_series})

print("\n === Testing with Series ===")
var_series = qs.stats.var(returns_series)
cvar_series = qs.stats.cvar(returns_series)
print(f"VaR (Series): {var_series:.4%} ")
print(f"CVaR (Series): {cvar_series:.4%} ")

print("\n === Testing with DataFrame ===")
var_df = qs.stats.var(returns_df)
cvar_df = qs.stats.cvar(returns_df)
# DataFrame input may yield array-like results rather than scalars, and a
# bare ``:.4%`` spec would then raise TypeError and stop the script before
# the debug section below; _format_pct handles both shapes.
print(f"VaR (DataFrame): {_format_pct(var_df)} ")
print(f"CVaR (DataFrame): {_format_pct(cvar_df)} ")

print("\n === Testing in metrics report ===")
metrics = qs.reports.metrics(returns_series, mode='full', display=False)
# Print all available metrics to see the keys
print("Available metrics keys:")
for key in metrics.index[:10]:  # Show first 10 keys
    print(f" - {key} ")
if 'Daily Value-at-Risk' in metrics.index:
    print(f"VaR from metrics: {metrics.loc['Daily Value-at-Risk']} ")
if 'Expected Shortfall (cVaR)' in metrics.index:
    print(f"CVaR from metrics: {metrics.loc['Expected Shortfall (cVaR)']} ")

# Debug: Check what happens inside cvar calculation
print("\n === Debug: Manual CVaR calculation ===")
var_threshold = qs.stats.value_at_risk(returns_series, sigma=1, confidence=0.95)
print(f"VaR threshold: {var_threshold:.4%} ")

# For Series: a boolean mask drops the rows that fail the condition
below_var_series = returns_series[returns_series < var_threshold]
cvar_manual_series = below_var_series.mean()
print(f"Manual CVaR (Series): {cvar_manual_series:.4%} ")

# For DataFrame - this is where the issue likely occurs
var_threshold_df = qs.stats.value_at_risk(returns_df, sigma=1, confidence=0.95)
print(f"VaR threshold (DataFrame): {var_threshold_df} ")
below_var_df = returns_df[returns_df < var_threshold_df]
print(f"Shape of below_var_df: {below_var_df.shape} ")
print(f"Type of below_var_df: {type(below_var_df)} ")
print(f"below_var_df.values type: {type(below_var_df.values)} ")
print(f"below_var_df.values shape: {below_var_df.values.shape} ")
# Mean over the raw ndarray; NaN placeholders (if any) propagate into it
result = below_var_df.values.mean()
print(f"below_var_df.values.mean(): {result} ")
print(f"Is result NaN? {np.isnan(result)} ")

# The issue is that when filtering a DataFrame, the result has NaN values
print("\n === The Issue ===")
print("When filtering DataFrame with condition returns_df < var_threshold_df,")
print("rows that don't meet the condition become NaN, not filtered out.")
print("This causes .values.mean() to return NaN.")
print("\n Verification:")
print(f"Number of non-NaN values in below_var_df: {below_var_df.count().values[0]} ")
print(f"Correct CVaR calculation should be: {below_var_df.dropna().values.mean():.4%} ")

# NOTE(review): this banner is printed unconditionally, even when the NaN
# check below is False - confirm that is intended.
print("\n === ISSUE #467 CONFIRMED ===")
if np.isnan(result):
    print("The CVaR calculation is broken for DataFrames!")
    print("The issue is that the function uses .values.mean() on a DataFrame with NaN values,")
# 0 commit comments (web-page footer residue, not part of the script)