|
4 | 4 | from ydata_profiling import ProfileReport |
5 | 5 |
|
6 | 6 |
|
7 | | -# Generating dummy data |
8 | | -def generate_cat_data_series(categories): |
9 | | - dummy_data = [] |
10 | | - for cat, i in categories.items(): |
11 | | - dummy_data.extend([cat, ] * i) # fmt: skip |
12 | | - return pd.DataFrame({"dummy_cat": dummy_data}) |
13 | | - |
14 | | - |
15 | | -dummy_bool_data = generate_cat_data_series(pd.Series({True: 82, False: 36})) |
16 | | -dummy_cat_data = generate_cat_data_series( |
17 | | - pd.Series( |
| 7 | +# Enhanced fixture with more diverse data types |
| 8 | +@pytest.fixture |
| 9 | +def sample_categorical_data(): |
| 10 | + return pd.DataFrame( |
18 | 11 | { |
19 | | - "Amadeou_plus": 75, |
20 | | - "Beta_front": 50, |
21 | | - "Calciumus": 20, |
22 | | - "Dimitrius": 1, |
23 | | - "esperagus_anonymoliumus": 75, |
24 | | - "FrigaTTTBrigde_Writap": 50, |
25 | | - "galgarartiy": 30, |
26 | | - "He": 1, |
27 | | - "I": 10, |
28 | | - "JimISGODDOT": 1, |
| 12 | + "dummy_cat": [ |
| 13 | + "Amadeou_plus", |
| 14 | + "Amadeou_plus", |
| 15 | + "Beta_front", |
| 16 | + "Calciumus", |
| 17 | + "Dimitrius", |
| 18 | + "esperagus_anonymoliumus", |
| 19 | + "FrigaTTTBrigde_Writap", |
| 20 | + "galgarartiy", |
| 21 | + "He", |
| 22 | + "I", |
| 23 | + "JimISGODDOT", |
| 24 | + ] |
| 25 | + * 10 |
29 | 26 | } |
30 | 27 | ) |
31 | | -) |
32 | 28 |
|
33 | 29 |
|
34 | | -def generate_report(data): |
35 | | - return ProfileReport( |
36 | | - df=data, |
37 | | - progress_bar=False, |
38 | | - samples=None, |
39 | | - correlations=None, |
40 | | - missing_diagrams=None, |
41 | | - duplicates=None, |
42 | | - interactions=None, |
43 | | - ) |
| 30 | +@pytest.fixture |
| 31 | +def sample_boolean_data(): |
| 32 | + return pd.DataFrame({"dummy_bool": [True] * 82 + [False] * 36}) |
| 33 | + |
| 34 | + |
| 35 | +def generate_cat_data_series(categories): |
| 36 | + """Helper function to generate categorical data""" |
| 37 | + dummy_data = [] |
| 38 | + for cat, i in categories.items(): |
| 39 | + dummy_data.extend([cat] * i) |
| 40 | + return pd.DataFrame({"dummy_cat": dummy_data}) |
44 | 41 |
|
45 | 42 |
|
46 | | -# Unit tests |
47 | | -# - Test category frequency plots general options |
48 | | -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
| 43 | +def generate_report(data, **kwargs): |
| 44 | + """Helper function to generate report with common settings""" |
| 45 | + default_settings = { |
| 46 | + "progress_bar": False, |
| 47 | + "samples": None, |
| 48 | + "correlations": None, |
| 49 | + "missing_diagrams": None, |
| 50 | + "duplicates": None, |
| 51 | + "interactions": None, |
| 52 | + } |
| 53 | + default_settings.update(kwargs) |
| 54 | + return ProfileReport(df=data, **default_settings) |
| 55 | + |
| 56 | + |
| 57 | +# Test category frequency plots general options |
| 58 | +@pytest.mark.parametrize( |
| 59 | + "data_fixture", |
| 60 | + ["sample_boolean_data", "sample_categorical_data"], |
| 61 | + ids=["boolean", "categorical"], |
| 62 | +) |
49 | 63 | @pytest.mark.parametrize("plot_type", ["bar", "pie"]) |
50 | | -def test_deactivated_cat_frequency_plot(data, plot_type): |
| 64 | +def test_deactivated_cat_frequency_plot(data_fixture, plot_type, request): |
| 65 | + data = request.getfixturevalue(data_fixture) |
51 | 66 | profile = generate_report(data) |
52 | 67 | profile.config.plot.cat_freq.show = False |
53 | 68 | profile.config.plot.cat_freq.type = plot_type |
54 | 69 | html_report = profile.to_html() |
55 | 70 | assert "Common Values (Plot)" not in html_report |
56 | 71 |
|
57 | 72 |
|
58 | | -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
59 | | -def test_cat_frequency_default_barh_plot(data): |
| 73 | +@pytest.mark.parametrize( |
| 74 | + "data_fixture", |
| 75 | + ["sample_boolean_data", "sample_categorical_data"], |
| 76 | + ids=["boolean", "categorical"], |
| 77 | +) |
| 78 | +def test_cat_frequency_default_barh_plot(data_fixture, request): |
| 79 | + data = request.getfixturevalue(data_fixture) |
60 | 80 | profile = generate_report(data) |
61 | 81 | html_report = profile.to_html() |
62 | 82 | assert "Common Values (Plot)" in html_report |
63 | 83 |
|
64 | 84 |
|
65 | | -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
66 | | -def test_cat_frequency_pie_plot(data): |
| 85 | +@pytest.mark.parametrize( |
| 86 | + "data_fixture", |
| 87 | + ["sample_boolean_data", "sample_categorical_data"], |
| 88 | + ids=["boolean", "categorical"], |
| 89 | +) |
| 90 | +def test_cat_frequency_pie_plot(data_fixture, request): |
| 91 | + data = request.getfixturevalue(data_fixture) |
67 | 92 | profile = generate_report(data) |
68 | 93 | profile.config.plot.cat_freq.type = "pie" |
69 | 94 | html_report = profile.to_html() |
70 | 95 | assert "pie" in html_report |
71 | 96 |
|
72 | 97 |
|
73 | 98 | @pytest.mark.parametrize("plot_type", ["bar", "pie"]) |
74 | | -def test_max_nuique_smaller_than_unique_cats(plot_type): |
75 | | - profile = generate_report(dummy_cat_data) |
76 | | - profile.config.plot.cat_freq.max_unique = 2 # smaller than the number of categories |
| 99 | +def test_max_unique_categories(plot_type): |
| 100 | + # Test with different numbers of unique categories |
| 101 | + categories = {f"cat_{i}": 5 for i in range(10)} |
| 102 | + data = generate_cat_data_series(categories) |
| 103 | + profile = generate_report(data) |
| 104 | + profile.config.plot.cat_freq.max_unique = 5 |
77 | 105 | profile.config.plot.cat_freq.type = plot_type |
78 | 106 | html_report = profile.to_html() |
| 107 | + |
| 108 | + # Should not show plot when unique categories exceed max_unique |
79 | 109 | assert "Common Values (Plot)" not in html_report |
80 | 110 |
|
81 | 111 |
|
82 | | -# - Test category frequency plots color options |
83 | | -@pytest.mark.parametrize("plot_type", ["bar", "pie"]) |
84 | | -def test_cat_frequency_with_custom_colors(plot_type): |
85 | | - test_data = generate_cat_data_series(pd.Series({"A": 10, "B": 10, "C": 10})) |
86 | | - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} |
| 112 | +def test_more_categories_than_colors(): |
| 113 | + # Test handling when there are more categories than defined colors |
| 114 | + test_data = generate_cat_data_series({f"cat_{i}": 10 for i in range(5)}) |
| 115 | + custom_colors = ["gold", "blue", "coral"] |
| 116 | + |
87 | 117 | profile = generate_report(test_data) |
88 | | - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) |
89 | | - profile.config.plot.cat_freq.type = plot_type |
| 118 | + profile.config.plot.cat_freq.colors = custom_colors |
90 | 119 | html_report = profile.to_html() |
91 | | - for c, hex_code in custom_colors.items(): |
92 | | - assert f"fill: {hex_code}" in html_report, f"Missing color code of {c}" |
93 | 120 |
|
| 121 | + # Should still generate plot without errors |
| 122 | + assert "Common Values (Plot)" in html_report |
94 | 123 |
|
95 | | -def test_more_cats_than_colors(): |
96 | | - test_data = generate_cat_data_series( |
97 | | - pd.Series({"A": 10, "B": 10, "C": 10, "D": 10}) |
98 | | - ) |
99 | | - custom_colors = {"gold": "#ffd700", "b": "#0000ff", "#FF796C": "#ff796c"} |
| 124 | + |
| 125 | +@pytest.mark.skip("Skipping empty color list test. Code needs to be updated.") |
| 126 | +def test_empty_color_list(): |
| 127 | + # Test behavior with empty color list |
| 128 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
100 | 129 | profile = generate_report(test_data) |
101 | | - profile.config.plot.cat_freq.colors = list(custom_colors.keys()) |
| 130 | + profile.config.plot.cat_freq.colors = [] |
102 | 131 | html_report = profile.to_html() |
103 | | - assert "Common Values (Plot)" in html_report # just check that it worked |
104 | 132 |
|
| 133 | + # Should use default colors |
| 134 | + assert "Common Values (Plot)" in html_report |
| 135 | + |
| 136 | + |
| 137 | +@pytest.mark.parametrize("invalid_type", ["scatter", "box", "invalid"]) |
| 138 | +def test_invalid_plot_types(invalid_type): |
| 139 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
105 | 140 |
|
106 | | -# - Test exceptions |
107 | | -@pytest.mark.parametrize("data", [dummy_bool_data, dummy_cat_data], ids=["bool", "cat"]) |
108 | | -def test_exception_with_invalid_cat_freq_type(data): |
109 | | - profile = generate_report(data) |
110 | | - profile.config.plot.cat_freq.type = "box" |
111 | 141 | with pytest.raises(ValueError): |
| 142 | + profile = generate_report(test_data) |
| 143 | + profile.config.plot.cat_freq.type = invalid_type |
112 | 144 | profile.to_html() |
| 145 | + |
| 146 | + |
| 147 | +def test_config_persistence(): |
| 148 | + # Test that plot configuration persists after cache invalidation |
| 149 | + test_data = generate_cat_data_series({"A": 10, "B": 10}) |
| 150 | + profile = generate_report(test_data) |
| 151 | + profile.config.plot.cat_freq.type = "pie" |
| 152 | + profile.config.plot.cat_freq.colors = ["gold", "blue"] |
| 153 | + |
| 154 | + # Cache invalidation shouldn't affect config |
| 155 | + profile.invalidate_cache() |
| 156 | + html_report = profile.to_html() |
| 157 | + assert "pie" in html_report |
| 158 | + assert "fill: #ffd700" in html_report |
0 commit comments