Skip to content

Commit 96febab

Browse files
test refactor for floating point failures + mypy fix (#1169)
* test refactor for floating point failures + 1 mypy fix * adding inline ignores for mypy and TODOs for context * ran black for changes in previous commit
1 parent 1639641 commit 96febab

File tree

9 files changed

+70
-10
lines changed

9 files changed

+70
-10
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ repos:
1919
# Flake8: complexity and style checking
2020
# https://flake8.pycqa.org/en/latest/user/using-hooks.html
2121
- repo: https://github.com/pycqa/flake8
22-
rev: 4.0.1
22+
rev: 5.0.4
2323
hooks:
2424
- id: flake8
2525
additional_dependencies: [flake8-docstrings]

dataprofiler/data_readers/parquet_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def __init__(
6868
self._load_data(data)
6969

7070
@property
71-
def file_encoding(self) -> None:
71+
def file_encoding(self) -> Optional[str]:
7272
"""Set file encoding to None since not detected for parquet."""
7373
return None
7474

dataprofiler/profilers/float_column_profile.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,11 @@ def load_from_dict(cls, data, config: dict | None = None):
194194

195195
return profile
196196

197+
# TODO: refactor BaseColumnProfiler.profile to not be an @property
198+
# NumericStatsMixin inherits from BaseColumnProfiler and adding @property to
199+
# NumericStatsMixin.profile() results in a breaking change - ignoring [override]
197200
@property
198-
def profile(self) -> dict:
201+
def profile(self) -> dict: # type: ignore[override]
199202
"""
200203
Return the profile of the column.
201204

dataprofiler/profilers/int_column_profile.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,11 @@ def load_from_dict(cls, data, config: dict | None = None):
9292
profile._reformat_numeric_stats_types_on_serialized_profiles()
9393
return profile
9494

95+
# TODO: refactor BaseColumnProfiler.profile to not be an @property
96+
# NumericStatsMixin inherits from BaseColumnProfiler and adding @property to
97+
# NumericStatsMixin.profile() results in a breaking change - ignoring [override]
9598
@property
96-
def profile(self) -> dict:
99+
def profile(self) -> dict: # type: ignore[override]
97100
"""
98101
Return the profile of the column.
99102

dataprofiler/profilers/numerical_column_stats.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,10 @@ def _add_helper(
365365
other1._median_abs_dev_is_enabled and other2._median_abs_dev_is_enabled
366366
)
367367

368-
def profile(self) -> dict:
368+
# TODO: refactor BaseColumnProfiler.profile to not be an @property
369+
# NumericStatsMixin inherits from BaseColumnProfiler and adding @property to
370+
# NumericStatsMixin.profile() results in a breaking change - ignoring [override]
371+
def profile(self) -> dict: # type: ignore[override]
369372
"""
370373
Return profile of the column.
371374

dataprofiler/profilers/text_column_profile.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,11 @@ def report(self, remove_disabled_flag: bool = False) -> dict:
8484

8585
return profile
8686

87+
# TODO: refactor BaseColumnProfiler.profile to not be an @property
88+
# NumericStatsMixin inherits from BaseColumnProfiler and adding @property to
89+
# NumericStatsMixin.profile() results in a breaking change - ignoring [override]
8790
@property
88-
def profile(self) -> dict:
91+
def profile(self) -> dict: # type: ignore[override]
8992
"""
9093
Return the profile of the column.
9194

dataprofiler/tests/profilers/test_categorical_column_profile.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import math
23
import os
34
import unittest
45
from collections import defaultdict
@@ -731,7 +732,44 @@ def test_categorical_diff(self):
731732
},
732733
}
733734
actual_diff = profile.diff(profile2)
734-
self.assertDictEqual(expected_diff, actual_diff)
735+
736+
assert expected_diff["categorical"] == actual_diff["categorical"]
737+
assert (
738+
expected_diff["statistics"]["unique_count"]
739+
== actual_diff["statistics"]["unique_count"]
740+
)
741+
assert math.isclose(
742+
expected_diff["statistics"]["unique_ratio"],
743+
actual_diff["statistics"]["unique_ratio"],
744+
)
745+
assert (
746+
expected_diff["statistics"]["categories"]
747+
== actual_diff["statistics"]["categories"]
748+
)
749+
assert math.isclose(
750+
expected_diff["statistics"]["gini_impurity"],
751+
actual_diff["statistics"]["gini_impurity"],
752+
)
753+
assert math.isclose(
754+
expected_diff["statistics"]["unalikeability"],
755+
actual_diff["statistics"]["unalikeability"],
756+
)
757+
assert (
758+
expected_diff["statistics"]["categorical_count"]
759+
== actual_diff["statistics"]["categorical_count"]
760+
)
761+
assert math.isclose(
762+
expected_diff["statistics"]["chi2-test"]["chi2-statistic"],
763+
actual_diff["statistics"]["chi2-test"]["chi2-statistic"],
764+
)
765+
assert (
766+
expected_diff["statistics"]["chi2-test"]["deg_of_free"]
767+
== actual_diff["statistics"]["chi2-test"]["deg_of_free"]
768+
)
769+
assert math.isclose(
770+
expected_diff["statistics"]["chi2-test"]["p-value"],
771+
actual_diff["statistics"]["chi2-test"]["p-value"],
772+
)
735773

736774
# Test with one categorical column matching
737775
df_not_categorical = pd.Series(

dataprofiler/tests/profilers/test_profile_builder.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import logging
3+
import math
34
import os
45
import random
56
import re
@@ -2162,8 +2163,18 @@ def test_diff_categorical_chi2_test(self, *mocks):
21622163
"deg_of_free": 2,
21632164
"p-value": 0.3099238764710244,
21642165
}
2165-
self.assertDictEqual(
2166-
expected_chi2_test_dict, diff["data_stats"][0]["statistics"]["chi2-test"]
2166+
actual_chi2_test_dict = diff["data_stats"][0]["statistics"]["chi2-test"]
2167+
2168+
assert math.isclose(
2169+
expected_chi2_test_dict["chi2-statistic"],
2170+
actual_chi2_test_dict["chi2-statistic"],
2171+
)
2172+
assert (
2173+
expected_chi2_test_dict["deg_of_free"]
2174+
== actual_chi2_test_dict["deg_of_free"]
2175+
)
2176+
assert math.isclose(
2177+
expected_chi2_test_dict["p-value"], actual_chi2_test_dict["p-value"]
21672178
)
21682179

21692180
@mock.patch(

setup.cfg

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ warn_unused_configs = True
1717
ignore_missing_imports = True
1818
no_implicit_optional = False
1919
exclude = ^dataprofiler/tests/|^resources/|^examples|venv*/
20-
disable_error_code = override
2120

2221
[check-manifest]
2322
ignore-default-rules=True

0 commit comments

Comments
 (0)