Skip to content

Commit 099cc1d

Browse files
authored
Make list comparison accept lists in documents and support matching networks (#885)
* Make list comparison accept lists in documents and support matching networks * Fix black formatting * Split out network comparison from list_comparison processor to network_comparison processor * Make network comparison work with getter callbacks and add tests * Refactor list_comparison and network_comparison processor * Refactor network_comparison processor testdata and fix name in docstring * Make network_comparison handle warning errors * Add missing line endings * Fix mypy * Separate network_comparison config from list_comparison config * Make network_comparison rule loading fail if network is invalid
1 parent 6bff44f commit 099cc1d

File tree

25 files changed

+1622
-29
lines changed

25 files changed

+1622
-29
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
* make generic adder processor be refreshable with http getter
2727
* make generic resolver processor be refreshable with http getter
2828
* add option for refreshable getters to return default values if no value could be obtained
29+
* list comparison processor can now also match fields that contain lists in documents
30+
* add network comparison processor that can match IPs with networks in CIDR notation
2931

3032
### Improvements
3133

logprep/ng/processor/list_comparison/processor.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ def setup(self) -> None:
5454
for rule in self.rules:
5555
rule.init_list_comparison(self._config.list_search_base_path)
5656

57-
def _apply_rules(self, event: dict, rule: ListComparisonRule) -> None:
58-
"""
59-
Apply matching rule to given log event.
57+
def _apply_rules(self, event, rule):
58+
"""Apply matching rule to given log event.
59+
6060
In the process of doing so, add the result of comparing
6161
the log with a given list to the specified subfield. This subfield will contain
6262
a list of list comparison results that might be based on multiple rules.
@@ -75,20 +75,38 @@ def _apply_rules(self, event: dict, rule: ListComparisonRule) -> None:
7575
add_fields_to(event, fields, rule=rule, merge_with_target=True)
7676

7777
def _list_comparison(self, rule: ListComparisonRule, event: dict) -> tuple[list[str], str]:
78-
"""
79-
Check if field value violates block or allow list.
80-
Returns the result of the comparison (res_key), as well as a dictionary containing
81-
the result (key) and a list of filenames pertaining to said result (value).
78+
"""Check if field value violates block or allow list.
79+
80+
Returns
81+
-------
82+
tuple[list[str], str]
83+
The names of the matching lists (or the names of all lists if none matched), followed by
84+
the result key, which is either `in_list` or `not_in_list`.
85+
8286
"""
8387

84-
field_value = get_dotted_field_value(event, rule.source_fields[0])
88+
field_value_to_be_checked = get_dotted_field_value(event, rule.source_fields[0])
89+
value_list = (
90+
field_value_to_be_checked
91+
if isinstance(field_value_to_be_checked, list)
92+
else [field_value_to_be_checked]
93+
)
8594

86-
list_matches = [
87-
compare_list
88-
for compare_list in rule.compare_sets
89-
if field_value in rule.compare_sets[compare_list]
90-
]
95+
list_matches = self._get_lists_matching_with_values(rule, value_list, event)
9196

92-
if not list_matches:
97+
if len(list_matches) == 0:
9398
return list(rule.compare_sets.keys()), "not_in_list"
9499
return list_matches, "in_list"
100+
101+
def _get_lists_matching_with_values(
102+
self, rule: ListComparisonRule, value_list: list, _: dict
103+
) -> list:
104+
"""Iterate over string lists, check if element is in any."""
105+
list_matches = []
106+
for value in value_list:
107+
for compare_list in rule.compare_sets:
108+
if compare_list in list_matches:
109+
continue
110+
if value in rule.compare_sets[compare_list]:
111+
list_matches.append(compare_list)
112+
return list_matches

logprep/ng/processor/network_comparison/__init__.py

Whitespace-only changes.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
NetworkComparison
3+
=================
4+
5+
The `ng_network_comparison` processor checks whether IP addresses in source fields match IP
6+
strings or networks in CIDR notation that are listed in files.
7+
8+
9+
Processor Configuration
10+
^^^^^^^^^^^^^^^^^^^^^^^
11+
.. code-block:: yaml
12+
:linenos:
13+
14+
- networkcomparisonname:
15+
type: ng_network_comparison
16+
rules:
17+
- tests/testdata/rules/rules
18+
list_search_base_path: /path/to/list/dir
19+
20+
.. autoclass:: logprep.ng.processor.network_comparison.processor.NetworkComparison.Config
21+
:members:
22+
:undoc-members:
23+
:inherited-members:
24+
:noindex:
25+
26+
.. automodule:: logprep.ng.processor.network_comparison.rule
27+
"""
28+
29+
from ipaddress import ip_address
30+
31+
from logprep.ng.processor.list_comparison.processor import ListComparison
32+
from logprep.processor.network_comparison.rule import NetworkComparisonRule
33+
34+
35+
class NetworkComparison(ListComparison):
36+
"""Check whether IP addresses in documents are contained in networks from list files."""
37+
38+
rule_class = NetworkComparisonRule
39+
40+
def _get_lists_matching_with_values(
41+
self, rule: NetworkComparisonRule, value_list: list, event: dict
42+
) -> list:
43+
"""Iterate over network lists, check if element is in any."""
44+
list_matches: list = []
45+
for value in value_list:
46+
try:
47+
ip_address_object = ip_address(value)
48+
except ValueError as error:
49+
self._handle_warning_error(event, rule, error)
50+
continue
51+
52+
for compare_list in rule.compare_sets:
53+
for network in rule.compare_sets[compare_list]:
54+
if compare_list in list_matches:
55+
continue
56+
if ip_address_object in network:
57+
list_matches.append(compare_list)
58+
return list_matches

logprep/processor/list_comparison/processor.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,39 @@ def _apply_rules(self, event, rule):
7474
fields = {f"{rule.target_field}.{comparison_key}": comparison_result}
7575
add_fields_to(event, fields, rule=rule, merge_with_target=True)
7676

77-
def _list_comparison(self, rule: ListComparisonRule, event: dict):
78-
"""
79-
Check if field value violates block or allow list.
80-
Returns the result of the comparison (res_key), as well as a dictionary containing
81-
the result (key) and a list of filenames pertaining to said result (value).
77+
def _list_comparison(self, rule: ListComparisonRule, event: dict) -> tuple[list, str]:
78+
"""Check if field value violates block or allow list.
79+
80+
Returns
81+
-------
82+
tuple[list[str], str]
83+
The names of the matching lists (or the names of all lists if none matched), followed by
84+
the result key, which is either `in_list` or `not_in_list`.
85+
8286
"""
8387

84-
# get value that should be checked in the lists
85-
field_value = get_dotted_field_value(event, rule.source_fields[0])
88+
field_value_to_be_checked = get_dotted_field_value(event, rule.source_fields[0])
89+
value_list = (
90+
field_value_to_be_checked
91+
if isinstance(field_value_to_be_checked, list)
92+
else [field_value_to_be_checked]
93+
)
8694

87-
# iterate over lists and check if element is in any
88-
list_matches = []
89-
for compare_list in rule.compare_sets:
90-
if field_value in rule.compare_sets[compare_list]:
91-
list_matches.append(compare_list)
95+
list_matches = self._get_lists_matching_with_values(rule, value_list, event)
9296

93-
# if matching list was found return it, otherwise return all list names
9497
if len(list_matches) == 0:
9598
return list(rule.compare_sets.keys()), "not_in_list"
9699
return list_matches, "in_list"
100+
101+
def _get_lists_matching_with_values(
102+
self, rule: ListComparisonRule, value_list: list, _: dict
103+
) -> list:
104+
"""Iterate over string lists, check if element is in any."""
105+
list_matches = []
106+
for value in value_list:
107+
for compare_list in rule.compare_sets:
108+
if compare_list in list_matches:
109+
continue
110+
if value in rule.compare_sets[compare_list]:
111+
list_matches.append(compare_list)
112+
return list_matches

logprep/processor/network_comparison/__init__.py

Whitespace-only changes.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""
2+
NetworkComparison
3+
=================
4+
5+
The `network_comparison` processor checks whether IP addresses in source fields match IP
6+
strings or networks in CIDR notation that are listed in files.
7+
8+
9+
Processor Configuration
10+
^^^^^^^^^^^^^^^^^^^^^^^
11+
.. code-block:: yaml
12+
:linenos:
13+
14+
- networkcomparisonname:
15+
type: network_comparison
16+
rules:
17+
- tests/testdata/rules/rules
18+
list_search_base_path: /path/to/list/dir
19+
20+
.. autoclass:: logprep.processor.network_comparison.processor.NetworkComparison.Config
21+
:members:
22+
:undoc-members:
23+
:inherited-members:
24+
:noindex:
25+
26+
.. automodule:: logprep.processor.network_comparison.rule
27+
"""
28+
29+
from ipaddress import ip_address
30+
31+
from logprep.processor.list_comparison.processor import ListComparison
32+
from logprep.processor.network_comparison.rule import NetworkComparisonRule
33+
from logprep.util.helper import get_dotted_field_value
34+
35+
36+
class NetworkComparison(ListComparison):
37+
"""Check whether IP addresses in documents are contained in networks from list files."""
38+
39+
rule_class = NetworkComparisonRule
40+
41+
def _get_lists_matching_with_values(
42+
self, rule: NetworkComparisonRule, value_list: list, event: dict
43+
) -> list:
44+
"""Iterate over network lists, check if element is in any."""
45+
list_matches: list = []
46+
for value in value_list:
47+
try:
48+
ip_address_object = ip_address(value)
49+
except ValueError as error:
50+
self._handle_warning_error(event, rule, error)
51+
continue
52+
53+
for compare_list in rule.compare_sets:
54+
for network in rule.compare_sets[compare_list]:
55+
if compare_list in list_matches:
56+
continue
57+
if ip_address_object in network:
58+
list_matches.append(compare_list)
59+
return list_matches
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
Rule Configuration
3+
^^^^^^^^^^^^^^^^^^
4+
5+
The network comparison enricher can match IPs to IP strings and networks in CIDR notation.
6+
7+
The network comparison enricher requires the additional field :code:`network_comparison`.
8+
The mandatory keys under :code:`network_comparison` are :code:`source_fields`
9+
(as a list with one element) and :code:`target_field`. The former
10+
is used to identify the field which is to be checked against the provided lists.
11+
The latter is used to define the parent field where the results should
12+
be written to. Both fields can be dotted subfields.
13+
14+
Additionally, one or more list files have to be provided underneath the
15+
required field :code:`list_file_paths`.
16+
17+
In the following example, the field :code:`ip` will be checked against the provided list
18+
(:code:`networks.txt`).
19+
Assuming that the value :code:`127.0.0.1` will match the provided list,
20+
the result of the network comparison (:code:`in_list`) will be added to the
21+
target field :code:`network_comparison.example`.
22+
23+
.. code-block:: yaml
24+
:linenos:
25+
:caption: Example Rule to compare a single field against a provided list.
26+
27+
filter: 'ip'
28+
network_comparison:
29+
source_fields: ['ip']
30+
target_field: 'network_comparison.example'
31+
list_file_paths:
32+
- lists/networks.txt
33+
description: '...'
34+
35+
.. note::
36+
37+
Currently, it is not possible to check in more than one :code:`source_field` per rule.
38+
39+
.. autoclass:: logprep.processor.network_comparison.rule.NetworkComparisonRule.Config
40+
:members:
41+
:undoc-members:
42+
:inherited-members:
43+
:noindex:
44+
"""
45+
46+
from ipaddress import ip_network
47+
from typing import Optional, List
48+
49+
from attrs import define, field, validators
50+
51+
from logprep.processor.field_manager.rule import FieldManagerRule
52+
from logprep.processor.list_comparison.rule import ListComparisonRule
53+
from logprep.util.getter import HttpGetter
54+
55+
56+
class NetworkComparisonRule(ListComparisonRule):
57+
"""Check if documents match a filter."""
58+
59+
_compare_sets: dict
60+
61+
@define(kw_only=True)
62+
class Config(FieldManagerRule.Config):
63+
"""RuleConfig for NetworkComparisonRule"""
64+
65+
list_file_paths: List[str] = field(
66+
validator=validators.deep_iterable(member_validator=validators.instance_of(str))
67+
)
68+
"""List of files. For string format see :ref:`getters`.
69+
70+
.. security-best-practice::
71+
:title: Processor - Network Comparison list file paths Memory Consumption
72+
73+
Be aware that all values of the remote files are loaded into memory. Consider avoiding
74+
dynamically growing lists without setting limits for memory consumption. Additionally,
75+
avoid loading large files all at once to avoid exceeding HTTP body limits.
76+
77+
.. security-best-practice::
78+
:title: Processor - Network Comparison list file paths Authenticity and Integrity
79+
80+
Consider using the TLS protocol with authentication via mTLS or OAuth to ensure
81+
authenticity and integrity of the loaded values.
82+
83+
"""
84+
list_search_base_path: str = field(validator=validators.instance_of(str), factory=str)
85+
"""Base Path from where to find relative files from :code:`list_file_paths`.
86+
You can also pass a template with keys from environment,
87+
e.g., :code:`${<your environment variable>}`. The special key :code:`${LOGPREP_LIST}`
88+
will be filled by this processor. """
89+
mapping: dict = field(default="", init=False, repr=False, eq=False)
90+
ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False)
91+
92+
def init_list_comparison(self, list_search_base_path: Optional[str] = None) -> None:
93+
"""init method for list_comparison lists"""
94+
super().init_list_comparison(list_search_base_path)
95+
self._convert_compare_sets_to_networks()
96+
97+
def _update_compare_sets_via_http(self, http_getter: HttpGetter, list_path: str) -> None:
98+
super()._update_compare_sets_via_http(http_getter, list_path)
99+
self._convert_compare_sets_to_networks()
100+
101+
def _convert_compare_sets_to_networks(self) -> None:
102+
network_comparison: dict = {}
103+
for list_name, compare_strings in self._compare_sets.items():
104+
if compare_strings:
105+
network_comparison[list_name] = set()
106+
for compare_string in compare_strings:
107+
network_comparison[list_name].add(ip_network(compare_string))
108+
self._compare_sets = network_comparison

logprep/registry.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@
6969
from logprep.ng.processor.list_comparison.processor import (
7070
ListComparison as NgListComparison,
7171
)
72+
from logprep.ng.processor.network_comparison.processor import (
73+
NetworkComparison as NgNetworkComparison,
74+
)
7275
from logprep.ng.processor.pre_detector.processor import PreDetector as NgPreDetector
7376
from logprep.ng.processor.pseudonymizer.processor import (
7477
Pseudonymizer as NgPseudonymizer,
@@ -108,6 +111,7 @@
108111
from logprep.processor.key_checker.processor import KeyChecker
109112
from logprep.processor.labeler.processor import Labeler
110113
from logprep.processor.list_comparison.processor import ListComparison
114+
from logprep.processor.network_comparison.processor import NetworkComparison
111115
from logprep.processor.pre_detector.processor import PreDetector
112116
from logprep.processor.pseudonymizer.processor import Pseudonymizer
113117
from logprep.processor.replacer.processor import Replacer
@@ -143,6 +147,7 @@ class Registry:
143147
"key_checker": KeyChecker,
144148
"labeler": Labeler,
145149
"list_comparison": ListComparison,
150+
"network_comparison": NetworkComparison,
146151
"ng_amides": NgAmides,
147152
"ng_calculator": NGCalculator,
148153
"ng_clusterer": NgClusterer,
@@ -162,6 +167,7 @@ class Registry:
162167
"ng_key_checker": NgKeyChecker,
163168
"ng_labeler": NgLabeler,
164169
"ng_list_comparison": NgListComparison,
170+
"ng_network_comparison": NgNetworkComparison,
165171
"ng_replacer": NgReplacer,
166172
"ng_requester": NgRequester,
167173
"ng_string_splitter": NgStringSplitter,

tests/testdata/unit/list_comparison/lists/empty_list.txt

Whitespace-only changes.

0 commit comments

Comments
 (0)