Commit 7a97af9

Add config option to pre-detector for copying log fields to detection events
1 parent 7c20288 commit 7a97af9

13 files changed: +926 −261 lines

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
@@ -29,6 +29,11 @@ repos:
     hooks:
       - id: isort
         name: isort (python)
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.18.2
+    hooks:
+      - id: mypy
+        additional_dependencies: [attrs]
   - repo: https://github.com/google/yamlfmt
     rev: v0.15.0
     hooks:
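
The new hook runs mypy in its own pre-commit environment, and listing attrs under `additional_dependencies` lets mypy's attrs plugin understand the attrs-based classes used throughout the codebase. A minimal sketch of the kind of class the hook can now check; the class and field names are illustrative, not taken from the repository:

# Illustrative attrs-based class, similar in style to Logprep's Config classes
# but not copied from the repository.
from attrs import define, field, validators


@define(kw_only=True)
class ExampleConfig:
    """Small config object whose generated __init__ mypy can now understand."""

    timeout: int = field(validator=validators.instance_of(int))


config = ExampleConfig(timeout=5)  # accepted by mypy and at runtime
# ExampleConfig(timeout="5s")      # mypy would flag: expected "int", got "str"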

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -1,9 +1,11 @@
 ## Upcoming Changes
 
 ### Breaking
+* pre detector events now also include host.name if the field value is None
 
 ### Features
 * add support for python 3.14
+* allow pre-detector to copy a configurable list of fields from log to detection event
 
 ### Improvements
 * add workflow to partially run & check the compose example
@@ -15,6 +17,7 @@
 * fix docker-compose and k8s example setups
 * fix handling of non-string values (e.g. int) as replacement argument for `generic_resolver`
 * fix documentation for `generic_resolver` rule `append_to_list -> merge_with_target` option
+* fix grokker using a fixed directory for downloaded patterns, potentially leading to conflicts between processes
 
 ## 17.0.3
 ### Breaking

doc/source/installation.rst

Lines changed: 1 addition & 0 deletions
@@ -40,6 +40,7 @@ contribute to them.
     git clone https://github.com/fkie-cad/Logprep.git
     cd Logprep
     pip install .
+    pip install .[dev] # if you intend to contribute
 
 To see if the installation was successful run
 :code:`logprep --version`.

logprep/abc/processor.py

Lines changed: 22 additions & 6 deletions
@@ -3,7 +3,7 @@
 import logging
 import os
 from abc import abstractmethod
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Type
+from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Type, cast
 
 from attrs import define, field, validators
 
@@ -105,27 +105,43 @@ class Config(Component.Config):
     __slots__ = [
         "_event",
         "_rule_tree",
-        "result",
+        "_result",
         "_bypass_rule_tree",
     ]
 
-    rule_class: ClassVar["Type[Rule] | None"] = None
+    rule_class: ClassVar[Type["Rule"] | None] = None
     _event: dict
     _rule_tree: RuleTree
     _strategy = None
     _bypass_rule_tree: bool
-    result: ProcessorResult | None
+    _result: ProcessorResult | None
 
     def __init__(self, name: str, configuration: "Processor.Config"):
         super().__init__(name, configuration)
         self._rule_tree = RuleTree(config=self._config.tree_config)
         self.load_rules(rules_targets=self._config.rules)
-        self.result = None
+        self._result = None
         self._bypass_rule_tree = False
         if os.environ.get("LOGPREP_BYPASS_RULE_TREE"):
             self._bypass_rule_tree = True
             logger.debug("Bypassing rule tree for processor %s", self.name)
 
+    @property
+    def result(self) -> ProcessorResult:
+        """Returns the current result object which is guaranteed to be non-None
+        during processing of an event.
+
+        Returns
+        -------
+        ProcessorResult
+            The current result to be modified in-place
+        """
+        return cast(ProcessorResult, self._result)
+
+    @result.setter
+    def result(self, value: ProcessorResult):
+        self._result = value
+
     @property
     def rules(self):
         """Returns all rules
@@ -161,7 +177,7 @@ def process(self, event: dict) -> ProcessorResult:
         extra data and a list of target outputs.
 
         """
-        self.result = ProcessorResult(processor_name=self.name, event=event) # type: ignore
+        self._result = ProcessorResult(processor_name=self.name, event=event) # type: ignore
         logger.debug("%s processing event %s", self.describe(), event)
         if self._bypass_rule_tree:
             self._process_all_rules(event)
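
The public `result` attribute is replaced by a private `_result: ProcessorResult | None` plus a `result` property that casts away the `None`, so code running during event processing can use `self.result` without Optional checks while the attribute still starts out as `None`. A minimal sketch of this pattern; `Job` and `JobResult` are made-up names, not Logprep classes:

from typing import cast


class JobResult:
    """Toy result container; stands in for something like ProcessorResult."""

    def __init__(self) -> None:
        self.warnings: list[str] = []


class Job:
    """Toy worker; stands in for a processor that owns a result during processing."""

    def __init__(self) -> None:
        # No result exists before processing starts.
        self._result: JobResult | None = None

    @property
    def result(self) -> JobResult:
        # cast() tells the type checker the value is non-None here;
        # it performs no runtime check.
        return cast(JobResult, self._result)

    @result.setter
    def result(self, value: JobResult) -> None:
        self._result = value

    def process(self) -> JobResult:
        self._result = JobResult()
        # Helper code can now use self.result without Optional checks.
        self.result.warnings.append("example warning")
        return self.result


print(Job().process().warnings)  # prints: ['example warning']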

logprep/ng/processor/grokker/processor.py

Lines changed: 13 additions & 12 deletions
@@ -31,6 +31,7 @@
 
 import logging
 import re
+import tempfile
 from pathlib import Path
 from zipfile import ZipFile
 
@@ -99,24 +100,24 @@ def setup(self) -> None:
         super().setup()
         custom_patterns_dir = self._config.custom_patterns_dir
         if re.search(r"http(s)?:\/\/.*?\.zip", custom_patterns_dir):
-            patterns_tmp_path = Path("/tmp/grok_patterns")
-            self._download_zip_file(source_file=custom_patterns_dir, target_dir=patterns_tmp_path)
-            for rule in self.rules:
-                rule.set_mapping_actions(patterns_tmp_path)
-            return
+            with tempfile.TemporaryDirectory("grok") as patterns_tmp_path:
+                self._download_zip_file(
+                    source_file=custom_patterns_dir, target_dir=Path(patterns_tmp_path)
+                )
+                for rule in self.rules:
+                    rule.set_mapping_actions(patterns_tmp_path)
+                return
         if custom_patterns_dir:
             for rule in self.rules:
                 rule.set_mapping_actions(custom_patterns_dir)
             return
         for rule in self.rules:
             rule.set_mapping_actions()
 
-    def _download_zip_file(self, source_file: str, target_dir: Path) -> None:
-        if not target_dir.exists():
-            logger.debug("start grok pattern download...")
-            archive = Path(f"{target_dir}.zip")
-            archive.touch()
-            archive.write_bytes(GetterFactory.from_string(source_file).get_raw())
+    def _download_zip_file(self, source_file: str, target_dir: Path):
+        logger.debug("start grok pattern download...")
+        with tempfile.TemporaryFile("wb+") as archive:
+            archive.write(GetterFactory.from_string(source_file).get_raw())
             logger.debug("finished grok pattern download.")
-            with ZipFile(str(archive), mode="r") as zip_file:
+            with ZipFile(archive, mode="r") as zip_file:
                 zip_file.extractall(target_dir)
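
Instead of writing to a fixed `/tmp/grok_patterns` directory, the pattern archive is now written to a `tempfile.TemporaryFile` and extracted into a per-call `tempfile.TemporaryDirectory`, matching the changelog fix for conflicts between processes sharing one download location. A minimal standalone sketch of the same pattern, using `urllib.request` as a stand-in for Logprep's GetterFactory and a hypothetical URL:

import tempfile
import urllib.request
from pathlib import Path
from zipfile import ZipFile


def download_and_extract(zip_url: str) -> list[str]:
    """Download a zip archive and extract it into a short-lived directory."""
    with tempfile.TemporaryDirectory("grok") as target_dir:
        with tempfile.TemporaryFile("wb+") as archive:
            with urllib.request.urlopen(zip_url) as response:
                archive.write(response.read())
            # ZipFile seeks inside the still-open temporary file object.
            with ZipFile(archive, mode="r") as zip_file:
                zip_file.extractall(target_dir)
        # Anything that needs the extracted files must happen inside this block;
        # the directory is deleted when the context manager exits.
        return [path.name for path in Path(target_dir).iterdir()]


# Hypothetical URL purely for illustration:
# print(download_and_extract("https://example.com/grok_patterns.zip"))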

logprep/ng/processor/pre_detector/processor.py

Lines changed: 18 additions & 8 deletions
@@ -29,6 +29,7 @@
 """
 
 from functools import cached_property
+from typing import cast
 from uuid import uuid4
 
 from attr import define, field, validators
@@ -38,7 +39,12 @@
 from logprep.processor.base.exceptions import ProcessingWarning
 from logprep.processor.pre_detector.ip_alerter import IPAlerter
 from logprep.processor.pre_detector.rule import PreDetectorRule
-from logprep.util.helper import add_fields_to, get_dotted_field_value
+from logprep.util.helper import (
+    FieldValue,
+    add_fields_to,
+    copy_fields_to_event,
+    get_dotted_field_value,
+)
 from logprep.util.time import TimeParser, TimeParserException
 
 
@@ -92,16 +98,16 @@ class Config(Processor.Config):
     def _ip_alerter(self) -> IPAlerter:
         return IPAlerter(self._config.alert_ip_list_path)
 
-    def normalize_timestamp(self, rule: PreDetectorRule, timestamp: str) -> str:
+    def normalize_timestamp(self, rule: PreDetectorRule, timestamp: FieldValue) -> str:
         """method for normalizing the timestamp"""
         try:
             parsed_datetime = TimeParser.parse_datetime(
-                timestamp, rule.source_format, rule.source_timezone
+                cast(str, timestamp), rule.source_format, rule.source_timezone
             )
             return (
                 parsed_datetime.astimezone(rule.target_timezone).isoformat().replace("+00:00", "Z")
             )
-        except TimeParserException as error:
+        except (TimeParserException, TypeError) as error:
             raise ProcessingWarning(
                 "Could not parse timestamp",
                 rule,
@@ -132,15 +138,19 @@ def _get_detection_result(self, event: dict, rule: PreDetectorRule) -> None:
 
     @staticmethod
     def _generate_detection_result(
-        pre_detection_id: str, event: dict, rule: PreDetectorRule
+        pre_detection_id: FieldValue, event: dict, rule: PreDetectorRule
     ) -> dict:
         detection_result = {
             **rule.detection_data,
             "rule_filter": rule.filter_str,
             "description": rule.description,
             "pre_detection_id": pre_detection_id,
         }
-
-        if host_name := get_dotted_field_value(event, "host.name"):
-            detection_result.update({"host": {"name": host_name}})
+        copy_fields_to_event(
+            target_event=detection_result,
+            source_event=event,
+            dotted_field_names=rule.copy_fields_to_detection_event,
+            rule=rule,
+            skip_missing=True,
+        )
         return detection_result
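
`_generate_detection_result` no longer hard-codes copying `host.name`; it hands the rule's `copy_fields_to_detection_event` list to the `copy_fields_to_event` helper with `skip_missing=True`. The sketch below shows the general idea of copying dotted fields between nested dicts; it is a simplified stand-in, not Logprep's actual `copy_fields_to_event`, and `winlog.event_id` is only a hypothetical example field:

# Illustrative sketch of copying dotted fields between events; not Logprep's
# copy_fields_to_event, just a simplified stand-in for the idea.
from typing import Any


def copy_dotted_fields(
    target_event: dict, source_event: dict, dotted_field_names: list[str]
) -> None:
    """Copy each dotted field from source_event into target_event as nested dicts."""
    for dotted_name in dotted_field_names:
        value: Any = source_event
        for part in dotted_name.split("."):
            if not isinstance(value, dict) or part not in value:
                value = None
                break
            value = value[part]
        if value is None:
            continue  # comparable to skip_missing=True: absent fields are not copied
        node = target_event
        *parents, leaf = dotted_name.split(".")
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value


log_event = {"host": {"name": "web-01"}, "winlog": {"event_id": 4625}}
detection = {"description": "example"}
copy_dotted_fields(detection, log_event, ["host.name", "winlog.event_id"])
print(detection)  # {'description': 'example', 'host': {'name': 'web-01'}, 'winlog': {'event_id': 4625}}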

logprep/processor/grokker/processor.py

Lines changed: 13 additions & 12 deletions
@@ -31,6 +31,7 @@
 
 import logging
 import re
+import tempfile
 from pathlib import Path
 from zipfile import ZipFile
 
@@ -106,16 +107,18 @@ def _apply_rules(self, event: dict, rule: GrokkerRule):
         if not matches:
             raise ProcessingWarning("no grok pattern matched", rule, event)
 
-    def setup(self):
+    def setup(self) -> None:
         """Loads the action mapping. Has to be called before processing"""
         super().setup()
         custom_patterns_dir = self._config.custom_patterns_dir
         if re.search(r"http(s)?:\/\/.*?\.zip", custom_patterns_dir):
-            patterns_tmp_path = Path("/tmp/grok_patterns")
-            self._download_zip_file(source_file=custom_patterns_dir, target_dir=patterns_tmp_path)
-            for rule in self.rules:
-                rule.set_mapping_actions(patterns_tmp_path)
-            return
+            with tempfile.TemporaryDirectory("grok") as patterns_tmp_path:
+                self._download_zip_file(
+                    source_file=custom_patterns_dir, target_dir=Path(patterns_tmp_path)
+                )
+                for rule in self.rules:
+                    rule.set_mapping_actions(patterns_tmp_path)
+                return
         if custom_patterns_dir:
             for rule in self.rules:
                 rule.set_mapping_actions(custom_patterns_dir)
@@ -124,11 +127,9 @@ def setup(self):
             rule.set_mapping_actions()
 
     def _download_zip_file(self, source_file: str, target_dir: Path):
-        if not target_dir.exists():
-            logger.debug("start grok pattern download...")
-            archive = Path(f"{target_dir}.zip")
-            archive.touch()
-            archive.write_bytes(GetterFactory.from_string(source_file).get_raw())
+        logger.debug("start grok pattern download...")
+        with tempfile.TemporaryFile("wb+") as archive:
+            archive.write(GetterFactory.from_string(source_file).get_raw())
             logger.debug("finished grok pattern download.")
-            with ZipFile(str(archive), mode="r") as zip_file:
+            with ZipFile(archive, mode="r") as zip_file:
                 zip_file.extractall(target_dir)

logprep/processor/pre_detector/processor.py

Lines changed: 24 additions & 15 deletions
@@ -29,6 +29,7 @@
 """
 
 from functools import cached_property
+from typing import cast
 from uuid import uuid4
 
 from attr import define, field, validators
@@ -37,7 +38,12 @@
 from logprep.processor.base.exceptions import ProcessingWarning
 from logprep.processor.pre_detector.ip_alerter import IPAlerter
 from logprep.processor.pre_detector.rule import PreDetectorRule
-from logprep.util.helper import add_fields_to, get_dotted_field_value
+from logprep.util.helper import (
+    FieldValue,
+    add_fields_to,
+    copy_fields_to_event,
+    get_dotted_field_value,
+)
 from logprep.util.time import TimeParser, TimeParserException
 
 
@@ -101,19 +107,19 @@ class Config(Processor.Config):
     rule_class = PreDetectorRule
 
     @cached_property
-    def _ip_alerter(self):
+    def _ip_alerter(self) -> IPAlerter:
         return IPAlerter(self._config.alert_ip_list_path)
 
-    def normalize_timestamp(self, rule: PreDetectorRule, timestamp: str) -> str:
+    def normalize_timestamp(self, rule: PreDetectorRule, timestamp: FieldValue) -> str:
         """method for normalizing the timestamp"""
        try:
             parsed_datetime = TimeParser.parse_datetime(
-                timestamp, rule.source_format, rule.source_timezone
+                cast(str, timestamp), rule.source_format, rule.source_timezone
             )
             return (
                 parsed_datetime.astimezone(rule.target_timezone).isoformat().replace("+00:00", "Z")
             )
-        except TimeParserException as error:
+        except (TimeParserException, TypeError) as error:
             raise ProcessingWarning(
                 "Could not parse timestamp",
                 rule,
@@ -143,16 +149,19 @@ def _get_detection_result(self, event: dict, rule: PreDetectorRule):
 
     @staticmethod
     def _generate_detection_result(
-        pre_detection_id: str, event: dict, rule: PreDetectorRule
+        pre_detection_id: FieldValue, event: dict, rule: PreDetectorRule
     ) -> dict:
-        detection_result = rule.detection_data
-        detection_result.update(
-            {
-                "rule_filter": rule.filter_str,
-                "description": rule.description,
-                "pre_detection_id": pre_detection_id,
-            }
+        detection_result = {
+            **rule.detection_data,
+            "rule_filter": rule.filter_str,
+            "description": rule.description,
+            "pre_detection_id": pre_detection_id,
+        }
+        copy_fields_to_event(
+            target_event=detection_result,
+            source_event=event,
+            dotted_field_names=rule.copy_fields_to_detection_event,
+            rule=rule,
+            skip_missing=True,
         )
-        if host_name := get_dotted_field_value(event, "host.name"):
-            detection_result.update({"host": {"name": host_name}})
         return detection_result
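
In this (non-ng) pre-detector the change also builds the detection event from a fresh dict instead of mutating `rule.detection_data` in place, and then copies the configured log fields into it. A hedged end-to-end illustration, with hypothetical field names and stand-in values for the rule attributes:

# All names and values below are hypothetical; rule attributes are replaced by
# simple stand-in values rather than a real PreDetectorRule.
log_event = {
    "event_id": "4625",
    "host": {"name": "web-01"},
    "user": {"name": "alice"},
}

# Assume the rule lists the fields to copy, e.g.:
#   copy_fields_to_detection_event: ["host.name", "user.name"]
detection_data = {"title": "failed logon", "severity": "low"}  # stand-in for rule.detection_data

detection_event = {
    **detection_data,
    "rule_filter": '(event_id:"4625")',     # stand-in for rule.filter_str
    "description": "example description",   # stand-in for rule.description
    "pre_detection_id": "generated-uuid",   # uuid4() in the processor
    # copied from log_event via copy_fields_to_event(..., skip_missing=True):
    "host": {"name": "web-01"},
    "user": {"name": "alice"},
}
print(detection_event)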
