Skip to content

Commit 6a6c54c

Browse files
committed
Add config option to pre-detector for copying log fields to detection events
1 parent 7c20288 commit 6a6c54c

File tree

13 files changed, with 929 additions and 260 deletions

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,11 @@ repos:
2929
hooks:
3030
- id: isort
3131
name: isort (python)
32+
- repo: https://github.com/pre-commit/mirrors-mypy
33+
rev: v1.18.2
34+
hooks:
35+
- id: mypy
36+
additional_dependencies: [attrs]
3237
- repo: https://github.com/google/yamlfmt
3338
rev: v0.15.0
3439
hooks:

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
### Features
66
* add support for python 3.14
7+
* allow pre-detector to copy a configurable list of fields from log to detection event
78

89
### Improvements
910
* add workflow to partially run & check the compose example
@@ -15,6 +16,7 @@
1516
* fix docker-compose and k8s example setups
1617
* fix handling of non-string values (e.g. int) as replacement argument for `generic_resolver`
1718
* fix documentation for `generic_resolver` rule `append_to_list -> merge_with_target` option
19+
* fix grokker using a fixed directory for downloaded patterns, potentially leading to conflicts between processes
1820

1921
## 17.0.3
2022
### Breaking

doc/source/installation.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ contribute to them.
4040
git clone https://github.com/fkie-cad/Logprep.git
4141
cd Logprep
4242
pip install .
43+
pip install .[dev] # if you intend to contribute
4344
4445
To see if the installation was successful run
4546
:code:`logprep --version`.

logprep/abc/processor.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import logging
44
import os
5+
import typing
56
from abc import abstractmethod
67
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Type
78

@@ -105,27 +106,43 @@ class Config(Component.Config):
105106
__slots__ = [
106107
"_event",
107108
"_rule_tree",
108-
"result",
109+
"_result",
109110
"_bypass_rule_tree",
110111
]
111112

112-
rule_class: ClassVar["Type[Rule] | None"] = None
113+
rule_class: ClassVar[Type["Rule"] | None] = None
113114
_event: dict
114115
_rule_tree: RuleTree
115116
_strategy = None
116117
_bypass_rule_tree: bool
117-
result: ProcessorResult | None
118+
_result: ProcessorResult | None
118119

119120
def __init__(self, name: str, configuration: "Processor.Config"):
120121
super().__init__(name, configuration)
121122
self._rule_tree = RuleTree(config=self._config.tree_config)
122123
self.load_rules(rules_targets=self._config.rules)
123-
self.result = None
124+
self._result = None
124125
self._bypass_rule_tree = False
125126
if os.environ.get("LOGPREP_BYPASS_RULE_TREE"):
126127
self._bypass_rule_tree = True
127128
logger.debug("Bypassing rule tree for processor %s", self.name)
128129

130+
@property
131+
def result(self) -> ProcessorResult:
132+
"""Returns the current result object which is guaranteed to be non-None
133+
during processing of an event.
134+
135+
Returns
136+
-------
137+
ProcessorResult
138+
The current result to be modified in-place
139+
"""
140+
return typing.cast(ProcessorResult, self._result)
141+
142+
@result.setter
143+
def result(self, value: ProcessorResult):
144+
self._result = value
145+
129146
@property
130147
def rules(self):
131148
"""Returns all rules
@@ -161,7 +178,7 @@ def process(self, event: dict) -> ProcessorResult:
161178
extra data and a list of target outputs.
162179
163180
"""
164-
self.result = ProcessorResult(processor_name=self.name, event=event) # type: ignore
181+
self._result = ProcessorResult(processor_name=self.name, event=event) # type: ignore
165182
logger.debug("%s processing event %s", self.describe(), event)
166183
if self._bypass_rule_tree:
167184
self._process_all_rules(event)

logprep/ng/processor/grokker/processor.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131

3232
import logging
3333
import re
34+
import tempfile
3435
from pathlib import Path
3536
from zipfile import ZipFile
3637

@@ -99,24 +100,24 @@ def setup(self) -> None:
99100
super().setup()
100101
custom_patterns_dir = self._config.custom_patterns_dir
101102
if re.search(r"http(s)?:\/\/.*?\.zip", custom_patterns_dir):
102-
patterns_tmp_path = Path("/tmp/grok_patterns")
103-
self._download_zip_file(source_file=custom_patterns_dir, target_dir=patterns_tmp_path)
104-
for rule in self.rules:
105-
rule.set_mapping_actions(patterns_tmp_path)
106-
return
103+
with tempfile.TemporaryDirectory("grok") as patterns_tmp_path:
104+
self._download_zip_file(
105+
source_file=custom_patterns_dir, target_dir=Path(patterns_tmp_path)
106+
)
107+
for rule in self.rules:
108+
rule.set_mapping_actions(patterns_tmp_path)
109+
return
107110
if custom_patterns_dir:
108111
for rule in self.rules:
109112
rule.set_mapping_actions(custom_patterns_dir)
110113
return
111114
for rule in self.rules:
112115
rule.set_mapping_actions()
113116

114-
def _download_zip_file(self, source_file: str, target_dir: Path) -> None:
115-
if not target_dir.exists():
116-
logger.debug("start grok pattern download...")
117-
archive = Path(f"{target_dir}.zip")
118-
archive.touch()
119-
archive.write_bytes(GetterFactory.from_string(source_file).get_raw())
117+
def _download_zip_file(self, source_file: str, target_dir: Path):
118+
logger.debug("start grok pattern download...")
119+
with tempfile.TemporaryFile("wb+") as archive:
120+
archive.write(GetterFactory.from_string(source_file).get_raw())
120121
logger.debug("finished grok pattern download.")
121-
with ZipFile(str(archive), mode="r") as zip_file:
122+
with ZipFile(archive, mode="r") as zip_file:
122123
zip_file.extractall(target_dir)

logprep/ng/processor/pre_detector/processor.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
.. automodule:: logprep.processor.pre_detector.rule
2929
"""
3030

31+
import typing
3132
from functools import cached_property
3233
from uuid import uuid4
3334

@@ -38,7 +39,12 @@
3839
from logprep.processor.base.exceptions import ProcessingWarning
3940
from logprep.processor.pre_detector.ip_alerter import IPAlerter
4041
from logprep.processor.pre_detector.rule import PreDetectorRule
41-
from logprep.util.helper import add_fields_to, get_dotted_field_value
42+
from logprep.util.helper import (
43+
FieldValue,
44+
add_fields_to,
45+
copy_fields_to_event,
46+
get_dotted_field_value,
47+
)
4248
from logprep.util.time import TimeParser, TimeParserException
4349

4450

@@ -92,16 +98,16 @@ class Config(Processor.Config):
9298
def _ip_alerter(self) -> IPAlerter:
9399
return IPAlerter(self._config.alert_ip_list_path)
94100

95-
def normalize_timestamp(self, rule: PreDetectorRule, timestamp: str) -> str:
101+
def normalize_timestamp(self, rule: PreDetectorRule, timestamp: FieldValue) -> str:
96102
"""method for normalizing the timestamp"""
97103
try:
98104
parsed_datetime = TimeParser.parse_datetime(
99-
timestamp, rule.source_format, rule.source_timezone
105+
typing.cast(str, timestamp), rule.source_format, rule.source_timezone
100106
)
101107
return (
102108
parsed_datetime.astimezone(rule.target_timezone).isoformat().replace("+00:00", "Z")
103109
)
104-
except TimeParserException as error:
110+
except (TimeParserException, TypeError) as error:
105111
raise ProcessingWarning(
106112
"Could not parse timestamp",
107113
rule,
@@ -132,15 +138,19 @@ def _get_detection_result(self, event: dict, rule: PreDetectorRule) -> None:
132138

133139
@staticmethod
134140
def _generate_detection_result(
135-
pre_detection_id: str, event: dict, rule: PreDetectorRule
141+
pre_detection_id: FieldValue, event: dict, rule: PreDetectorRule
136142
) -> dict:
137143
detection_result = {
138144
**rule.detection_data,
139145
"rule_filter": rule.filter_str,
140146
"description": rule.description,
141147
"pre_detection_id": pre_detection_id,
142148
}
143-
144-
if host_name := get_dotted_field_value(event, "host.name"):
145-
detection_result.update({"host": {"name": host_name}})
149+
copy_fields_to_event(
150+
target_event=detection_result,
151+
source_event=event,
152+
dotted_field_names=rule.copy_fields_to_detection_event,
153+
rule=rule,
154+
skip_missing=True,
155+
)
146156
return detection_result

logprep/processor/grokker/processor.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131

3232
import logging
3333
import re
34+
import tempfile
3435
from pathlib import Path
3536
from zipfile import ZipFile
3637

@@ -106,16 +107,18 @@ def _apply_rules(self, event: dict, rule: GrokkerRule):
106107
if not matches:
107108
raise ProcessingWarning("no grok pattern matched", rule, event)
108109

109-
def setup(self):
110+
def setup(self) -> None:
110111
"""Loads the action mapping. Has to be called before processing"""
111112
super().setup()
112113
custom_patterns_dir = self._config.custom_patterns_dir
113114
if re.search(r"http(s)?:\/\/.*?\.zip", custom_patterns_dir):
114-
patterns_tmp_path = Path("/tmp/grok_patterns")
115-
self._download_zip_file(source_file=custom_patterns_dir, target_dir=patterns_tmp_path)
116-
for rule in self.rules:
117-
rule.set_mapping_actions(patterns_tmp_path)
118-
return
115+
with tempfile.TemporaryDirectory("grok") as patterns_tmp_path:
116+
self._download_zip_file(
117+
source_file=custom_patterns_dir, target_dir=Path(patterns_tmp_path)
118+
)
119+
for rule in self.rules:
120+
rule.set_mapping_actions(patterns_tmp_path)
121+
return
119122
if custom_patterns_dir:
120123
for rule in self.rules:
121124
rule.set_mapping_actions(custom_patterns_dir)
@@ -124,11 +127,9 @@ def setup(self):
124127
rule.set_mapping_actions()
125128

126129
def _download_zip_file(self, source_file: str, target_dir: Path):
127-
if not target_dir.exists():
128-
logger.debug("start grok pattern download...")
129-
archive = Path(f"{target_dir}.zip")
130-
archive.touch()
131-
archive.write_bytes(GetterFactory.from_string(source_file).get_raw())
130+
logger.debug("start grok pattern download...")
131+
with tempfile.TemporaryFile("wb+") as archive:
132+
archive.write(GetterFactory.from_string(source_file).get_raw())
132133
logger.debug("finished grok pattern download.")
133-
with ZipFile(str(archive), mode="r") as zip_file:
134+
with ZipFile(archive, mode="r") as zip_file:
134135
zip_file.extractall(target_dir)

logprep/processor/pre_detector/processor.py

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
.. automodule:: logprep.processor.pre_detector.rule
2929
"""
3030

31+
import typing
3132
from functools import cached_property
3233
from uuid import uuid4
3334

@@ -37,7 +38,12 @@
3738
from logprep.processor.base.exceptions import ProcessingWarning
3839
from logprep.processor.pre_detector.ip_alerter import IPAlerter
3940
from logprep.processor.pre_detector.rule import PreDetectorRule
40-
from logprep.util.helper import add_fields_to, get_dotted_field_value
41+
from logprep.util.helper import (
42+
FieldValue,
43+
add_fields_to,
44+
copy_fields_to_event,
45+
get_dotted_field_value,
46+
)
4147
from logprep.util.time import TimeParser, TimeParserException
4248

4349

@@ -101,19 +107,19 @@ class Config(Processor.Config):
101107
rule_class = PreDetectorRule
102108

103109
@cached_property
104-
def _ip_alerter(self):
110+
def _ip_alerter(self) -> IPAlerter:
105111
return IPAlerter(self._config.alert_ip_list_path)
106112

107-
def normalize_timestamp(self, rule: PreDetectorRule, timestamp: str) -> str:
113+
def normalize_timestamp(self, rule: PreDetectorRule, timestamp: FieldValue) -> str:
108114
"""method for normalizing the timestamp"""
109115
try:
110116
parsed_datetime = TimeParser.parse_datetime(
111-
timestamp, rule.source_format, rule.source_timezone
117+
typing.cast(str, timestamp), rule.source_format, rule.source_timezone
112118
)
113119
return (
114120
parsed_datetime.astimezone(rule.target_timezone).isoformat().replace("+00:00", "Z")
115121
)
116-
except TimeParserException as error:
122+
except (TimeParserException, TypeError) as error:
117123
raise ProcessingWarning(
118124
"Could not parse timestamp",
119125
rule,
@@ -143,16 +149,19 @@ def _get_detection_result(self, event: dict, rule: PreDetectorRule):
143149

144150
@staticmethod
145151
def _generate_detection_result(
146-
pre_detection_id: str, event: dict, rule: PreDetectorRule
152+
pre_detection_id: FieldValue, event: dict, rule: PreDetectorRule
147153
) -> dict:
148-
detection_result = rule.detection_data
149-
detection_result.update(
150-
{
151-
"rule_filter": rule.filter_str,
152-
"description": rule.description,
153-
"pre_detection_id": pre_detection_id,
154-
}
154+
detection_result = {
155+
**rule.detection_data,
156+
"rule_filter": rule.filter_str,
157+
"description": rule.description,
158+
"pre_detection_id": pre_detection_id,
159+
}
160+
copy_fields_to_event(
161+
target_event=detection_result,
162+
source_event=event,
163+
dotted_field_names=rule.copy_fields_to_detection_event,
164+
rule=rule,
165+
skip_missing=True,
155166
)
156-
if host_name := get_dotted_field_value(event, "host.name"):
157-
detection_result.update({"host": {"name": host_name}})
158167
return detection_result

0 commit comments

Comments
 (0)