Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from data_designer.engine.resources.agent_rollout.utils import (
build_message,
coerce_optional_str,
min_max_timestamps,
require_string,
stringify_json_value,
)
Expand Down Expand Up @@ -157,6 +158,7 @@ def parse_file(
project_path = coerce_optional_str(agent_extra.get("project_path")) or cwd
git_branch = coerce_optional_str(agent_extra.get("git_branch"))

started_at, ended_at = min_max_timestamps(timestamps)
return [
NormalizedAgentRolloutRecord(
trace_id=session_id,
Expand All @@ -168,8 +170,8 @@ def parse_file(
cwd=cwd,
project_path=project_path,
git_branch=git_branch,
started_at=min(timestamps) if timestamps else None,
ended_at=max(timestamps) if timestamps else None,
started_at=started_at,
ended_at=ended_at,
messages=messages,
source_meta=source_meta,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
build_message,
coerce_optional_str,
load_jsonl_rows,
min_max_timestamps,
require_string,
stringify_json_value,
stringify_text_value,
Expand Down Expand Up @@ -86,6 +87,7 @@ def parse_file(
elif record_type == "user":
messages.extend(normalize_claude_user_messages(raw_record))

started_at, ended_at = min_max_timestamps(timestamps)
session_key = session_id or file_path.stem
index_entry = session_index.get(session_key, {})
project_path = coerce_optional_str(index_entry.get("projectPath")) or cwd
Expand All @@ -112,8 +114,8 @@ def parse_file(
cwd=cwd,
project_path=project_path,
git_branch=git_branch,
started_at=min(timestamps) if timestamps else None,
ended_at=max(timestamps) if timestamps else None,
started_at=started_at,
ended_at=ended_at,
messages=messages,
source_meta=source_meta,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
build_message,
coerce_optional_str,
load_jsonl_rows,
min_max_timestamps,
require_string,
stringify_json_value,
stringify_text_value,
Expand Down Expand Up @@ -143,6 +144,7 @@ def parse_file(
if pending_reasoning:
source_meta["unattached_reasoning"] = list(pending_reasoning)

earliest, latest = min_max_timestamps(timestamps)
return [
NormalizedAgentRolloutRecord(
trace_id=session_id,
Expand All @@ -154,9 +156,8 @@ def parse_file(
cwd=coerce_optional_str(session_meta.get("cwd")),
project_path=coerce_optional_str(session_meta.get("cwd")),
git_branch=coerce_optional_str(session_meta.get("git_branch")),
started_at=coerce_optional_str(session_meta.get("timestamp"))
or (min(timestamps) if timestamps else None),
ended_at=max(timestamps) if timestamps else None,
started_at=coerce_optional_str(session_meta.get("timestamp")) or earliest,
ended_at=latest,
messages=messages,
source_meta=source_meta,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import json
from collections.abc import Iterator
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal

Expand Down Expand Up @@ -110,3 +111,39 @@ def stringify_text_value(value: Any) -> str:
if isinstance(value, str):
return value
return str(value)


def min_max_timestamps(timestamps: list[str]) -> tuple[str | None, str | None]:
    """Pick the chronologically first and last entries of ``timestamps``.

    Each entry is converted with ``parse_iso8601`` and the resulting instants
    are compared, rather than the raw strings, so entries written with
    different UTC offsets or sub-second precision still order by actual time.
    Entries for which ``parse_iso8601`` returns ``None`` are ignored.

    Args:
        timestamps: Timestamp strings in any order.

    Returns:
        ``(earliest, latest)`` as the original, unmodified input strings, or
        ``(None, None)`` when no entry could be parsed. When several entries
        denote the same instant, the one appearing first in the input wins.
    """
    lowest: tuple[datetime, str] | None = None
    highest: tuple[datetime, str] | None = None
    for raw in timestamps:
        moment = parse_iso8601(raw)
        if moment is None:
            continue
        # Strict comparisons keep the first-seen entry on ties.
        if lowest is None or moment < lowest[0]:
            lowest = (moment, raw)
        if highest is None or moment > highest[0]:
            highest = (moment, raw)
    if lowest is None or highest is None:
        return None, None
    return lowest[1], highest[1]


def parse_iso8601(value: str) -> datetime | None:
"""Parse an ISO 8601 timestamp, treating naive values as UTC.

Returns ``None`` for strings that cannot be parsed so callers can silently
skip malformed entries.
"""
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
if parsed.tzinfo is None:
return parsed.replace(tzinfo=timezone.utc)
return parsed
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import pytest

from data_designer.engine.resources.agent_rollout.utils import min_max_timestamps


@pytest.mark.parametrize(
    ("timestamps", "expected"),
    [
        # Nothing to compare: both bounds are absent.
        pytest.param([], (None, None), id="empty"),
        # 00:30+01:00 is 23:30Z the previous day, so it is earlier even though
        # it sorts later as a plain string.
        pytest.param(
            ["2025-01-01T00:30:00+01:00", "2025-01-01T00:00:00Z"],
            ("2025-01-01T00:30:00+01:00", "2025-01-01T00:00:00Z"),
            id="mixed-offset-lex-disagrees-with-chrono",
        ),
        # Fractional seconds must order after the whole-second value.
        pytest.param(
            ["2025-01-01T00:00:00.500Z", "2025-01-01T00:00:00Z"],
            ("2025-01-01T00:00:00Z", "2025-01-01T00:00:00.500Z"),
            id="mixed-precision",
        ),
        # A timestamp without an offset is comparable with an aware one.
        pytest.param(
            ["2025-01-01T00:00:00", "2025-01-02T00:00:00Z"],
            ("2025-01-01T00:00:00", "2025-01-02T00:00:00Z"),
            id="naive-treated-as-utc-and-compared-against-aware",
        ),
        # Garbage entries are dropped; the lone valid one is both bounds.
        pytest.param(
            ["not-a-timestamp", "2025-01-01T00:00:00Z"],
            ("2025-01-01T00:00:00Z", "2025-01-01T00:00:00Z"),
            id="unparseable-values-skipped",
        ),
        # With no valid entry at all the result matches the empty case.
        pytest.param(["not-a-timestamp"], (None, None), id="only-unparseable"),
    ],
)
def test_min_max_timestamps(timestamps: list[str], expected: tuple[str | None, str | None]) -> None:
    """Check that the returned bounds follow chronological order and keep the original strings."""
    bounds = min_max_timestamps(timestamps)
    assert bounds == expected
Loading