@@ -1,32 +1,90 @@
#!/usr/bin/env python3
"""Runs environment setup, build, benchmark prep, and experiment runs checks for EGWALKER."""

from __future__ import annotations

import os
import sys
from pathlib import Path
from typing import Dict

# from oracle_artifact_build import OracleArtifactBuild
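# Make the evaluator's src/ tree importable so the oracle imports below
# resolve when this script is run directly.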
_AGENT_EVAL_DIR = Path(__file__).resolve().parent
_AGENT_SRC_DIR = _AGENT_EVAL_DIR.parents[3] / "src"
sys.path.append(str(_AGENT_SRC_DIR))

from oracle_artifact_build import OracleArtifactBuild
from oracle_benchmark_prep import OracleBenchmarkPrep
from oracle_env_setup import OracleEnvSetup
# from oracle_benchmark_prep import OracleBenchmarkPrep
# from oracle_experiment_runs import OracleExperimentRuns
from oracle_experiment_runs import OracleExperimentRuns
from evaluator.utils import EntryConfig, LoggerConfig, get_logger, record_result

from utils import logger

def main():
results: Dict[str, int] = {}
EGWALKER_CONFIG = EntryConfig(
name="eurosys25-egwalker",
home_dir=Path.home() / "eurosys25_egwalker",
repository_paths={
"eurosys25-egwalker": Path.home() / "eurosys25_egwalker" / "egwalker",
},
results_paths={
# Matches legacy: <repo>/results/timings.json
"timings": Path.home()
/ "eurosys25_egwalker"
/ "egwalker"
/ "results"
/ "timings.json",
},
ground_truth_paths={
"datasets": (
Path.home()
/ "eurosys25_egwalker"
/ "_agent_eval"
/ "refs"
/ "datasets.ref.json"
),
"timings": (
Path.home()
/ "eurosys25_egwalker"
/ "_agent_eval"
/ "refs"
/ "timings.ref.json"
),
},
similarity_ratio=0.75,
)
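EntryConfig itself comes from evaluator.utils and is not part of this diff; the following is a minimal sketch of the shape this file relies on, inferred only from the keyword arguments passed above (field names and defaults here are assumptions, not the evaluator's actual API):

from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict

@dataclass(frozen=True, kw_only=True)
class EntryConfig:
    """Sketch only: mirrors the keyword arguments passed to EntryConfig above."""

    name: str                          # entry identifier, e.g. "eurosys25-egwalker"
    home_dir: Path                     # root working directory for the entry
    repository_paths: Dict[str, Path]  # repo key -> checkout path
    results_paths: Dict[str, Path] = field(default_factory=dict)       # result name -> produced file
    ground_truth_paths: Dict[str, Path] = field(default_factory=dict)  # result name -> reference file
    similarity_ratio: float = 1.0      # minimum acceptable similarity to the reference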


def main(argv: list[str]) -> int:
results: Dict[str, int] = {}
score = 0
for cls in (OracleEnvSetup, OracleArtifactBuild, OracleBenchmarkPrep, OracleExperimentRuns):
checker = cls()
ok = checker.run()
name = cls.__name__
logger.info(f"{name}: {'PASS' if ok else 'FAIL'}")
if ok:
results[name] = 1
score += 1
else:
results[name] = 0

logger.info(f"Agent scores: {results}")

verbose = "--verbose" in argv

logger_name = os.environ.get("EVAL_LOGGER_NAME", "EGWALKER-EVAL")
logger = get_logger(LoggerConfig(root_name=logger_name))

env_checker = OracleEnvSetup(config=EGWALKER_CONFIG, logger=logger)
score += record_result(
logger, results, type(env_checker).__name__, env_checker.run(verbose=verbose)
)

build_checker = OracleArtifactBuild(config=EGWALKER_CONFIG, logger=logger)
score += record_result(
logger, results, type(build_checker).__name__, build_checker.run(verbose=verbose)
)

prep_checker = OracleBenchmarkPrep(config=EGWALKER_CONFIG, logger=logger)
score += record_result(
logger, results, type(prep_checker).__name__, prep_checker.run(verbose=verbose)
)

runs_checker = OracleExperimentRuns(config=EGWALKER_CONFIG, logger=logger)
score += record_result(
logger, results, type(runs_checker).__name__, runs_checker.run(verbose=verbose)
)

logger.info("Agent scores: %s", results)
return score


if __name__ == "__main__":
main()
raise SystemExit(main(sys.argv[1:]))
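record_result is likewise imported from evaluator.utils and not shown in this diff. From its call sites above (logger, results dict, check name, and boolean outcome in; an int added to score out), a compatible sketch, assuming it reproduces the old loop's 1/0 bookkeeping:

import logging
from typing import Dict

def record_result(logger: logging.Logger, results: Dict[str, int], name: str, ok: bool) -> int:
    """Sketch only: record one oracle outcome and return its score contribution."""
    results[name] = 1 if ok else 0
    logger.info("%s: %s", name, "PASS" if ok else "FAIL")
    return results[name]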
@@ -1,93 +1,114 @@
#!/usr/bin/env python3
import os
import subprocess
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple
"""Artifact build oracle for the Eurosys'25 EGWALKER artifact.

Validates:
- Required repository working directories exist.
- Build commands execute successfully (stdout, stderr, and return code are captured).
"""

from __future__ import annotations

from collections.abc import Mapping, Sequence
from dataclasses import dataclass, field
import logging
from pathlib import Path

from utils import REPO_DIR
from utils import logger
from evaluator.oracle_artifact_build_primitives import (
BuildCommandRequirement,
BuildRequirement,
OracleArtifactBuildBase,
)
from evaluator.utils import EntryConfig


@dataclass(frozen=True)
@dataclass(frozen=True, slots=True, kw_only=True)
class BuildTarget:
"""Declarative description of one build command to run."""

name: str
repo_key: str
cmd: List[str]


BUILD_TARGETS: List[BuildTarget] = [
BuildTarget(
name="artifact-core",
repo_key="artifact-core",
cmd=[
"make",
"-j8",
"tools/diamond-types/target/release/dt",
"tools/crdt-converter/target/release/crdt-converter",
"tools/diamond-types/target/release/paper-stats",
"tools/paper-benchmarks/target/memusage/paper-benchmarks",
"tools/paper-benchmarks/target/release/paper-benchmarks",
"tools/ot-bench/target/memusage/ot-bench",
"tools/ot-bench/target/release/ot-bench"
],
),
]


class OracleArtifactBuild:

def __init__(self) -> None:
self.repo_dir = REPO_DIR

def run_shell_command(
command: Sequence[str]
cwd_relative: Path | None = None
optional: bool = False
timeout_seconds: float = 60.0
env_overrides: Mapping[str, str] = field(default_factory=dict)

def __post_init__(self) -> None:
if not self.name:
raise ValueError("BuildTarget.name must be non-empty")
if not self.command:
raise ValueError(f"{self.name}: command must be non-empty")
if self.timeout_seconds <= 0:
raise ValueError(f"{self.name}: timeout_seconds must be > 0")

# Normalize for downstream requirements.
if self.cwd_relative is not None and not isinstance(self.cwd_relative, Path):
object.__setattr__(self, "cwd_relative", Path(self.cwd_relative))

# Freeze command to avoid accidental mutation.
object.__setattr__(self, "command", tuple(self.command))


class OracleArtifactBuild(OracleArtifactBuildBase):
"""The artifact build oracle for artifact-core.

Defaults:
* Runs build commands in the repo keyed by config.name.
* EntryConfig.repository_paths must contain an entry for config.name.
"""

_DEFAULT_TARGET_SPECS: tuple[tuple[str, tuple[str, ...], float], ...] = (
(
"artifact-core: make tools",
(
"make",
"-j8",
"tools/diamond-types/target/release/dt",
"tools/crdt-converter/target/release/crdt-converter",
"tools/diamond-types/target/release/paper-stats",
"tools/paper-benchmarks/target/memusage/paper-benchmarks",
"tools/paper-benchmarks/target/release/paper-benchmarks",
"tools/ot-bench/target/memusage/ot-bench",
"tools/ot-bench/target/release/ot-bench",
),
60.0,
),
)

def __init__(
self,
cmd: Iterable[str],
cwd: Optional[Path] = None,
) -> Tuple[int, str, str]:
"""
Run a command and return a (rc, stdout, stderr) tuple.
"""
try:
cp = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=str(cwd) if cwd is not None else None,
*,
config: EntryConfig,
logger: logging.Logger,
targets: Sequence[BuildTarget] | None = None,
) -> None:
super().__init__(logger=logger)
self._config = config

if targets is None:
targets = self._make_default_targets()
self._targets = tuple(targets)

names = [t.name for t in self._targets]
if len(names) != len(set(names)):
raise ValueError(f"Duplicate build target names: {names!r}")

def _make_default_targets(self) -> tuple[BuildTarget, ...]:
"""Creates default targets (stored in the EntryConfig object)."""
return tuple(
BuildTarget(name=name, command=command, timeout_seconds=timeout_seconds)
for (name, command, timeout_seconds) in self._DEFAULT_TARGET_SPECS
)

def requirements(self) -> Sequence[BuildRequirement]:
"""Returns an ordered list of build requirements to validate."""
return tuple(
BuildCommandRequirement(
name=target.name,
optional=target.optional,
cwd=self._config.repository_paths[self._config.name],
command=target.command,
cwd_relative=target.cwd_relative,
timeout_seconds=target.timeout_seconds,
env_overrides=target.env_overrides,
)
return cp.returncode, cp.stdout or "", cp.stderr or ""
except FileNotFoundError:
return 127, "", ""

def build_target(self, target: BuildTarget) -> Optional[str]:
"""
Build a single target using its configured repository and command.
"""
repo_path = Path(os.path.expanduser(self.repo_dir))
if not repo_path.exists():
return f"{target.name} repo directory missing"

rc, out, err = self.run_shell_command(target.cmd, cwd=repo_path)
if rc != 0:
return f"{target.name} build failed (error code: {rc}; error message: {err})"

return None

def build_check(self):
"""
Run builds for all configured targets and collect failures.
"""
problems: List[str] = []
for target in BUILD_TARGETS:
msg = self.build_target(target)
if msg:
problems.append(msg)
if problems:
return False, "; ".join(problems)
return True, ""

def run(self):
ok, why = self.build_check()
logger.info(f"Build: {'PASS' if ok else 'FAIL' + (' - ' + why if why else '')}")
return ok
for target in self._targets
)
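OracleArtifactBuildBase and BuildCommandRequirement are imported from evaluator.oracle_artifact_build_primitives and do not appear in this diff (nor does BuildRequirement, presumably their common base). Below is a plausible sketch of the contract the oracle above relies on; every name, signature, and behavior in it is an assumption, not the primitives' actual API:

from __future__ import annotations

import logging
import os
import subprocess
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from dataclasses import dataclass, field
from pathlib import Path


@dataclass(frozen=True, kw_only=True)
class BuildCommandRequirement:
    """Sketch only: one command that must exit 0 (unless marked optional)."""

    name: str
    command: Sequence[str]
    cwd: Path
    cwd_relative: Path | None = None
    optional: bool = False
    timeout_seconds: float = 60.0
    env_overrides: Mapping[str, str] = field(default_factory=dict)


class OracleArtifactBuildBase(ABC):
    """Sketch only: run each requirement's command and report PASS/FAIL."""

    def __init__(self, *, logger: logging.Logger) -> None:
        self._logger = logger

    @abstractmethod
    def requirements(self) -> Sequence[BuildCommandRequirement]: ...

    def run(self, *, verbose: bool = False) -> bool:
        ok = True
        for req in self.requirements():
            cwd = req.cwd / req.cwd_relative if req.cwd_relative else req.cwd
            try:
                cp = subprocess.run(
                    list(req.command),
                    cwd=cwd,
                    env={**os.environ, **req.env_overrides},
                    capture_output=True,
                    text=True,
                    timeout=req.timeout_seconds,
                )
                passed, detail = cp.returncode == 0, cp.stderr
            except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
                passed, detail = False, str(exc)
            if not passed and not req.optional:
                ok = False
            self._logger.info("%s: %s", req.name, "PASS" if passed else "FAIL")
            if verbose and not passed:
                self._logger.info("detail: %s", detail)
        return ok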