# Workflow introduced alongside PR #934:
# feat(cli): add evaluate subcommand for automata ground-truth evaluation
name: Performance Benchmarking

on:
  workflow_dispatch:
    inputs:
      iterations:
        description: 'Number of benchmark iterations'
        required: false
        default: '1000'
        type: string
      baseline_ref:
        description: 'Git reference for baseline comparison (branch/tag/commit)'
        required: false
        default: 'main'
        type: string
  pull_request:
    paths:
      - 'crates/terraphim_*/src/**'
      - 'terraphim_server/src/**'
      - 'scripts/run-performance-benchmarks.sh'
      - '.github/workflows/performance-benchmarking.yml'
  push:
    branches: [main, develop]
    paths:
      - 'crates/terraphim_*/src/**'
      - 'terraphim_server/src/**'
      - 'scripts/run-performance-benchmarks.sh'
      # Keep in sync with pull_request.paths: edits to this workflow itself
      # should also re-run the benchmarks once merged.
      - '.github/workflows/performance-benchmarking.yml'

env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 1

# Default token scope is read-only; jobs that need more (e.g. the baseline
# update job, which pushes to main) must escalate via job-level `permissions`.
permissions:
  contents: read
  issues: write
jobs:
  # Main benchmark job: builds the server, runs the benchmark script against
  # it, enforces SLO gates, and publishes results as artifacts / PR comments.
  performance-benchmarks:
    name: Performance Benchmarks
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Job-level outputs are required for the `needs.performance-benchmarks.outputs.*`
    # references in performance-regression-check. Without this mapping (and the
    # matching `id:` on the gates step) those references resolve to empty strings.
    outputs:
      slo-compliance: ${{ steps.gates.outputs.slo-compliance }}
      performance-gates-passed: ${{ steps.gates.outputs.performance-gates-passed }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch full history for baseline comparison

      - name: Set up Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y curl jq bc wrk

      - name: Download baseline (if comparing)
        if: github.event.inputs.baseline_ref || github.event_name == 'pull_request'
        run: |
          if [ "${{ github.event.inputs.baseline_ref }}" != "" ]; then
            BASELINE_REF="${{ github.event.inputs.baseline_ref }}"
          elif [ "${{ github.event_name }}" == "pull_request" ]; then
            BASELINE_REF="${{ github.event.pull_request.base.ref }}"
          else
            BASELINE_REF="main"
          fi
          echo "Downloading baseline from ref: $BASELINE_REF"
          # Download baseline results from previous run
          # This assumes you have baseline results stored as artifacts or in a separate repo
          # For now, create an empty but schema-valid baseline if none exists
          mkdir -p benchmark-results
          cat <<'EOF' > benchmark-results/baseline.json
          {
            "timestamp": "2024-01-01T00:00:00Z",
            "config": {
              "iterations": 1000,
              "warmup_iterations": 100,
              "concurrent_users": [1, 5, 10, 25, 50],
              "data_scales": [1000, 10000, 100000, 1000000],
              "slos": {
                "max_startup_time_ms": 5000,
                "max_api_response_time_ms": 500,
                "max_search_time_ms": 1000,
                "max_indexing_time_per_doc_ms": 50,
                "max_memory_mb": 1024,
                "max_cpu_idle_percent": 5.0,
                "max_cpu_load_percent": 80.0,
                "min_rps": 10.0,
                "max_concurrent_users": 100,
                "max_data_scale": 1000000
              },
              "monitoring_interval_ms": 1000,
              "enable_profiling": false
            },
            "results": {},
            "slo_compliance": {
              "overall_compliance": 100.0,
              "violations": [],
              "critical_violations": []
            },
            "system_info": {
              "os": "unknown",
              "os_version": "unknown",
              "cpu_model": "unknown",
              "cpu_cores": 0,
              "total_memory_mb": 0,
              "available_memory_mb": 0,
              "rust_version": "unknown",
              "terraphim_version": "unknown"
            },
            "trends": null
          }
          EOF

      - name: Start Terraphim server
        run: |
          # Build and start the server in background
          cargo build --release --package terraphim_server
          ./target/release/terraphim_server &
          SERVER_PID=$!
          # Store PID for cleanup BEFORE waiting, so the stop step can find it
          # even when startup fails.
          echo $SERVER_PID > server.pid
          # Wait for the health endpoint; fail fast instead of running the
          # benchmark suite against a server that never came up.
          for i in {1..30}; do
            if curl -s http://localhost:3000/health > /dev/null; then
              echo "Server started successfully"
              exit 0
            fi
            sleep 2
          done
          echo "::error::Terraphim server failed to start within 60 seconds"
          exit 1

      - name: Run performance benchmarks
        run: |
          # Set environment variables
          export TERRAPHIM_BENCH_ITERATIONS="${{ github.event.inputs.iterations || '1000' }}"
          export TERRAPHIM_SERVER_URL="http://localhost:3000"
          # Make script executable
          chmod +x scripts/run-performance-benchmarks.sh
          # Run benchmarks
          ./scripts/run-performance-benchmarks.sh --verbose

      - name: Stop Terraphim server
        if: always()
        run: |
          if [ -f server.pid ]; then
            kill $(cat server.pid) || true
            rm server.pid
          fi

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: benchmark-results-${{ github.run_id }}
          path: benchmark-results/
          retention-days: 30

      - name: Generate performance report
        if: always()
        run: |
          # Create a summary for GitHub Actions
          REPORT_FILE=$(find benchmark-results -name "benchmark_report.md" | head -1)
          if [ -n "$REPORT_FILE" ] && [ -f "$REPORT_FILE" ]; then
            echo "## Performance Benchmark Report" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            cat "$REPORT_FILE" >> $GITHUB_STEP_SUMMARY
          fi

      - name: Check performance gates
        # `id` is required so the job-level `outputs:` mapping above can
        # reference this step's $GITHUB_OUTPUT values.
        id: gates
        run: |
          # Check if benchmark results meet performance requirements
          RESULTS_FILE=$(find benchmark-results -name "benchmark_results.json" | head -1)
          if [ -n "$RESULTS_FILE" ] && [ -f "$RESULTS_FILE" ]; then
            # Extract SLO compliance percentage
            SLO_COMPLIANCE=$(jq -r '.slo_compliance.overall_compliance // 0' "$RESULTS_FILE")
            echo "SLO Compliance: ${SLO_COMPLIANCE}%"
            # Set output for other jobs
            echo "slo-compliance=${SLO_COMPLIANCE}" >> $GITHUB_OUTPUT
            # Check critical violations
            CRITICAL_VIOLATIONS=$(jq -r '.slo_compliance.critical_violations | length' "$RESULTS_FILE")
            if [ "$CRITICAL_VIOLATIONS" -gt 0 ]; then
              echo "❌ Critical performance violations detected!"
              jq -r '.slo_compliance.critical_violations[] | "🚨 \(.metric): \(.actual_value) (threshold: \(.threshold_value))"' "$RESULTS_FILE"
              echo "performance-gates-passed=false" >> $GITHUB_OUTPUT
              exit 1
            else
              echo "✅ All performance gates passed"
              echo "performance-gates-passed=true" >> $GITHUB_OUTPUT
            fi
          else
            echo "No benchmark results found"
            echo "slo-compliance=0" >> $GITHUB_OUTPUT
            echo "performance-gates-passed=false" >> $GITHUB_OUTPUT
            exit 1
          fi

      - name: Comment on PR (if applicable)
        if: always() && github.event_name == 'pull_request'
        continue-on-error: true
        # github-script has no v9 release; v7 is the current major.
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const { execSync } = require('child_process');
            // Find the benchmark report using shell (glob npm module not available in runner)
            let reportPath;
            try {
              reportPath = execSync('find benchmark-results -name "benchmark_report.md" | head -1', { encoding: 'utf8' }).trim();
            } catch (e) {
              reportPath = null;
            }
            if (reportPath && fs.existsSync(reportPath)) {
              const report = fs.readFileSync(reportPath, 'utf8');
              // Extract key metrics for comment
              const sloMatch = report.match(/SLO Compliance: (\d+\.?\d*)%/);
              const sloCompliance = sloMatch ? sloMatch[1] : 'N/A';
              const comment = [
                "## Performance Benchmark Results",
                "",
                `**SLO Compliance:** ${sloCompliance}%`,
                "",
                "### Key Findings:",
                report.includes('violations')
                  ? 'Some performance thresholds were not met'
                  : 'All performance requirements satisfied',
                "",
                `[View full report](https://github.com/terraphim/terraphim-ai/actions/runs/${process.env.GITHUB_RUN_ID})`,
              ].join("\n");
              await github.rest.issues.createComment({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
                body: comment
              });
            }
performance-regression-check:
name: Performance Regression Check
runs-on: ubuntu-latest
needs: performance-benchmarks
if: always() && needs.performance-benchmarks.result == 'success'
steps:
- name: Check for regressions
run: |
# Compare current results with baseline
# This is a simplified check - in practice you'd want more sophisticated analysis
if [ "${{ needs.performance-benchmarks.outputs.performance-gates-passed }}" == "false" ]; then
echo "Performance regression detected!"
exit 1
else
echo "No performance regressions detected"
fi
update-baseline:
name: Update Performance Baseline
runs-on: ubuntu-latest
needs: [performance-benchmarks, performance-regression-check]
if: github.ref == 'refs/heads/main' && needs.performance-regression-check.result == 'success'
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Download benchmark results
uses: actions/download-artifact@v4
with:
name: benchmark-results-${{ github.run_id }}
- name: Update baseline
run: |
# Copy latest results as new baseline
RESULTS_FILE=$(find benchmark-results -name "benchmark_results.json" | head -1)
if [ -n "$RESULTS_FILE" ] && [ -f "$RESULTS_FILE" ]; then
cp "$RESULTS_FILE" "benchmark-results/baseline.json"
echo "Updated performance baseline"
fi
- name: Build headline snapshot for terraphim.ai
env:
GITHUB_SHA: ${{ github.sha }}
WORKFLOW_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
# Emits benchmark-results/snapshot.json consumed by terraphim.ai's
# content/data/benchmarks.json (see Gitea zestic-ai/terraphim-ai #574).
# Script is idempotent; falls back to static values when Criterion
# parameter labels do not match.
bash scripts/update-benchmark-snapshot.sh
- name: Commit baseline + snapshot update
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
git add benchmark-results/baseline.json benchmark-results/snapshot.json
git commit -m "chore: update performance baseline and snapshot from run ${{ github.run_id }}" || echo "No changes to commit"
git push origin main