From c5734f2e58dbb9e37f7ab26490e01d87bbf644dc Mon Sep 17 00:00:00 2001 From: deepssin Date: Fri, 8 May 2026 13:57:23 +0000 Subject: [PATCH] Add auto-ptl-batch pipeline for grouped PR testing. Adding auto-ptl-batch job config, Jenkins pipeline flow, PR grouping/conflict logic, and overall CI status checks before pushing batch branches and triggering teuthology via teuthology-runner. Signed-off-by: deepssin --- auto-ptl-batch/build/Jenkinsfile | 372 ++++++++++++++++ .../config/definitions/auto-ptl-batch.yml | 157 +++++++ auto-ptl-batch/scripts/check_pr_ci_status.py | 146 +++++++ auto-ptl-batch/scripts/github_api.py | 104 +++++ auto-ptl-batch/scripts/group_prs.py | 413 ++++++++++++++++++ 5 files changed, 1192 insertions(+) create mode 100644 auto-ptl-batch/build/Jenkinsfile create mode 100644 auto-ptl-batch/config/definitions/auto-ptl-batch.yml create mode 100755 auto-ptl-batch/scripts/check_pr_ci_status.py create mode 100644 auto-ptl-batch/scripts/github_api.py create mode 100644 auto-ptl-batch/scripts/group_prs.py diff --git a/auto-ptl-batch/build/Jenkinsfile b/auto-ptl-batch/build/Jenkinsfile new file mode 100644 index 000000000..10b56333f --- /dev/null +++ b/auto-ptl-batch/build/Jenkinsfile @@ -0,0 +1,372 @@ +/** + * auto-ptl-batch: automated PR batch testing for ceph/ceph. + * + * Flow: + * 1. group_prs.py discovers all open PRs carrying needs-QA + a component label + * present in COMPONENT_SUITE_MAP, with no blocking labels + * (needs-rebase, ready-to-merge, etc.), groups them + * by (component, base_branch), detects file-level conflicts, and splits + * into sub-batches of up to MAX_PRS_PER_BATCH. + * 2. For each sub-batch: + * a. CI green check (overall commit status on PR HEAD via check_pr_ci_status.py) + * b. Local merge via ptl-tool.py (--branch HEAD; local merge only) + * c. Create branch from merged HEAD + push to ceph-ci + * (SKIP_STATUS_POST=true skips the commit-status POST while still pushing) + * d. 
Trigger teuthology-runner with the component suite
+ *   3. ceph-trigger-build fires ceph-dev-pipeline (slim) which on
+ *      success auto-schedules teuthology-runner with the teuthology
+ *      suite derived from COMPONENT_SUITE_MAP.
+ *
+ * ptl-tool.py is called with the PR numbers as positional arguments
+ * (space-separated, digits only; see requirePrArgs below).
+ * PTL_TOOL_BASE_REMOTE=origin tells ptl-tool to use the 'origin' remote
+ * (ceph/ceph) instead of its default 'upstream'.
+ */
+
+// Component label text may include '/' or other characters unsafe in git refs and image tags.
+/**
+ * Turn a component label into a fragment usable in a branch name.
+ *
+ * Trims the label, replaces every run of characters outside
+ * [a-zA-Z0-9._-] with a single '-', then strips leading and trailing
+ * dashes. A null/empty label, or one that sanitizes to nothing, yields
+ * 'unknown'.
+ */
+String sanitizeBatchBranch(String component) {
+    def s = (component ?: 'unknown').trim()
+    s = s.replaceAll('[^a-zA-Z0-9._-]+', '-')
+    s = s.replaceAll('^-+|-+$', '')
+    return s ?: 'unknown'
+}
+
+/**
+ * Return the trimmed ref, or call error() when it is empty or contains
+ * anything other than letters, digits, '/', '.', '_' or '-'.
+ * The returned value is later interpolated into shell commands, so the
+ * whitelist is what keeps it from carrying shell metacharacters.
+ *
+ * @param ref   candidate git ref
+ * @param label human-readable name of the value, used in the error message
+ */
+String requireGitRef(String ref, String label) {
+    def r = (ref ?: '').trim()
+    if (!r || !(r ==~ '^[a-zA-Z0-9/._-]+$')) {
+        error("${label} contains unsupported characters: ${ref}")
+    }
+    return r
+}
+
+/**
+ * Return the trimmed value, or call error() when it is empty or contains
+ * anything other than letters, digits, '.', '_', '-' or a space.
+ * Spaces are allowed here (unlike requireGitRef) because a value may be a
+ * space-separated list.
+ *
+ * @param val   candidate value
+ * @param label human-readable name of the value, used in the error message
+ */
+String requireTrailerValue(String val, String label) {
+    def v = (val ?: '').trim()
+    if (!v || !(v ==~ /^[a-zA-Z0-9._ -]+$/)) {
+        error("${label} rejected (unsupported characters): ${val}")
+    }
+    return v
+}
+
+/**
+ * Return the trimmed suite name, or call error() when it is empty or
+ * contains anything other than letters, digits, '.', '_' or '-'.
+ */
+String requireSuiteName(String suite) {
+    def s = (suite ?: '').trim()
+    if (!s || !(s ==~ /^[a-zA-Z0-9._-]+$/)) {
+        error("suite name rejected (unsupported characters): ${suite}")
+    }
+    return s
+}
+
+/**
+ * Validate a list of PR numbers and join them with single spaces.
+ *
+ * Each element is stringified and trimmed; error() is called on the first
+ * one that is not purely decimal digits. An empty list yields ''.
+ */
+String requirePrArgs(List prs) {
+    def args = []
+    for (pr in prs) {
+        def s = pr.toString().trim()
+        if (!(s ==~ /^\d+$/)) {
+            error("Invalid PR number: ${s}")
+        }
+        args << s
+    }
+    return args.join(' ')
+}
+
+/** Inject github-readonly-token like JJB credentials-binding (env only, never argv).
*/ +void withGithubApi(Closure body) { + withCredentials([ + usernamePassword( + credentialsId: 'github-readonly-token', + usernameVariable: 'GITHUB_USER', + passwordVariable: 'GITHUB_PASS', + ), + ]) { + body() + } +} + +pipeline { + agent any + options { + timestamps() + timeout(time: 4, unit: 'HOURS') + buildDiscarder(logRotator(numToKeepStr: '50')) + } + + stages { + + stage('Setup') { + steps { + script { + dir('ceph-src') { + checkout scmGit( + branches: [[name: 'main']], + userRemoteConfigs: [[ + url: 'git@github.com:ceph/ceph.git', + credentialsId: 'jenkins-build', + ]], + extensions: [ + [$class: 'CleanBeforeCheckout'], + [ + $class: 'CloneOption', + shallow: true, + depth: 50, + noTags: true, + timeout: 90, + ], + ], + ) + sh 'git remote add ci git@github.com:ceph/ceph-ci.git' + } + } + sh ''' + python3 -m venv ptl-venv + ptl-venv/bin/pip install -q --upgrade pip + ptl-venv/bin/pip install -q GitPython requests python-redmine + ''' + } + } + + stage('Discover & Group') { + environment { + REQUIRED_LABELS = "${params.REQUIRED_LABELS ?: 'needs-qa'}" + EXCLUDE_LABELS = "${params.EXCLUDE_LABELS ?: 'needs-rebase,ready-to-merge,passed-qa'}" + COMPONENT_SUITE_MAP = "${params.COMPONENT_SUITE_MAP ?: ''}" + UPDATED_WITHIN_DAYS = "${params.UPDATED_WITHIN_DAYS ?: '90'}" + CONFLICT_PATH_DEPTH = "${params.CONFLICT_PATH_DEPTH ?: '3'}" + MAX_PRS_PER_BATCH = "${params.MAX_PRS_PER_BATCH ?: '5'}" + BASE_BRANCH_FILTER = "${params.CEPH_BASE_BRANCH ?: ''}" + } + steps { + withGithubApi { + script { + sh(script: 'python3 auto-ptl-batch/scripts/group_prs.py > batches.json') + + def batchesRaw = readJSON file: 'batches.json' + // Convert from JSONArray to plain ArrayList for sandbox compatibility. + def batches = batchesRaw ? new ArrayList(batchesRaw as List) : [] + if (!batches) { + echo 'No eligible PRs found; nothing to do.' 
+ currentBuild.description = 'no eligible PRs' + return + } + echo "Planned sub-batches (${batches.size()}):" + batches.each { b -> + def msg = " component=${b.component} branch=${b.branch}" + + " suite=${b.suite} batch=${b.batch} prs=${b.prs}" + if (b.split_reason) { msg += "\n split: ${b.split_reason}" } + echo msg + } + } + } + } + } + + stage('Check CI, Merge & Push') { + steps { + withGithubApi { + script { + def batchesRaw = readJSON file: 'batches.json' + def batches = batchesRaw ? new ArrayList(batchesRaw as List) : [] + if (!batches) { + echo 'No batches to process.' + return + } + + def pushed = [] + def skipped = [] + def maxPushes = (params.MAX_PUSHES ?: '0').toInteger() + def buildDistros = requireTrailerValue( + (params.BUILD_DISTROS ?: 'jammy centos9 rocky10').trim(), + 'BUILD_DISTROS', + ) + def buildArchs = requireTrailerValue( + (params.BUILD_ARCHS ?: 'x86_64').trim(), + 'BUILD_ARCHS', + ) + def buildFlavors = requireTrailerValue( + (params.BUILD_FLAVORS ?: 'default').trim(), + 'BUILD_FLAVORS', + ) + def buildCiContainer = params.BUILD_CI_CONTAINER ? 'true' : 'false' + def runnerDelaySeconds = (params.TEUTHOLOGY_TRIGGER_DELAY_SECONDS ?: '5400').toInteger() + if (runnerDelaySeconds < 0) { + error("TEUTHOLOGY_TRIGGER_DELAY_SECONDS must be >= 0 (got ${runnerDelaySeconds})") + } + + for (batch in batches) { + def component = batch.component as String + def branch = requireGitRef(batch.branch as String, 'base branch') + def suite = requireSuiteName(batch.suite as String) + def batchNum = batch.batch as int + // readJSON returns JSONArray. convert to plain ArrayList so + // sandbox-whitelisted methods like .join() and .each() work. 
+ def prs = new ArrayList(batch.prs as List) + def sanitizedComponent = sanitizeBatchBranch(component) + def branchName = requireGitRef( + "wip-${sanitizedComponent}-${branch}-auto-batch${batchNum}", + 'batch branch', + ) + def prArgs = requirePrArgs(prs) + + echo "=== ${branchName}: prs=${prs} suite=${suite} ===" + if (batch.split_reason) { + echo " (split reason: ${batch.split_reason})" + } + + def ciOk = sh( + script: "python3 auto-ptl-batch/scripts/check_pr_ci_status.py ${prArgs}", + returnStatus: true, + ) + if (ciOk == 3) { + error('GitHub API error during CI check (missing token or rate limit)') + } + if (ciOk != 0) { + echo "CI not green for ${branchName}; skipping." + skipped << branchName + continue + } + + // Local merge via ptl-tool.py (no push) + // --branch HEAD leaves HEAD detached with the merged + // commits; push is handled separately below. + def mergeOk = sh( + script: """ + cd ceph-src + PTL_TOOL_BASE_REMOTE=origin \\ + PTL_TOOL_BASE_PATH=refs/remotes/origin/ \\ + PTL_TOOL_GITHUB_TOKEN=\$GITHUB_PASS \\ + PTL_TOOL_GITHUB_USER=\$GITHUB_USER \\ + ../ptl-venv/bin/python3 src/script/ptl-tool.py \\ + ${prArgs} \\ + --base ${branch} \\ + --branch HEAD \\ + --merge-branch-name ${branchName} + """, + returnStatus: true, + ) + if (mergeOk != 0) { + echo "Merge conflict in ${branchName}; skipping." + skipped << branchName + sh "cd ceph-src && git checkout -f origin/${branch} 2>/dev/null || true" + continue + } + + // Inject git trailers consumed by ceph-trigger-build so the + // batch branch builds only the platforms needed by this flow. 
+ def trailerOk = sh( + script: """ + cd ceph-src + msg_file=\$(mktemp) + git log -1 --pretty=%B > "\${msg_file}" + git interpret-trailers --in-place --if-exists replace --if-missing add \\ + --trailer "CEPH-BUILD-JOB=ceph-dev-pipeline" \\ + --trailer "DISTROS=${buildDistros}" \\ + --trailer "ARCHS=${buildArchs}" \\ + --trailer "FLAVORS=${buildFlavors}" \\ + --trailer "CI-CONTAINER=${buildCiContainer}" \\ + "\${msg_file}" + git commit --amend -F "\${msg_file}" + rm -f "\${msg_file}" + """, + returnStatus: true, + ) + if (trailerOk != 0) { + echo "Failed to set build trailers for ${branchName}; skipping." + skipped << "${branchName}(trailer-failed)" + sh "cd ceph-src && git checkout -f origin/${branch} 2>/dev/null || true" + continue + } + + if (params.DRY_RUN) { + echo "DRY_RUN=true; skipping push for ${branchName}." + skipped << "${branchName}(dry-run)" + sh "cd ceph-src && git checkout -f origin/${branch} 2>/dev/null || true" + continue + } + + if (maxPushes > 0 && pushed.size() >= maxPushes) { + echo "MAX_PUSHES=${maxPushes} reached; skipping ${branchName}." + skipped << "${branchName}(max-pushes)" + sh "cd ceph-src && git checkout -f origin/${branch} 2>/dev/null || true" + continue + } + + // Create branch on merged HEAD and push to ceph-ci + // ptl-tool.py left HEAD detached with the merged commits; + // we just anchor it to a branch name and push. + def pushOk = 1 + sshagent(['jenkins-build']) { + pushOk = sh( + script: """ + cd ceph-src + git checkout -B ${branchName} + git push -f ci ${branchName} + """, + returnStatus: true, + ) + } + if (pushOk == 0) { + echo "Pushed ${branchName} (suite=${suite})" + pushed << "${branchName}(suite=${suite})" + + // Mark each PR's HEAD SHA as 'pending' so the + // next daily run skips it until SHA changes or + // teuthology posts a final result. + // Skipped when SKIP_STATUS_POST=true (e.g. push-path testing). + if (!params.SKIP_STATUS_POST) { + def prShas = batch.pr_shas ? 
new HashMap(batch.pr_shas as Map) : [:] + def statusDesc = "Batched: ${branchName}" + prs.each { pr -> + def sha = prShas[pr.toString()] ?: '' + if (sha) { + withEnv([ + "STATUS_SHA=${sha}", + "STATUS_DESC=${statusDesc}", + "STATUS_URL=${env.BUILD_URL}", + ]) { + sh( + script: ''' + python3 auto-ptl-batch/scripts/check_pr_ci_status.py \ + --post-status "$STATUS_SHA" "$STATUS_DESC" "$STATUS_URL" + ''', + returnStatus: true, + ) + } + } + } + } else { + echo "SKIP_STATUS_POST=true; not posting commit status for ${branchName}." + } + + // Trigger teuthology asynchronously after a delay so package + // artifacts have time to appear on Shaman/Chacra. + echo "Triggering teuthology-runner for ${branchName} suite=${suite} (quietPeriod=${runnerDelaySeconds}s) ..." + build( + job: 'teuthology-runner', + wait: false, + quietPeriod: runnerDelaySeconds, + parameters: [ + string(name: 'CEPH_BUILD_BRANCH', value: params.CEPH_BUILD_BRANCH ?: 'main'), + string(name: 'CEPH_BRANCH', value: branchName), + string(name: 'CEPH_REPO', value: 'https://github.com/ceph/ceph-ci.git'), + string(name: 'SUITE_REPO', value: 'https://github.com/ceph/ceph.git'), + string(name: 'SUITE_LIST', value: suite), + booleanParam(name: 'SKIP_SHAMAN_WAIT', value: false), + ], + ) + } else { + echo "Push failed for ${branchName}." + skipped << branchName + } + + sh "cd ceph-src && git checkout -f origin/${branch} 2>/dev/null || true" + } + + echo "Pushed: ${pushed}" + echo "Skipped: ${skipped}" + currentBuild.description = ( + "pushed=${pushed.join(',') ?: 'none'} " + + "skipped=${skipped.join(',') ?: 'none'}" + ) + } + } + } + } + + } +} diff --git a/auto-ptl-batch/config/definitions/auto-ptl-batch.yml b/auto-ptl-batch/config/definitions/auto-ptl-batch.yml new file mode 100644 index 000000000..818044a0c --- /dev/null +++ b/auto-ptl-batch/config/definitions/auto-ptl-batch.yml @@ -0,0 +1,157 @@ +- job: + name: auto-ptl-batch + description: | + Automated PR batch testing for ceph/ceph. 
+ + The job discovers all open PRs that carry every label in REQUIRED_LABELS + (default: needs-qa) plus a component label, have green CI, and + carry none of the labels in EXCLUDE_LABELS. PRs are grouped by + (component, base_branch), then split into sub-batches using file-path + conflict detection: two PRs that touch overlapping paths at + CONFLICT_PATH_DEPTH directory levels are placed into separate batches so + that merge conflicts and root-cause analysis stay isolated. + + For each resulting sub-batch: + 1. CI green check (overall commit status on each PR HEAD SHA) + 2. Local merge via ptl-tool.py (--branch HEAD; local merge only) + 3. Push to ceph-ci as wip-<component>-<base_branch>-auto-batch<N> + (component label is sanitized for the branch name: allowed chars only, + others become '-') + - commit trailers constrain ceph-dev-pipeline build scope + (DISTROS/ARCHS/FLAVORS/CI-CONTAINER are configurable job parameters) + - teuthology-runner is triggered asynchronously (delay configurable) + and schedules the component's suite from + COMPONENT_SUITE_MAP + + Runs once daily via a cron trigger. Can also be triggered manually; + set CEPH_BASE_BRANCH to limit processing to a specific target branch, + or leave empty to process all branches. + project-type: pipeline + quiet-period: 2 + concurrent: false + pipeline-scm: + scm: + - git: + url: https://github.com/ceph/ceph-build + branches: + - ${{CEPH_BUILD_BRANCH}} + shallow-clone: true + submodule: + disable: true + wipe-workspace: true + script-path: auto-ptl-batch/build/Jenkinsfile + lightweight-checkout: true + do-not-fetch-tags: true + + triggers: + - timed: 'H 0 * * *' + + parameters: + - string: + name: CEPH_BUILD_BRANCH + description: "ceph-build branch to use for this job's Jenkinsfile" + default: "main" + - string: + name: CEPH_BASE_BRANCH + description: | + If set, only process PRs targeting this branch (e.g. main, tentacle). + Leave empty to process PRs across all branches.
+ default: "" + - string: + name: REQUIRED_LABELS + description: | + Comma-separated labels that every PR must carry to be eligible. + A component label is always required in addition to these. + default: "needs-qa" + - string: + name: EXCLUDE_LABELS + description: | + Comma-separated labels that exclude a PR, even if CI is green. + default: "needs-rebase,ready-to-merge,passed-qa" + - string: + name: COMPONENT_SUITE_MAP + description: | + JSON object mapping each component label value to the + teuthology suite to run for that component's batches. + Add or override entries here without changing the Jenkinsfile. + # Double braces so Jenkins Job Builder does not treat JSON as str.format(). + default: >- + {{"bluestore":"rados","build/ops":"smoke","cephfs":"fs", + "common":"smoke","core":"rados","crimson":"crimson", + "dashboard":"dashboard","mds":"fs","mgr":"mgr","mon":"rados", + "msgr":"rados","osd":"rados","pybind":"smoke","rados":"rados", + "rbd":"rbd","rgw":"rgw","tools":"smoke"}} + - string: + name: UPDATED_WITHIN_DAYS + description: | + Only consider PRs that have been updated within this many days. + Keeps the scan fast and ignores stale PRs unlikely to need QA. + default: "7" + - string: + name: CONFLICT_PATH_DEPTH + description: | + Directory depth used to detect file-path conflicts between PRs. + 3 -> src/rgw/multisite/ (recommended: catches sub-component conflicts) + 2 -> src/rgw/ (component-level only) + 0 -> exact file match (fewest splits) + default: "3" + - string: + name: MAX_PRS_PER_BATCH + description: "Hard cap on the number of PRs in a single sub-batch." + default: "5" + - string: + name: MAX_PUSHES + description: | + Maximum number of branches to push to ceph-ci in a single run. + Set to 1 to test only the first eligible batch. 0 = no limit. + default: "0" + - string: + name: BUILD_DISTROS + description: | + Value for DISTROS trailer on pushed batch branches (consumed by ceph-trigger-build). 
+ default: "jammy centos9 rocky10" + - string: + name: BUILD_ARCHS + description: | + Value for ARCHS trailer on pushed batch branches. + default: "x86_64" + - string: + name: BUILD_FLAVORS + description: | + Value for FLAVORS trailer on pushed batch branches. + default: "default" + - bool: + name: BUILD_CI_CONTAINER + description: | + Value for CI-CONTAINER trailer on pushed batch branches. + default: false + - string: + name: TEUTHOLOGY_TRIGGER_DELAY_SECONDS + description: | + Jenkins quietPeriod for triggering teuthology-runner after push. + Use 0 for immediate trigger. + default: "5400" + - bool: + name: DRY_RUN + description: | + If true, run all discovery, CI checks, and merges + but skip the final push to ceph-ci. + default: false + - bool: + name: SKIP_STATUS_POST + description: | + If true, push branches to ceph-ci as normal but do NOT post the + "auto-ptl-batch=pending" commit status to any PR on GitHub. + Useful for testing the push path without touching existing PRs. + default: false + wrappers: + - inject-passwords: + global: true + mask-password-params: true + - ssh-agent-credentials: + user: 'jenkins-build' + - credentials-binding: + - username-password-separated: + credential-id: github-readonly-token + username: GITHUB_USER + password: GITHUB_PASS diff --git a/auto-ptl-batch/scripts/check_pr_ci_status.py b/auto-ptl-batch/scripts/check_pr_ci_status.py new file mode 100755 index 000000000..67d6a0774 --- /dev/null +++ b/auto-ptl-batch/scripts/check_pr_ci_status.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""check_pr_ci_status.py - Commit status helpers for auto-ptl-batch. + +Usage: + check_pr_ci_status.py <pr1> [pr2 ...] + Verify each PR HEAD has overall green commit status. + check_pr_ci_status.py --post-status <sha> <description> <target_url> + Post auto-ptl-batch=pending on a commit SHA.
+
+Env:  GITHUB_PASS  GitHub token (repo scope on ceph/ceph)
+Exit: check mode: 0 all green / 1 not green / 2 usage / 3 API or auth error
+      post mode:  0 posted / 1 API error / 2 usage or validation error
+"""
+
+import re
+import sys
+import urllib.error
+
+import github_api
+
+_PR_NUM_RE = re.compile(r'^\d+$')
+_SHA_RE = re.compile(r'^[0-9a-fA-F]{7,40}$')
+BATCH_STATUS_CONTEXT = 'auto-ptl-batch'
+
+
+def _latest_statuses_by_context(statuses):
+    """Return {context: status}, keeping the first entry seen per context.
+
+    Entries without a 'context' value are ignored.
+    """
+    latest = {}
+    for s in statuses:
+        ctx = s.get("context", "")
+        if ctx and ctx not in latest:
+            latest[ctx] = s
+    return latest
+
+
+def check_pr(pr_num):
+    """Return True when the combined commit status of the PR HEAD is 'success'.
+
+    Looks up the PR to find its HEAD SHA, fetches the combined status for
+    that SHA, and prints the overall state. When the state is not
+    'success', each non-success context is printed as a diagnostic.
+
+    Any request failure terminates the process with
+    github_api.EXIT_API_ERROR rather than returning False.
+    """
+    try:
+        pr = github_api.gh_get(
+            "https://api.github.com/repos/ceph/ceph/pulls/" + str(pr_num)
+        )
+        sha = pr["head"]["sha"]
+        combined = github_api.gh_get(
+            "https://api.github.com/repos/ceph/ceph/commits/"
+            + sha
+            + "/status"
+        )
+    except OSError as exc:
+        # OSError covers HTTPError, URLError (DNS/connection failures) and
+        # socket timeouts. Catching only HTTPError let network failures
+        # escape as a traceback with exit status 1, which the caller reads
+        # as "CI not green" instead of the documented API-error status.
+        sys.stderr.write(
+            "GitHub API error checking PR #"
+            + str(pr_num)
+            + ": "
+            + str(exc)
+            + "\n",
+        )
+        sys.exit(github_api.EXIT_API_ERROR)
+
+    state = combined.get("state", "unknown")
+    print("PR #" + str(pr_num) + ": [overall-ci] = " + state, flush=True)
+
+    # Helpful diagnostics when not green: show latest non-success contexts.
+    if state != "success":
+        latest = _latest_statuses_by_context(combined.get("statuses", []))
+        for ctx in sorted(latest):
+            c_state = latest[ctx].get("state", "missing")
+            if c_state != "success":
+                print(
+                    "PR #"
+                    + str(pr_num)
+                    + ": ["
+                    + ctx
+                    + "] = "
+                    + c_state,
+                    flush=True,
+                )
+    return state == "success"
+
+
+def post_batch_status(sha, description, target_url):
+    """Post a 'pending' commit status with context BATCH_STATUS_CONTEXT.
+
+    Exits with status 2 when the SHA is not 7-40 hex digits, the
+    description is empty, or target_url does not start with 'https://'.
+    Exits with status 1 when the request fails. The description is
+    truncated to 140 characters before posting.
+    """
+    if not _SHA_RE.match(sha):
+        sys.stderr.write("Invalid commit SHA: " + sha + "\n")
+        sys.exit(2)
+    if not description:
+        sys.stderr.write("Description must not be empty\n")
+        sys.exit(2)
+    if not target_url.startswith("https://"):
+        sys.stderr.write("target_url must be an https URL\n")
+        sys.exit(2)
+
+    body = {
+        "state": "pending",
+        "context": BATCH_STATUS_CONTEXT,
+        "description": description[:140],
+        "target_url": target_url,
+    }
+    url = "https://api.github.com/repos/ceph/ceph/statuses/" + sha
+    try:
+        github_api.gh_post(url, body)
+    except OSError as exc:
+        # Same reasoning as check_pr: report connection failures and
+        # timeouts through the documented exit status, not a traceback.
+        sys.stderr.write("GitHub API error: " + str(exc) + "\n")
+        sys.exit(1)
+    print(
+        "Posted " + BATCH_STATUS_CONTEXT + "=pending on " + sha[:12],
+        flush=True,
+    )
+
+
+def main():
+    """Dispatch to check mode or --post-status mode based on sys.argv."""
+    github_api.require_token()
+
+    if len(sys.argv) < 2:
+        sys.stderr.write(
+            "Usage: "
+            + sys.argv[0]
+            + " <pr1> [pr2 ...]\n       "
+            + sys.argv[0]
+            + " --post-status <sha> <description> <target_url>\n"
+        )
+        sys.exit(2)
+
+    if sys.argv[1] == "--post-status":
+        # Exactly three operands are required: sha, description, target_url.
+        if len(sys.argv) != 5:
+            sys.stderr.write(
+                "Usage: "
+                + sys.argv[0]
+                + " --post-status <sha> <description> <target_url>\n"
+            )
+            sys.exit(2)
+        post_batch_status(
+            sys.argv[2].strip(),
+            sys.argv[3].strip(),
+            sys.argv[4].strip(),
+        )
+        sys.exit(0)
+
+    # Check mode: every PR is checked (no short-circuit) so the log shows
+    # the state of each one.
+    all_ok = True
+    for pr in sys.argv[1:]:
+        pr_num = pr.strip()
+        if not _PR_NUM_RE.match(pr_num):
+            sys.stderr.write("Invalid PR number: " + pr_num + "\n")
+            sys.exit(2)
+        if not check_pr(pr_num):
+            all_ok = False
+    sys.exit(0 if all_ok else 1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/auto-ptl-batch/scripts/github_api.py b/auto-ptl-batch/scripts/github_api.py
new file mode 100644
index
000000000..c432193ad
--- /dev/null
+++ b/auto-ptl-batch/scripts/github_api.py
@@ -0,0 +1,104 @@
+"""Shared GitHub REST helpers for auto-ptl-batch.
+
+Credentials come from the environment only (GITHUB_PASS, optionally GITHUB_USER).
+Jenkins should inject them via credentials-binding or withCredentials — never
+pass tokens on the command line.
+"""
+
+import json
+import os
+import sys
+import time
+import urllib.error
+import urllib.request
+
+EXIT_API_ERROR = 3
+_API_VERSION = '2022-11-28'
+_RETRYABLE = frozenset({403, 429})
+
+
+def token():
+    """Return the GITHUB_PASS environment value, stripped ('' when unset)."""
+    return os.environ.get('GITHUB_PASS', '').strip()
+
+
+def require_token():
+    """Exit with EXIT_API_ERROR when no token is present in the environment."""
+    if not token():
+        sys.stderr.write(
+            'GITHUB_PASS is not set. Bind credential github-readonly-token '
+            '(JJB credentials-binding or pipeline withCredentials).\n',
+        )
+        sys.exit(EXIT_API_ERROR)
+
+
+def _headers():
+    """Build a fresh header dict; Authorization is added only with a token."""
+    headers = {
+        'Accept': 'application/vnd.github+json',
+        'X-GitHub-Api-Version': _API_VERSION,
+    }
+    tok = token()
+    if tok:
+        headers['Authorization'] = 'Bearer ' + tok
+    return headers
+
+
+def _retry_after_seconds(exc):
+    """Return the Retry-After header as an int, or 0 if absent or non-numeric."""
+    try:
+        return int(exc.headers.get('Retry-After', '0'))
+    except (TypeError, ValueError):
+        return 0
+
+
+def _request(method, url, body=None, timeout=20, retries=3):
+    """Send one JSON request and return the decoded response.
+
+    body, when given, is JSON-encoded and sent with a JSON Content-Type.
+    An empty response body yields {}.
+
+    HTTP errors whose status is in _RETRYABLE are retried, sleeping for
+    the Retry-After value when it is a positive integer and otherwise for
+    min(60, 2 ** attempt) seconds. Any other HTTPError, or a retryable one
+    on the final attempt, is re-raised to the caller.
+
+    retries is the total number of attempts; values below 1 are treated
+    as 1 so the request is always sent at least once.
+    """
+    require_token()
+    data = None
+    headers = _headers()
+    if body is not None:
+        headers['Content-Type'] = 'application/json'
+        data = json.dumps(body).encode('utf-8')
+
+    # With retries <= 0 the loop used to be skipped entirely and the
+    # function fell through to ``raise None`` (a TypeError).
+    attempts = max(1, retries)
+    for attempt in range(attempts):
+        req = urllib.request.Request(url, data=data, headers=headers, method=method)
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                raw = resp.read()
+                return json.loads(raw) if raw else {}
+        except urllib.error.HTTPError as exc:
+            if exc.code not in _RETRYABLE or attempt >= attempts - 1:
+                raise
+            wait = _retry_after_seconds(exc)
+            if wait <= 0:
+                wait = min(60, 2 ** attempt)
+            time.sleep(wait)
+
+    # Not reachable: the final attempt either returns or re-raises.
+    raise RuntimeError('retry loop ended without a result')
+
+
+def gh_get(url, timeout=20, retries=3): + return
_request('GET', url, timeout=timeout, retries=retries) + + +def gh_post(url, body, timeout=20, retries=3): + return _request('POST', url, body=body, timeout=timeout, retries=retries) + + +def gh_paginate(base_url, timeout=20, retries=3): + """Fetch all pages from a GitHub list endpoint.""" + results = [] + page = 1 + while True: + sep = '&' if '?' in base_url else '?' + data = gh_get( + f'{base_url}{sep}per_page=100&page={page}', + timeout=timeout, + retries=retries, + ) + if not data: + break + results.extend(data) + if len(data) < 100: + break + page += 1 + return results diff --git a/auto-ptl-batch/scripts/group_prs.py b/auto-ptl-batch/scripts/group_prs.py new file mode 100644 index 000000000..37c5082cc --- /dev/null +++ b/auto-ptl-batch/scripts/group_prs.py @@ -0,0 +1,413 @@ +#!/usr/bin/env python3 +""" +group_prs.py - Discover, filter, and batch ceph/ceph PRs for auto-ptl-batch. + +Reads configuration from environment variables, writes a JSON array of batch +objects to stdout, and logs progress to stderr. + +Environment variables +--------------------- +GITHUB_PASS GitHub token (read:org + repo scope) +REQUIRED_LABELS Comma-separated; every PR must carry all of these + (default: needs-qa) +EXCLUDE_LABELS Comma-separated; any PR carrying any of these is skipped + (default: needs-rebase,ready-to-merge,passed-qa) +COMPONENT_SUITE_MAP JSON object: component label value -> teuthology suite name + (default: built-in map below) +CONFLICT_PATH_DEPTH Directory depth for file-path conflict detection (default: 3) + 3 -> src/rgw/multisite/ + 2 -> src/rgw/ + 0 -> exact file match only +MAX_PRS_PER_BATCH Hard cap on PRs per sub-batch (default: 5) +BASE_BRANCH_FILTER If non-empty, only process PRs targeting this branch +UPDATED_WITHIN_DAYS Only scan PRs updated within this many days (default: 90) + +Idempotency +----------- +Before queuing a PR the script checks the GitHub commit status for context +'auto-ptl-batch' on the PR's HEAD SHA.
If the state is 'pending' (already +in ceph-ci) or 'success' (teuthology passed), the PR is skipped. A new push +to the PR branch changes the HEAD SHA and resets eligibility automatically. + +Output JSON schema +------------------ +[ + { + "component": "rgw", + "branch": "main", + "suite": "rgw", + "batch": 1, + "prs": [101, 102], + "pr_shas": {"101": "<head-sha>", "102": "<head-sha>"}, + "split_reason": "PR#101<->PR#105: src/rgw/multisite" // only when split + }, + ... +] +""" + +import datetime +import json +import os +import sys +import urllib.error +from collections import defaultdict + +import github_api + + +# Defaults + +DEFAULT_REQUIRED_LABELS = 'needs-qa' +DEFAULT_EXCLUDE_LABELS = 'needs-rebase,ready-to-merge,passed-qa' + +DEFAULT_COMPONENT_SUITE_MAP = { + 'bluestore': 'rados', + 'build/ops': 'smoke', + 'cephfs': 'fs', + 'common': 'smoke', + 'core': 'rados', + 'crimson': 'crimson', + 'dashboard': 'dashboard', + 'mds': 'fs', + 'mgr': 'mgr', + 'mon': 'rados', + 'msgr': 'rados', + 'osd': 'rados', + 'pybind': 'smoke', + 'rados': 'rados', + 'rbd': 'rbd', + 'rgw': 'rgw', + 'tools': 'smoke', +} + +DEFAULT_CONFLICT_PATH_DEPTH = 3 +DEFAULT_MAX_PRS_PER_BATCH = 5 +DEFAULT_UPDATED_WITHIN_DAYS = 90 + +# GitHub commit-status context used by this pipeline for idempotency tracking. +BATCH_STATUS_CONTEXT = 'auto-ptl-batch' +# States that mean "this SHA is already queued or passed; do not re-batch". +SKIP_STATES = frozenset({'pending', 'success'}) + +# File paths and extensions that are considered documentation-only. +# A PR whose entire changed file set matches these patterns needs no +# teuthology suite and is excluded from this pipeline.
+DOC_PATH_PREFIXES = ('doc/', 'Documentation/')
+DOC_FILE_SUFFIXES = ('.rst', '.md', '.txt')
+# Build inputs whose names happen to end in a documentation suffix.
+# Outside the doc directories these must not make a PR look doc-only.
+NON_DOC_BASENAMES = frozenset({'CMakeLists.txt', 'requirements.txt'})
+
+
+# GitHub API helpers
+def get_pr_detail(pr_num):
+    """Return the decoded pull-request object for ceph/ceph PR pr_num."""
+    return github_api.gh_get(f'https://api.github.com/repos/ceph/ceph/pulls/{pr_num}')
+
+
+def get_pr_files(pr_num):
+    """Return frozenset of changed file paths for a PR."""
+    files = github_api.gh_paginate(
+        f'https://api.github.com/repos/ceph/ceph/pulls/{pr_num}/files'
+    )
+    return frozenset(f['filename'] for f in files)
+
+
+def get_latest_batch_status(sha):
+    """
+    Return the state string of the first auto-ptl-batch commit status in
+    the list returned for the given SHA, or None if there is none.
+
+    An HTTP error is treated as "no status": the PR then stays eligible
+    rather than being dropped because of a failed lookup.
+    NOTE(review): "first" equals "most recent" only if the endpoint lists
+    newest statuses first - confirm against the GitHub API reference.
+    """
+    try:
+        statuses = github_api.gh_get(
+            f'https://api.github.com/repos/ceph/ceph/commits/{sha}/statuses'
+            f'?per_page=100'
+        )
+    except urllib.error.HTTPError:
+        return None
+    for s in statuses:
+        if s.get('context') == BATCH_STATUS_CONTEXT:
+            return s['state']
+    return None
+
+
+def path_at_depth(filepath, depth):
+    """
+    Truncate a file path to `depth` components.
+    e.g. 'src/rgw/multisite/sync.cc' at depth=3 -> 'src/rgw/multisite'
+    depth=0 returns the full path (exact-file matching).
+    A negative depth is treated like 0; it used to slice components off
+    the end of the path, which is not a meaningful conflict key.
+    A path with fewer than `depth` components is returned unchanged.
+    """
+    if depth <= 0:
+        return filepath
+    return '/'.join(filepath.split('/')[:depth])
+
+
+def conflict_path_set(file_set, depth):
+    """Return the frozenset of depth-truncated paths for file_set."""
+    return frozenset(path_at_depth(f, depth) for f in file_set)
+
+
+def _is_doc_file(path):
+    """Return True when a single changed path counts as documentation."""
+    if path.startswith(DOC_PATH_PREFIXES):
+        return True
+    # A bare suffix test would classify build files such as
+    # CMakeLists.txt as documentation.
+    basename = path.rsplit('/', 1)[-1]
+    if basename in NON_DOC_BASENAMES:
+        return False
+    return path.endswith(DOC_FILE_SUFFIXES)
+
+
+def is_doc_only(file_set):
+    """
+    Return True when every changed file in a PR is documentation.
+    Such PRs need no package build or teuthology run and are excluded
+    from this pipeline regardless of component label.
+
+    A file is documentation when it lives under one of DOC_PATH_PREFIXES,
+    or ends in one of DOC_FILE_SUFFIXES and is not a known build input
+    (NON_DOC_BASENAMES). An empty file set returns False, so a PR whose
+    file list is empty is not dropped as doc-only.
+    """
+    if not file_set:
+        return False
+    return all(_is_doc_file(f) for f in file_set)
+
+
+def build_conflict_graph(pr_file_map, depth):
+    """
+    Compare every PR pair within the group.
+
+    Two PRs conflict when their depth-truncated path sets intersect.
+
+    Returns:
+        edges    - {pr_num: set of conflicting pr_nums}; PRs without any
+                   conflict have no entry
+        evidence - {(a, b): [shared paths]} where a < b, at most five
+                   paths per pair, sorted
+    """
+    edges = defaultdict(set)
+    evidence = {}
+    prs = list(pr_file_map)
+    pr_paths = {pr: conflict_path_set(files, depth) for pr, files in pr_file_map.items()}
+
+    for i, a in enumerate(prs):
+        for b in prs[i + 1:]:
+            shared = pr_paths[a] & pr_paths[b]
+            if not shared:
+                continue
+            edges[a].add(b)
+            edges[b].add(a)
+            # Normalise the key so it matches the documented a < b form
+            # whatever order the PRs were supplied in.
+            evidence[(min(a, b), max(a, b))] = sorted(shared)[:5]
+
+    return dict(edges), evidence
+
+
+def greedy_color(pr_nums, conflict_edges, max_batch):
+    """
+    Greedy graph colouring: assign PRs to sub-batches so that no two
+    conflicting PRs share a batch, and each batch contains <= max_batch PRs.
+
+    Each PR goes into the first existing batch that has room and holds no
+    conflicting PR; otherwise it opens a new batch.
+
+    Returns a list of lists (sub-batches), preserving original PR order
+    within each batch.
+    """
+    batches = []
+    for pr in pr_nums:
+        conflicts = conflict_edges.get(pr, set())
+        placed = False
+        for batch in batches:
+            if len(batch) >= max_batch:
+                continue
+            if any(other in conflicts for other in batch):
+                continue
+            batch.append(pr)
+            placed = True
+            break
+        if not placed:
+            batches.append([pr])
+    return batches
+
+
+def _env_str(var, default):
+    """Return env var value, falling back to default if unset, empty, or 'null'."""
+    val = os.environ.get(var, '').strip()
+    return val if val and val.lower() != 'null' else str(default)
+
+
+def _env_int(var, default):
+    """Return env var as int, falling back to default if unset, empty, or 'null'.
+
+    A value that does not parse as an integer also yields default.
+    """
+    try:
+        return int(_env_str(var, default))
+    except ValueError:
+        return default
+
+
+def _env_label_set(var, default):
+    """Return the comma-separated env value as a set of non-empty, stripped labels."""
+    return set(
+        x.strip()
+        for x in _env_str(var, default).split(',')
+        if x.strip()
+    )
+
+
+def main(): + github_api.require_token() + + required_labels = _env_label_set('REQUIRED_LABELS', DEFAULT_REQUIRED_LABELS) + exclude_labels = _env_label_set('EXCLUDE_LABELS', DEFAULT_EXCLUDE_LABELS) + suite_map = json.loads( + _env_str('COMPONENT_SUITE_MAP', json.dumps(DEFAULT_COMPONENT_SUITE_MAP)) + ) + depth =
_env_int('CONFLICT_PATH_DEPTH', DEFAULT_CONFLICT_PATH_DEPTH) + max_batch = _env_int('MAX_PRS_PER_BATCH', DEFAULT_MAX_PRS_PER_BATCH) + updated_within = _env_int('UPDATED_WITHIN_DAYS', DEFAULT_UPDATED_WITHIN_DAYS) + branch_filter = _env_str('BASE_BRANCH_FILTER', '').strip() + cutoff = (datetime.datetime.utcnow() + - datetime.timedelta(days=updated_within)).strftime('%Y-%m-%dT%H:%M:%SZ') + + def log(msg): + print(msg, file=sys.stderr, flush=True) + + + # 1. Fetch all open issues and filter to eligible PRs + log(f'Fetching open issues from ceph/ceph updated since {cutoff} ...') + try: + issues = github_api.gh_paginate( + f'https://api.github.com/repos/ceph/ceph/issues?state=open&since={cutoff}&sort=updated&direction=desc' + ) + except urllib.error.HTTPError as exc: + log(f'GitHub API error listing issues: {exc}') + sys.exit(github_api.EXIT_API_ERROR) + eligible = [] + skipped = [] + + for issue in issues: + if 'pull_request' not in issue: + continue + + num = issue['number'] + labels = {lbl['name'] for lbl in issue.get('labels', [])} + + missing = required_labels - labels + if missing: + skipped.append((num, f'missing required labels: {sorted(missing)}')) + continue + + blocked = exclude_labels & labels + if blocked: + skipped.append((num, f'has excluded labels: {sorted(blocked)}')) + continue + + # Match any PR label that is a key in suite_map. 
+ component = next((lbl for lbl in labels if lbl in suite_map), None) + if not component: + skipped.append((num, 'no component label matching COMPONENT_SUITE_MAP')) + continue + + if component not in suite_map: + skipped.append((num, f'component "{component}" not in COMPONENT_SUITE_MAP')) + continue + + try: + detail = get_pr_detail(num) + except urllib.error.HTTPError as exc: + skipped.append((num, f'GH API error fetching PR detail: {exc}')) + continue + + base = detail['base']['ref'] + head_sha = detail['head']['sha'] + + if branch_filter and base != branch_filter: + skipped.append((num, f'base branch "{base}" excluded by BASE_BRANCH_FILTER')) + continue + + # Idempotency: skip if this exact SHA was already batched and is still + # pending (in ceph-ci) or passed (teuthology success). + batch_state = get_latest_batch_status(head_sha) + if batch_state in SKIP_STATES: + skipped.append((num, f'commit status {BATCH_STATUS_CONTEXT}={batch_state}' + f' on SHA {head_sha[:8]}; already batched')) + continue + + eligible.append({ + 'number': num, + 'component': component, + 'branch': base, + 'suite': suite_map[component], + 'head_sha': head_sha, + }) + + # Log a per-reason summary instead of one line per PR to keep output readable. + skip_summary = {} + for _num, reason in skipped: + # Normalise to the reason category (strip the dynamic label list). + category = reason.split(':')[0] + skip_summary[category] = skip_summary.get(category, 0) + 1 + for category, count in sorted(skip_summary.items()): + log(f' Skipped {count} PR(s): {category}') + log(f'Eligible PRs ({len(eligible)}): {[p["number"] for p in eligible]}') + + if not eligible: + log('No eligible PRs found.') + print('[]') + return + + + # 2. Group by (component, base_branch) + # Build a SHA lookup so the Jenkinsfile can post commit statuses without + # extra API calls. 
+ sha_map = {pr['number']: pr['head_sha'] for pr in eligible} + + groups = defaultdict(list) + for pr in eligible: + groups[(pr['component'], pr['branch'])].append(pr['number']) + + + # 3. Fetch changed files, detect conflicts, split into sub-batches + output = [] + + for (component, branch), pr_nums in sorted(groups.items()): + suite = suite_map[component] + log(f'Group (component={component}, branch={branch}): PRs {pr_nums}') + pr_file_map = {} + for pr_num in pr_nums: + log(f' Fetching changed files for PR#{pr_num} ...') + try: + pr_file_map[pr_num] = get_pr_files(pr_num) + except urllib.error.HTTPError as exc: + log(f' WARNING: cannot fetch files for PR#{pr_num}: {exc}' + '; skipping conflict check for this PR') + pr_file_map[pr_num] = frozenset() + + # Exclude documentation-only PRs: they need no teuth suite + doc_only = [pr for pr, files in pr_file_map.items() if is_doc_only(files)] + if doc_only: + for pr in doc_only: + log(f' Skipping PR#{pr}: documentation-only changes' + ' (no teuthology suite needed)') + pr_nums = [pr for pr in pr_nums if pr not in doc_only] + pr_file_map = {pr: f for pr, f in pr_file_map.items() if pr not in doc_only} + + if not pr_nums: + log(f' No non-doc PRs remain in group ({component}, {branch}); skipping.') + continue + + conflict_edges, evidence = build_conflict_graph(pr_file_map, depth) + + for (a, b), paths in evidence.items(): + log(f' Conflict PR#{a}<->PR#{b}: {paths}') + + sub_batches = greedy_color(pr_nums, conflict_edges, max_batch) + log(f' -> {len(sub_batches)} sub-batch(es): {sub_batches}') + + for idx, batch_prs in enumerate(sub_batches, 1): + obj = { + 'component': component, + 'branch': branch, + 'suite': suite, + 'batch': idx, + 'prs': batch_prs, + # pr_shas: str(pr_num) -> HEAD SHA, used by Jenkinsfile to post + # commit statuses without extra API calls. 
+ 'pr_shas': {str(p): sha_map[p] for p in batch_prs}, + } + + if len(sub_batches) > 1: + reasons = [] + for pr in batch_prs: + for other in conflict_edges.get(pr, set()): + if other not in batch_prs: + key = (min(pr, other), max(pr, other)) + paths = evidence.get(key, []) + reasons.append( + f'PR#{pr}<->PR#{other}: {", ".join(paths[:3])}' + ) + if reasons: + obj['split_reason'] = '; '.join(sorted(set(reasons))) + + output.append(obj) + + log(f'Total sub-batches: {len(output)}') + print(json.dumps(output, indent=2)) + + +if __name__ == '__main__': + main()