MAX_CONCURRENCY = 10
USE_THREADS = True

+ SNAPSHOT_TAG_REGEXES = [
+     re.compile(r"^develop-[\d]{4}-[\d]{2}-[\d]{2}$"),
+     re.compile(r"^v(\d+)\.(\d+)\.\d+$"),
+ ]
+
+ PROTECTED_BRANCH_REGEXES = [
+     re.compile(r"^develop$"),
+     re.compile(r"^releases/v[\d]+\.[\d]+$"),
+ ]
+
+
+ def tag_source_branch(tag):
+     """Parse a tag and return the source branch it was cut from."""
+     m = SNAPSHOT_TAG_REGEXES[0].match(tag)
+     if m:
+         return "develop"
+
+     m = SNAPSHOT_TAG_REGEXES[1].match(tag)
+     if m:
+         major, minor = m.groups()
+         return f"releases/v{major}.{minor}"
+
+     return None
+
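# For reference, a minimal sanity check of the mapping above; these tag
# values are hypothetical examples, not taken from a real mirror:
#
#     assert tag_source_branch("develop-2024-05-01") == "develop"
#     assert tag_source_branch("v0.21.3") == "releases/v0.21"
#     assert tag_source_branch("not-a-snapshot") is None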

################################################################################
# Encapsulate information about a built spec in a mirror
@@ -85,12 +110,13 @@ def bucket_name_from_s3_url(url):

################################################################################
#
- def spec_catalogs_from_listing_v2(listing_path: str) -> Dict[str, Dict[str, BuiltSpec]]:
+ def spec_catalogs_from_listing_v2(listing_path: Optional[str] = None) -> Dict[str, Dict[str, BuiltSpec]]:
89114 """Return a complete catalog of all the built specs in the listing
90115
91116 Return a complete catalog of all the built specs for every prefix in the
92117 listing. The returned dictionary of catalogs is keyed by unique prefix.
93118 """
119+ listing_path = list_prefix_contents (list_url , listing_path )
    all_catalogs: Dict[str, Dict[str, BuiltSpec]] = defaultdict(
        lambda: defaultdict(BuiltSpec)
    )
@@ -128,7 +154,8 @@ def spec_catalogs_from_listing_v2(listing_path: str) -> Dict[str, Dict[str, Buil

################################################################################
#
- def spec_catalogs_from_listing_v3(listing_path: str) -> Dict[str, Dict[str, BuiltSpec]]:
+ def spec_catalogs_from_listing_v3(listing_path: Optional[str] = None) -> Dict[str, Dict[str, BuiltSpec]]:
+     listing_path = list_prefix_contents(list_url, listing_path)
    all_catalogs: Dict[str, Dict[str, BuiltSpec]] = defaultdict(
        lambda: defaultdict(BuiltSpec)
    )
@@ -149,6 +176,189 @@ def spec_catalogs_from_listing_v3(listing_path: str) -> Dict[str, Dict[str, Buil
    return all_catalogs


+ ################################################################################
+ #
+ def generate_spec_catalogs_v2(
+     ref: str, exclude: List[str] = [], listing_path: Optional[str] = None
+ ) -> tuple[Dict[str, Dict[str, BuiltSpec]], Dict[str, BuiltSpec]]:
184+ """Return information about specs in stacks and at the root
185+
186+ Read the listing file, populate and return a tuple of dicts indicating which
187+ specs exist in stacks, and which exist in the top-level buildcache. Stacks
188+ appearing in the ``exclude`` list are ignoreed.
189+
190+ Returns a tuple like the following:
191+
192+ (
193+ # First element of tuple is the stack specs
194+ {
195+ <hash>: {
196+ <stack>: <BuiltSpec>,
197+ ...
198+ },
199+ ...
200+ },
201+ # Followed by specs at the top level
202+ {
203+ <hash>: <BuiltSpec>,
204+ ...
205+ }
206+ )
207+ """
+     listing_path = list_prefix_contents(list_url, listing_path)
+
+     stack_prefix_regex = re.compile(rf"{ref}/(.+)")
+     stack_specs: Dict[str, Dict[str, BuiltSpec]] = defaultdict(
+         lambda: defaultdict(BuiltSpec)
+     )
+     all_catalogs = spec_catalogs_from_listing_v2(listing_path)
+     top_level_specs = all_catalogs[ref]
+
+     for prefix in all_catalogs:
+         m = stack_prefix_regex.search(prefix)
+         if not m:
+             continue
+
+         stack = m.group(1)
+         if stack in exclude:
+             continue
+
+         for spec_hash, built_spec in all_catalogs[prefix].items():
+             stack_specs[stack][spec_hash] = built_spec
+
+     return stack_specs, top_level_specs
+
+
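# A hedged usage sketch (the ref and stack names are hypothetical): compare
# per-stack catalogs against the top-level buildcache to spot specs that have
# not been synced to the root yet.
#
#     stacks, root = generate_spec_catalogs_v2("develop", exclude=["deprecated"])
#     for stack, catalog in stacks.items():
#         missing = [h for h in catalog if h not in root]
#         print(f"{stack}: {len(missing)} specs not yet at the top level")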
+ def format_blob_url(prefix: str, blob_record: Dict[str, str]) -> str:
+     """Use prefix and algorithm/checksum from record to build full prefix"""
+     hash_algo = blob_record.get("checksumAlgorithm", None)
+     checksum = blob_record.get("checksum", None)
+
+     if not hash_algo:
+         raise MalformedManifestError("Missing 'checksumAlgorithm'")
+
+     if not checksum:
+         raise MalformedManifestError("Missing 'checksum'")
+
+     return f"{prefix}/blobs/{hash_algo}/{checksum[:2]}/{checksum}"
+
+
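# Illustration of the sharded blob layout the helper above produces (the
# checksum value here is hypothetical):
#
#     format_blob_url("develop/my-stack", {"checksumAlgorithm": "sha256",
#                                          "checksum": "abcd1234"})
#     -> "develop/my-stack/blobs/sha256/ab/abcd1234"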
+ def find_data_with_media_type(
+     data: List[Dict[str, str]], mediaType: str
+ ) -> Dict[str, str]:
+     """Return data element with matching mediaType, or else raise"""
+     for elt in data:
+         if elt["mediaType"] == mediaType:
+             return elt
+     raise NoSuchMediaTypeError(mediaType)
+
+
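# A sketch of the manifest "data" shape these two helpers expect, assuming
# each record carries a mediaType plus the checksum fields consumed by
# format_blob_url (field values here are illustrative):
#
#     data = [
#         {"mediaType": SPEC_METADATA_MEDIA_TYPE,
#          "checksumAlgorithm": "sha256", "checksum": "abcd1234"},
#     ]
#     meta = find_data_with_media_type(data, SPEC_METADATA_MEDIA_TYPE)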
+ ################################################################################
+ #
+ def generate_spec_catalogs_v3(
+     bucket: str,
+     ref: str,
+     exclude: List[str] = [],
+     listing_path: Optional[str] = None,
+     parallel: int = 8,
+ ) -> tuple[Dict[str, Dict[str, BuiltSpec]], Dict[str, BuiltSpec]]:
265+ """Return information about specs in stacks and at the root"""
266+ listing_path = list_prefix_contents (list_url , listing_path )
267+
268+ stack_prefix_regex = re .compile (rf"{ ref } /(.+)" )
269+ stack_specs : Dict [str , Dict [str , BuiltSpec ]] = defaultdict (
270+ lambda : defaultdict (BuiltSpec )
271+ )
272+ all_catalogs = spec_catalogs_from_listing_v3 (listing_path )
273+ top_level_specs = all_catalogs [ref ]
274+
275+ task_list = []
276+ tmpdir = tempfile .mkdtemp ()
277+
+     for prefix in all_catalogs:
+         m = stack_prefix_regex.search(prefix)
+         if not m:
+             continue
+
+         stack = m.group(1)
+         if stack in exclude:
+             continue
+
+         stack_manifests_dir = os.path.join(tmpdir, stack)
+         os.makedirs(stack_manifests_dir)
+         stack_manifest_sync_cmd = [
+             "aws",
+             "s3",
+             "sync",
+             "--exclude",
+             "*",
+             "--include",
+             "*.spec.manifest.json",
+             f"s3://{bucket}/{prefix}/v3/manifests/spec",
+             stack_manifests_dir,
+         ]
+
+         start_time = datetime.now()
+
+         try:
+             print(f"Downloading manifests for stack {stack}")
+             subprocess.run(stack_manifest_sync_cmd, check=True)
+         except subprocess.CalledProcessError as cpe:
+             error_msg = getattr(cpe, "message", cpe)
+             print(f"Failed to download manifests for {stack} due to: {error_msg}")
+             continue
+
+         end_time = datetime.now()
+         elapsed = end_time - start_time
+         print(f"Downloaded manifests for stack {stack}, elapsed time: {elapsed}")
+
+         for spec_hash, built_spec in all_catalogs[prefix].items():
+             stack_specs[stack][spec_hash] = built_spec
+             task_list.append((built_spec.hash, stack))
+
+     def _process_manifest_fn(spec_hash, stack):
+         download_dir = os.path.join(tmpdir, stack)
+         find_cmd = ["find", download_dir, "-type", "f", "-name", f"*{spec_hash}*"]
+         find_result = subprocess.run(find_cmd, capture_output=True)
+
+         if find_result.returncode != 0:
+             print(f"[{find_cmd}] failed to find manifest for {spec_hash} in {stack}")
+             return (None, None, None, None)
+
+         manifest_path = find_result.stdout.decode("utf-8").strip()
+         manifest_dict = extract_json_from_clearsig(manifest_path)
+         return (spec_hash, stack, manifest_dict, manifest_path)
+
+     with ThreadPoolExecutor(max_workers=parallel) as executor:
+         futures = [executor.submit(_process_manifest_fn, *task) for task in task_list]
+         for future in as_completed(futures):
+             try:
+                 spec_hash, stack, manifest_dict, manifest_path = future.result()
+                 if not spec_hash or not stack or not manifest_dict or not manifest_path:
+                     continue
+
+                 stack_specs[stack][spec_hash].stack = stack
+                 stack_specs[stack][spec_hash].manifest_path = manifest_path
+                 stack_specs[stack][spec_hash].meta = format_blob_url(
+                     f"{ref}/{stack}",
+                     find_data_with_media_type(
+                         manifest_dict["data"], SPEC_METADATA_MEDIA_TYPE
+                     ),
+                 )
+                 stack_specs[stack][spec_hash].archive = format_blob_url(
+                     f"{ref}/{stack}",
+                     find_data_with_media_type(
+                         manifest_dict["data"], TARBALL_MEDIA_TYPE
+                     ),
+                 )
+             except Exception as exc:
+                 print(f"Exception processing manifests: {exc}")
+
+     # Clean up the tmpdir
+     shutil.rmtree(tmpdir)
+     return stack_specs, top_level_specs
+
+
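# A hedged usage sketch for the v3 layout (bucket/ref values hypothetical);
# manifests are synced per stack with `aws s3 sync`, then parsed in parallel
# worker threads:
#
#     stacks, root = generate_spec_catalogs_v3("example-bucket", "develop",
#                                              exclude=["deprecated"], parallel=16)
#     total = sum(len(catalog) for catalog in stacks.values())
#     print(f"indexed {total} stack specs and {len(root)} root specs")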
################################################################################
# If the cli didn't provide a working directory, we will create (and clean up)
# a temporary directory.
@@ -159,15 +369,26 @@ def get_workdir_context(workdir: Optional[str] = None):
    return contextlib.nullcontext(workdir)


+ # Module-level cache of the auto-generated listing file path
+ listing_file = None
################################################################################
# Given a url and a file path to use for writing, get a recursive listing of
# everything under the prefix defined by the url, and write it to disk using the
# supplied path.
- def list_prefix_contents(url: str, output_file: str):
+ def list_prefix_contents(url: str, output_file: Optional[str] = None, force: bool = False):
    list_cmd = ["aws", "s3", "ls", "--recursive", url]

-     with open(output_file, "w") as f:
-         subprocess.run(list_cmd, stdout=f, check=True)
+     # Auto caching of listing file
+     global listing_file
+     if not output_file:
+         if not listing_file:
+             # mkstemp returns (fd, path) and creates an empty file, so close
+             # the fd and force a fresh listing into the new cache file
+             fd, listing_file = tempfile.mkstemp()
+             os.close(fd)
+             force = True
+         output_file = listing_file
+
+     if not os.path.isfile(output_file) or force:
+         with open(output_file, "w") as f:
+             subprocess.run(list_cmd, stdout=f, check=True)
+
+     return output_file
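# Cache behavior sketch (the url is hypothetical): the first call with no
# output_file creates a temp file and fills it via `aws s3 ls`; later calls
# reuse that file unless force=True re-runs the listing.
#
#     path = list_prefix_contents("s3://example-bucket/develop")
#     same = list_prefix_contents("s3://example-bucket/develop")  # cached
#     path = list_prefix_contents("s3://example-bucket/develop", force=True)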


################################################################################
@@ -303,6 +524,21 @@ def s3_upload_file(file_path: str, bucket: str, prefix: str, client=None):
        s3_client.upload_fileobj(fd, bucket, prefix)


+ def s3_object_exists(bucket: str, key: str, client=None):
+     """Check if an s3 object exists"""
+
+     if client:
+         s3_client = client
+     else:
+         s3_client = s3_create_client()
+
+     try:
+         _ = s3_client.head_object(Bucket=bucket, Key=key)
+         return True
+     except Exception:
+         return False
+
+
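# Usage sketch (bucket and key are hypothetical); passing an explicit client
# avoids re-creating a session when checking many keys in a loop:
#
#     client = s3_create_client()
#     if not s3_object_exists("example-bucket", "develop/index.json", client=client):
#         print("index object missing")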
################################################################################
#
def compute_checksum(input_file: str, buf_size: int = 65536) -> str: