Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
## New Features

- Declare support for python 3.13 `PR #1848`
- Allow using the PEP 691 Simple API for fetching package lists and changelogs via the `api-method` option `PR #2075`

## Big Fixes
## Bug Fixes

- Support reading HTTP proxy URLs from environment variables, and SOCKS proxy URLs from the 'mirror.proxy' config option `PR #1861`

Expand Down
10 changes: 10 additions & 0 deletions src/bandersnatch/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class SetConfigValues(NamedTuple):
download_mirror: str
download_mirror_no_fallback: bool
simple_format: SimpleFormat
api_method: str


class Singleton(type): # pragma: no cover
Expand Down Expand Up @@ -218,6 +219,14 @@ def validate_config_values( # noqa: C901

cleanup = config.getboolean("mirror", "cleanup", fallback=False)

api_method = config.get("mirror", "api-method", fallback="xmlrpc")
if api_method not in ("xmlrpc", "simple"):
raise ValueError(
f"Supplied api-method {api_method} is not supported! Please "
+ "update api-method to one of ('xmlrpc', 'simple') in the [mirror] "
+ "section."
)

return SetConfigValues(
json_save,
root_uri,
Expand All @@ -231,4 +240,5 @@ def validate_config_values( # noqa: C901
download_mirror,
download_mirror_no_fallback,
simple_format,
api_method,
)
4 changes: 4 additions & 0 deletions src/bandersnatch/defaults.conf
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ diff-file =
diff-append-epoch = false

log-config =

; API method to use for fetching package lists and changelogs
; Options: xmlrpc, simple
api-method = xmlrpc
8 changes: 8 additions & 0 deletions src/bandersnatch/example.conf
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,11 @@ compare-method = hash
; be appended to the filename (i.e. /path/to/diff-1568129735)
; diff-file = /srv/pypi/mirrored-files
; diff-append-epoch = true

; Configure the API method to use for fetching package lists and changelogs.
; Options are:
; - xmlrpc: Use the XML-RPC API (default, traditional method)
; - simple: Use the Simple (PEP 691 v1) API (newer method)
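; The xmlrpc option is the default and more stable, while simple is newer but may
; be faster in some cases. Note that the Simple API has no "changelog since
; serial" call, so with simple every sync fetches the full index and compares
; serials to find changed packages.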
; api-method = xmlrpc
2 changes: 2 additions & 0 deletions src/bandersnatch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,12 @@ def _make_parser() -> argparse.ArgumentParser:

async def async_main(args: argparse.Namespace, config: ConfigParser) -> int:
if args.op.lower() == "delete":
config_values = bandersnatch.configuration.validate_config_values(config)
async with bandersnatch.master.Master(
config.get("mirror", "master"),
config.getfloat("mirror", "timeout"),
config.getfloat("mirror", "global-timeout", fallback=None),
api_method=config_values.api_method,
) as master:
return await bandersnatch.delete.delete_packages(config, args, master)
elif args.op.lower() == "verify":
Expand Down
72 changes: 72 additions & 0 deletions src/bandersnatch/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,12 @@ def __init__(
global_timeout: float | None = FIVE_HOURS_FLOAT,
proxy: str | None = None,
allow_non_https: bool = False,
api_method: str = "xmlrpc",
) -> None:
self.url = url
self.timeout = timeout
self.global_timeout = global_timeout or FIVE_HOURS_FLOAT
self.api_method = api_method

proxy_url = proxy if proxy else proxy_address_from_env()
self.proxy_kwargs = get_aiohttp_proxy_kwargs(proxy_url) if proxy_url else {}
Expand Down Expand Up @@ -144,6 +146,10 @@ async def url_fetch(
def xmlrpc_url(self) -> str:
return f"{self.url}/pypi"

@property
def simple_url(self) -> str:
    """Return the Simple index root URL: ``self.url`` followed by ``/simple``."""
    base_url = self.url
    return f"{base_url}/simple"

# TODO: Potentially make USER_AGENT more accessible from aiohttp-xmlrpc
async def _gen_custom_headers(self) -> dict[str, str]:
# Create dummy client so we can copy the USER_AGENT + prepend bandersnatch info
Expand Down Expand Up @@ -177,13 +183,59 @@ async def rpc(self, method_name: str, serial: int = 0) -> Any:
except TimeoutError as te:
logger.error(f"Call to {method_name} @ {self.xmlrpc_url} timed out: {te}")

async def fetch_simple_index(self) -> Any:
    """Fetch the top-level Simple index and return its decoded JSON body.

    Issues a GET for ``self.simple_url`` through ``self.session`` using the
    generated custom headers plus an ``Accept`` header that requests the
    v1 JSON representation of the index.

    No exceptions are caught here: failures of the request or of the JSON
    decoding propagate to the caller.
    """
    request_headers = await self._gen_custom_headers()
    # Ask for the JSON flavour of the index rather than the HTML one.
    request_headers["Accept"] = "application/vnd.pypi.simple.v1+json"
    logger.debug(
        f"Fetching simple JSON index from {self.simple_url} w/headers {request_headers}"
    )
    async with self.session.get(self.simple_url, headers=request_headers) as response:
        return await response.json()
Comment on lines +186 to +198
Copy link

Copilot AI Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing error handling for the fetch_simple_index method. If the HTTP request fails or returns invalid JSON, the error will propagate uncaught. Consider adding try-except blocks similar to the rpc method (lines 181-188) to handle TimeoutError and other potential exceptions, and potentially raise SimpleRpcError for consistency with the XML-RPC error handling pattern.

Copilot uses AI. Check for mistakes.

async def all_packages(self) -> Any:
    """Return all packages with their serials using the configured API.

    The Simple API implementation is used when ``self.api_method`` equals
    ``"simple"``; any other value selects the XML-RPC implementation.
    """
    fetch = (
        self._all_packages_simple
        if self.api_method == "simple"
        else self._all_packages_xmlrpc
    )
    return await fetch()

async def _all_packages_xmlrpc(self) -> Any:
    """Return the result of the XML-RPC ``list_packages_with_serial`` call.

    Raises:
        XmlRpcError: if the call produced a falsy result (``None`` or empty).
    """
    packages = await self.rpc("list_packages_with_serial")
    if packages:
        return packages
    raise XmlRpcError("Unable to get full list of packages")

async def _all_packages_simple(self) -> dict[str, int]:
    """Fetch all packages using the PEP 691 Simple API JSON endpoint.

    Returns:
        A dict mapping package names to their serial numbers. An empty
        dict is returned when the index response is empty or has no
        ``projects`` entries.

    Entries in ``projects`` that are not mappings, or that lack a ``name``
    or ``_last-serial`` value, are logged with a warning and skipped
    instead of aborting the whole listing.
    """
    logger.info("Fetching all packages via Simple (PEP 691 v1) API")
    simple_index = await self.fetch_simple_index()
    if not simple_index:
        return {}

    all_packages: dict[str, int] = {}
    # ``or []`` also covers an explicit ``"projects": null`` in the response.
    for project in simple_index.get("projects") or []:
        name = serial = None
        # Only mappings can carry the two keys; anything else is malformed.
        if isinstance(project, dict):
            name = project.get("name")
            serial = project.get("_last-serial")
        if name is None or serial is None:
            logger.warning(
                f"Skipping malformed project entry in simple index: {project}"
            )
            continue
        all_packages[name] = serial
    logger.debug(f"Fetched #{len(all_packages)} from simple JSON index")
    return all_packages

async def changed_packages(self, last_serial: int) -> dict[str, int]:
    """Return packages changed since ``last_serial`` using the configured API.

    The Simple API implementation is used when ``self.api_method`` equals
    ``"simple"``; any other value selects the XML-RPC implementation.
    """
    fetch_changes = (
        self._changed_packages_simple
        if self.api_method == "simple"
        else self._changed_packages_xmlrpc
    )
    return await fetch_changes(last_serial)

async def _changed_packages_xmlrpc(self, last_serial: int) -> dict[str, int]:
changelog = await self.rpc("changelog_since_serial", last_serial)
if changelog is None:
changelog = []
Expand All @@ -194,6 +246,26 @@ async def changed_packages(self, last_serial: int) -> dict[str, int]:
packages[package] = serial
return packages

async def _changed_packages_simple(self, last_serial: int) -> dict[str, int]:
    """Return the packages whose serial is greater than ``last_serial``.

    The Simple (PEP 691 v1) API has no direct "changelog since serial"
    equivalent, so the complete package list is fetched and filtered by
    serial.

    Note: this is less efficient than the XML-RPC changelog, but it works
    with the Simple API alone.
    """
    logger.info(
        f"Fetching changed packages since serial {last_serial} "
        "via Simple (PEP 691 v1) API"
    )

    # Full listing of package -> current serial; filtered below.
    current_serials = await self._all_packages_simple()
    changed_packages: dict[str, int] = {}
    for name, serial in current_serials.items():
        if serial > last_serial:
            changed_packages[name] = serial
    logger.debug(f"Fetched #{len(changed_packages)} changed packages")
    return changed_packages

async def get_package_metadata(self, package_name: str, serial: int = 0) -> Any:
try:
metadata_generator = self.get(f"/pypi/{package_name}/json", serial)
Expand Down
7 changes: 6 additions & 1 deletion src/bandersnatch/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,12 @@ async def mirror(
# Always reference those classes here with the fully qualified name to
# allow them being patched by mock libraries!
async with Master(
mirror_url, timeout, global_timeout, proxy, allow_non_https
mirror_url,
timeout,
global_timeout,
proxy,
allow_non_https,
config_values.api_method,
) as master:
mirror = BandersnatchMirror(
homedir,
Expand Down
65 changes: 65 additions & 0 deletions src/bandersnatch/tests/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_single_config__default__mirror__setting_attributes(self) -> None:
options,
{
"allow-non-https",
"api-method",
"cleanup",
"compare-method",
"diff-append-epoch",
Expand Down Expand Up @@ -101,6 +102,7 @@ def test_single_config__default__mirror__setting__types(self) -> None:
("global-timeout", int),
("workers", int),
("compare-method", str),
("api-method", str),
]:
self.assertIsInstance(
option_type(instance["mirror"].get(option)), option_type
Expand Down Expand Up @@ -146,6 +148,7 @@ def test_validate_config_values(self) -> None:
"",
False,
SimpleFormat.ALL,
"xmlrpc",
)
no_options_configparser = BandersnatchConfig(load_defaults=True)
self.assertEqual(
Expand All @@ -166,6 +169,7 @@ def test_validate_config_values_release_files_false_sets_root_uri(self) -> None:
"",
False,
SimpleFormat.ALL,
"xmlrpc",
)
release_files_false_configparser = BandersnatchConfig(load_defaults=True)
release_files_false_configparser["mirror"].update({"release-files": "false"})
Expand All @@ -189,6 +193,7 @@ def test_validate_config_values_download_mirror_false_sets_no_fallback(
"",
False,
SimpleFormat.ALL,
"xmlrpc",
)
release_files_false_configparser = BandersnatchConfig(load_defaults=True)
release_files_false_configparser["mirror"].update(
Expand All @@ -200,6 +205,66 @@ def test_validate_config_values_download_mirror_false_sets_no_fallback(
default_values, validate_config_values(release_files_false_configparser)
)

def test_validate_config_values_api_method_simple(self) -> None:
    """Check that ``api-method = simple`` validates and is returned unchanged."""
    config = BandersnatchConfig(load_defaults=True)
    config["mirror"].update({"api-method": "simple"})

    # Same values as the defaults, except for the trailing api_method field.
    expected = SetConfigValues(
        False,
        "",
        "",
        False,
        SimpleDigest.SHA256,
        "filesystem",
        False,
        True,
        "hash",
        "",  # download_mirror
        False,  # download_mirror_no_fallback
        SimpleFormat.ALL,  # simple_format
        "simple",  # api_method
    )
    self.assertEqual(expected, validate_config_values(config))

def test_validate_config_values_api_method_xmlrpc(self) -> None:
    """Check that an explicit ``api-method = xmlrpc`` validates unchanged."""
    config = BandersnatchConfig(load_defaults=True)
    config["mirror"].update({"api-method": "xmlrpc"})

    expected = SetConfigValues(
        False,
        "",
        "",
        False,
        SimpleDigest.SHA256,
        "filesystem",
        False,
        True,
        "hash",
        "",  # download_mirror
        False,  # download_mirror_no_fallback
        SimpleFormat.ALL,  # simple_format
        "xmlrpc",  # api_method
    )
    self.assertEqual(expected, validate_config_values(config))

def test_validate_config_values_api_method_invalid(self) -> None:
    """Check that an unsupported ``api-method`` value raises ``ValueError``."""
    config = BandersnatchConfig(load_defaults=True)
    config["mirror"].update({"api-method": "invalid"})

    with self.assertRaises(ValueError) as raised:
        validate_config_values(config)

    # The message must name the rejected value and list the accepted ones.
    message = str(raised.exception)
    expected_fragments = (
        "api-method invalid is not supported",
        "('xmlrpc', 'simple')",
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, message)

def test_validate_config_values_api_method_defaults_to_xmlrpc(self) -> None:
    """Check that a missing ``api-method`` option falls back to ``"xmlrpc"``."""
    config = BandersnatchConfig(load_defaults=True)
    # Drop the option when the loaded defaults define it, so that validation
    # has to rely on its own fallback value.
    option = ("mirror", "api-method")
    if config.has_option(*option):
        config.remove_option(*option)

    validated = validate_config_values(config)
    self.assertEqual(validated.api_method, "xmlrpc")

def test_validate_config_diff_file_reference(self) -> None:
diff_file_test_cases = [
(
Expand Down
Loading