Bench ranges #1710
One changed file drops the `delimiter` CLI argument from an existing snippet:

```diff
@@ -79,5 +79,5 @@ def list_blobs_with_prefix(bucket_name, prefix, delimiter=None):

 if __name__ == "__main__":
     list_blobs_with_prefix(
-        bucket_name=sys.argv[1], prefix=sys.argv[2], delimiter=sys.argv[3]
+        bucket_name=sys.argv[1], prefix=sys.argv[2]
     )
```

**Contributor comment** (on the hunk above): This change removes the ability to specify a `delimiter` from the command line. A suggested change was attached.
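The attached suggestion itself is not preserved in this view; one plausible shape (an assumption, not the reviewer's actual suggestion) would keep the delimiter optional:

```python
# Hypothetical reconstruction -- the reviewer's actual suggestion is
# truncated in the source and may differ.
if __name__ == "__main__":
    list_blobs_with_prefix(
        bucket_name=sys.argv[1],
        prefix=sys.argv[2],
        delimiter=sys.argv[3] if len(sys.argv) > 3 else None,
    )
```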
A new README documents the microbenchmarks (new file, 38 lines):

# Performance Microbenchmarks

This directory contains performance microbenchmarks for the Python Storage client library.

## Usage

To run the benchmarks, use `pytest` with the `--benchmark-json` flag to specify an output file for the results.

Example:
```bash
pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py
```

### Running a Specific Test

To run a single test, append `::` followed by the test name to the file path.

Example:
```bash
pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py::test_downloads_single_proc_single_coro
```

## Configuration

The benchmarks are configured using `config.yaml` files located in the respective subdirectories (e.g., `reads/config.yaml`).

## Overriding Buckets

You can override the buckets used in the benchmarks by setting environment variables. The variable names are defined in each benchmark's implementation; a hypothetical example follows.
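For instance, assuming a benchmark reads its bucket from an environment variable (the name `GCS_BENCH_BUCKET` below is hypothetical; check the benchmark source for the real one):

```bash
# Hypothetical variable name -- consult the specific benchmark implementation.
export GCS_BENCH_BUCKET=my-benchmark-bucket
pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py
```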
## Output

The benchmarks produce a JSON file with the results. This file can be converted to a CSV file for easier analysis in spreadsheets using the provided `json_to_csv.py` script.

Example:
```bash
python3 tests/perf/microbenchmarks/json_to_csv.py output.json
```
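The raw JSON can also be inspected directly; here is a minimal sketch, assuming the standard pytest-benchmark report layout (a top-level `benchmarks` list whose entries carry per-test `stats` and the `extra_info` fields published by the helper module below):

```python
# Minimal sketch, assuming the standard pytest-benchmark JSON layout.
import json

with open("output.json") as f:
    report = json.load(f)

for bench in report["benchmarks"]:
    extra = bench.get("extra_info", {})
    print(
        bench["name"],
        f"median={bench['stats']['median']:.4f}s",
        f"median_MiB_s={extra.get('throughput_MiB_s_median', 'n/a')}",
    )
```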
A new file consisting only of the Apache license header (new file, 13 lines):

```python
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```
A new helpers module (163 lines) provides shared utilities. The first is `publish_benchmark_extra_info`, which records benchmark parameters and derived throughput statistics on pytest-benchmark's `extra_info`:

```python
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, List, Optional
import statistics
import io
import os


def publish_benchmark_extra_info(
    benchmark: Any,
    params: Any,
    benchmark_group: str = "read",
    true_times: Optional[List[float]] = None,
) -> None:
    """
    Helper function to publish benchmark parameters to the extra_info property.
    """
    benchmark.extra_info["num_files"] = params.num_files
    benchmark.extra_info["file_size"] = params.file_size_bytes
    benchmark.extra_info["chunk_size"] = params.chunk_size_bytes
    if benchmark_group == "write":
        benchmark.extra_info["pattern"] = "seq"
    else:
        benchmark.extra_info["pattern"] = params.pattern
    benchmark.extra_info["coros"] = params.num_coros
    benchmark.extra_info["rounds"] = params.rounds
    benchmark.extra_info["bucket_name"] = params.bucket_name
    benchmark.extra_info["bucket_type"] = params.bucket_type
    benchmark.extra_info["processes"] = params.num_processes
    benchmark.group = benchmark_group

    # Throughput is inversely related to time: the slowest round ("max")
    # gives the minimum throughput, and the fastest round ("min") the maximum.
    object_size = params.file_size_bytes
    num_files = params.num_files
    total_uploaded_mib = object_size / (1024 * 1024) * num_files
    min_throughput = total_uploaded_mib / benchmark.stats["max"]
    max_throughput = total_uploaded_mib / benchmark.stats["min"]
    mean_throughput = total_uploaded_mib / benchmark.stats["mean"]
    median_throughput = total_uploaded_mib / benchmark.stats["median"]

    benchmark.extra_info["throughput_MiB_s_min"] = min_throughput
    benchmark.extra_info["throughput_MiB_s_max"] = max_throughput
    benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput
    benchmark.extra_info["throughput_MiB_s_median"] = median_throughput

    print("\nThroughput Statistics (MiB/s):")
    print(f"  Min: {min_throughput:.2f} (from max time)")
    print(f"  Max: {max_throughput:.2f} (from min time)")
    print(f"  Mean: {mean_throughput:.2f} (approx, from mean time)")
    print(f"  Median: {median_throughput:.2f} (approx, from median time)")

    # If the caller recorded per-round wall-clock times itself, derive exact
    # throughput statistics from those instead of inverting aggregate timings.
    if true_times:
        throughputs = [total_uploaded_mib / t for t in true_times]
        true_min_throughput = min(throughputs)
        true_max_throughput = max(throughputs)
        true_mean_throughput = statistics.mean(throughputs)
        true_median_throughput = statistics.median(throughputs)

        benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput
        benchmark.extra_info["true_throughput_MiB_s_max"] = true_max_throughput
        benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput
        benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput

        print("\nThroughput Statistics from true_times (MiB/s):")
        print(f"  Min: {true_min_throughput:.2f}")
        print(f"  Max: {true_max_throughput:.2f}")
        print(f"  Mean: {true_mean_throughput:.2f}")
        print(f"  Median: {true_median_throughput:.2f}")

    # Get benchmark name, rounds, and iterations
    name = benchmark.name
    rounds = benchmark.stats["rounds"]
    iterations = benchmark.stats["iterations"]

    # Header for throughput table
    header = "\n\n" + "-" * 125 + "\n"
    header += "Throughput Benchmark (MiB/s)\n"
    header += "-" * 125 + "\n"
    header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n"
    header += "-" * 125

    # Data row for throughput table
    # The table headers (Min, Max) refer to the throughput values.
    row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}"

    print(header)
    print(row)
    print("-" * 125)
```
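A hedged sketch of how this helper plugs into a pytest-benchmark test; `run_download` and the `params` object are hypothetical stand-ins, not names taken from this PR:

```python
# Hypothetical sketch -- `run_download` and `params` are stand-ins,
# not fixtures from this PR.
import time


def test_downloads_example(benchmark, params):
    true_times = []

    def _round():
        start = time.monotonic()
        run_download(params)  # the workload being measured (hypothetical)
        true_times.append(time.monotonic() - start)

    # One iteration per round so each recorded wall-clock time maps 1:1
    # to a benchmark round.
    benchmark.pedantic(_round, rounds=params.rounds, iterations=1)

    publish_benchmark_extra_info(
        benchmark, params, benchmark_group="read", true_times=true_times
    )
```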
The same module also defines `RandomBytesIO`, a seekable file-like object that fabricates its contents on the fly with `os.urandom`:

```python
class RandomBytesIO(io.RawIOBase):
    """
    A file-like object that generates random bytes using os.urandom.
    It enforces a fixed size and an upper safety cap.
    """

    # 10 GiB default safety cap
    DEFAULT_CAP = 10 * 1024 * 1024 * 1024

    def __init__(self, size, max_size=DEFAULT_CAP):
        """
        Args:
            size (int): The exact size of the virtual file in bytes.
            max_size (int): The maximum allowed size to prevent safety issues.
        """
        if size is None:
            raise ValueError("Size must be defined (cannot be infinite).")

        if size > max_size:
            raise ValueError(
                f"Requested size {size} exceeds the maximum limit of "
                f"{max_size} bytes (10 GiB)."
            )

        self._size = size
        self._pos = 0

    def read(self, n=-1):
        # 1. Handle "read all" (n=-1)
        if n is None or n < 0:
            n = self._size - self._pos

        # 2. Handle EOF (End of File)
        if self._pos >= self._size:
            return b""

        # 3. Clamp read amount to remaining size
        # This ensures we stop exactly at `size` bytes.
        n = min(n, self._size - self._pos)

        # 4. Generate data
        data = os.urandom(n)
        self._pos += len(data)
        return data

    def readable(self):
        return True

    def seekable(self):
        return True

    def tell(self):
        return self._pos

    def seek(self, offset, whence=io.SEEK_SET):
        if whence == io.SEEK_SET:
            new_pos = offset
        elif whence == io.SEEK_CUR:
            new_pos = self._pos + offset
        elif whence == io.SEEK_END:
            new_pos = self._size + offset
        else:
            raise ValueError(f"Invalid whence: {whence}")

        # Clamp position to valid range [0, size]
        self._pos = max(0, min(new_pos, self._size))
        return self._pos
```
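A brief usage sketch (the bucket and object names are illustrative, not from this PR). Note that because bytes are generated on demand, seeking back and re-reading yields different data:

```python
# Illustrative usage; bucket/object names are not from this PR.
src = RandomBytesIO(size=256 * 1024 * 1024)  # 256 MiB virtual file

chunk = src.read(4 * 1024 * 1024)  # 4 MiB of freshly generated bytes
assert len(chunk) == 4 * 1024 * 1024 and src.tell() == 4 * 1024 * 1024

src.seek(0)  # rewind; a second read returns *different* random bytes

# e.g. as an upload source for google-cloud-storage:
# blob = client.bucket("my-bench-bucket").blob("random-256mib")
# blob.upload_from_file(src, size=256 * 1024 * 1024)
```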
**Reviewer comment:** Commented-out code should be avoided in the codebase as it can become stale and reduce readability. If these options are for debugging or future use, they should be removed and can be retrieved from version control history if needed. The `options` tuple can be left empty.