Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@ def __generate_yamls_internal(self, scale: str = None, scale_num: str = None, sc
logger.info(f'HAMMERDB_CONFIG override: {hammerdb_config}')
render_data.update(hammerdb_config)

workload_config = self.__environment_variables_dict.get('workload_config', {})
if workload_config:
workload_config = {k: v for k, v in workload_config.items() if v != ''}
unknown_keys = set(workload_config.keys()) - set(render_data.keys())
if unknown_keys:
logger.warning(f'WORKLOAD_CONFIG unknown keys (will be ignored): {unknown_keys}')
logger.info(f'WORKLOAD_CONFIG override: {workload_config}')
render_data.update(workload_config)

out_files = []
standard_template_path = os.path.join(workload_dir_path, 'internal_data', self.__standard_template_file)
if os.path.isfile(standard_template_path):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Secret carrying the cloud-init user-data for the linstress VM. The user-data
# requests the python3-psutil package and writes the stress script to
# /tmp/stress.py inside the guest.
# The ssh_authorized_keys entry is rendered only when ssh_public_key is defined
# and non-empty.
# NOTE(review): password login is enabled with a fixed password below - confirm
# this is only ever used for short-lived benchmark guests.
apiVersion: v1
kind: Secret
metadata:
name: linstress-cloudinit-{{ trunc_uuid }}
namespace: {{ namespace }}
stringData:
userdata: |
#cloud-config
user: fedora
password: fedora
chpasswd: { expire: False }
ssh_pwauth: true
{%- if ssh_public_key is defined and ssh_public_key %}
ssh_authorized_keys:
- "{{ ssh_public_key }}"
{%- endif %}
packages:
- python3-psutil
write_files:
- path: /tmp/stress.py
permissions: '0755'
content: |
import multiprocessing, time, psutil, json

def burn_cpu(d, result_dict, idx):
    """Spin on one CPU for ``d`` seconds and publish the achieved throughput.

    Args:
        d: Requested busy-loop duration in seconds.
        result_dict: Mapping shared with the parent process; the result is
            stored under ``idx``.
        idx: Key identifying this worker in ``result_dict``.

    The stored value is a dict with ``ops`` (loop iterations completed),
    ``elapsed`` (measured seconds) and ``ops_per_sec``.
    """
    # perf_counter is monotonic and high resolution, so the measurement cannot
    # be skewed by wall-clock adjustments made while the loop is running.
    ops = 0
    start = time.perf_counter()
    end = start + d
    while time.perf_counter() < end:
        # NOTE(review): CPython appears to constant-fold 2**32, so an iteration
        # mostly costs the loop and the clock call - confirm that is the
        # intended unit of work.
        _ = 2**32
        ops += 1
    elapsed = time.perf_counter() - start
    result_dict[idx] = {
        'ops': ops,
        'elapsed': elapsed,
        # Guard against a zero-length measurement (d <= 0 or a coarse timer).
        'ops_per_sec': ops / elapsed if elapsed > 0 else 0.0,
    }

# Memory stress worker, started as a separate process by the main section.
# Allocates bytearrays until system-wide used memory should reach about
# target_percent of psutil's reported total, then sleeps for duration seconds.
#   target_percent: desired overall memory utilisation in percent
#                   (presumably 0-100; verify against the caller)
#   duration: seconds to sleep after the allocation phase
# NOTE(review): indentation is not visible in this view, so the exact nesting of
# the statements below (for example whether the progress print runs per chunk)
# has to be checked against the real template file.
def burn_memory(target_percent, duration):
total = psutil.virtual_memory().total
target_bytes = int(total * target_percent / 100)
# Only the gap between the target and what is already in use gets allocated.
current_used = psutil.virtual_memory().used
alloc_bytes = target_bytes - current_used
if alloc_bytes <= 0:
print(f'Memory already at {psutil.virtual_memory().percent}%, skipping')
return
# Allocate in pieces of at most 100 MiB and keep every piece referenced from
# the blocks list.
chunk_size = 100 * 1024 * 1024
blocks = []
allocated = 0
while allocated < alloc_bytes:
size = min(chunk_size, alloc_bytes - allocated)
blocks.append(bytearray(size))
allocated += size
print(f'Allocated {allocated // (1024*1024)}MB / {alloc_bytes // (1024*1024)}MB ({psutil.virtual_memory().percent}%)')
print(f'Memory at {psutil.virtual_memory().percent}%, holding for {duration}s...')
time.sleep(duration)

if __name__ == '__main__':
    # Entry point of the in-guest stress script. The double-brace placeholders
    # are substituted by the template renderer before the file reaches the VM.
    cpu_total = multiprocessing.cpu_count()
    # Stress the requested share of the visible CPUs, but never fewer than one.
    cpu_count = max(1, int(cpu_total * {{ stress_cpu }} / 100))
    duration = {{ stress_duration }}
    mem_target = {{ stress_memory }}

    print(f'CPU count: {cpu_total}')
    print(f'Stressing {cpu_count} CPUs ({{ stress_cpu }}%) and {mem_target}% memory for {duration}s')
    print(f'Total memory: {psutil.virtual_memory().total // (1024**3)}GB')
    print(f'Memory before: {psutil.virtual_memory().percent}%')
    print(f'CPU before: {psutil.cpu_percent(interval=1)}%')

    mem_proc = None
    if mem_target > 0:
        mem_proc = multiprocessing.Process(target=burn_memory, args=(mem_target, duration))
        mem_proc.start()
        # Give the allocator a head start before the CPU workers begin.
        time.sleep(5)

    # Workers report their results through a manager-backed dict.
    manager = multiprocessing.Manager()
    result_dict = manager.dict()

    cpu_procs = [multiprocessing.Process(target=burn_cpu, args=(duration, result_dict, i)) for i in range(cpu_count)]
    for proc in cpu_procs:
        proc.start()

    # Sample every sample_period seconds, plus once at the end of the run.
    # Sleeps are computed against a monotonic deadline so that the blocking
    # cpu_percent window does not push later samples past the stress period,
    # and runs shorter than one period are still sampled while load is active.
    sample_period = 30
    cpu_window = 1
    stress_start = time.monotonic()
    samples = []
    mark = 0
    while mark < duration:
        mark = min(mark + sample_period, duration)
        # Start measuring cpu_window seconds early so the window ends at mark.
        pause = stress_start + mark - cpu_window - time.monotonic()
        if pause > 0:
            time.sleep(pause)
        mem = psutil.virtual_memory()
        cpu_pct = psutil.cpu_percent(interval=cpu_window)
        sample = {
            'time_sec': mark,
            'cpu_percent': cpu_pct,
            'mem_percent': mem.percent,
            'mem_used_gb': round(mem.used / (1024**3), 1),
            'mem_total_gb': round(mem.total / (1024**3), 1)
        }
        samples.append(sample)
        print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")

    for proc in cpu_procs:
        proc.join()
    if mem_proc:
        mem_proc.join()
Comment on lines +78 to +96

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Monitoring loop can sleep past the actual stress duration.

intervals = max(1, duration // 30) forces at least one time.sleep(30) iteration even when duration < 30. In that case burn_cpu/burn_memory workers finish long before the monitor wakes up, so the captured CPU/memory samples reflect post-stress idle state rather than the actual stress window — producing misleading benchmark data for short-duration runs (a common case for quick/test_ci iterations).

🐛 Proposed fix using elapsed-time tracking instead of a fixed iteration count
-              intervals = max(1, duration // 30)
-              samples = []
-              for i in range(intervals):
-                  time.sleep(30)
+              samples = []
+              elapsed_total = 0
+              while elapsed_total < duration:
+                  step = min(30, duration - elapsed_total)
+                  time.sleep(step)
+                  elapsed_total += step
                   mem = psutil.virtual_memory()
                   cpu_pct = psutil.cpu_percent(interval=1)
                   sample = {
-                      'time_sec': (i+1)*30,
+                      'time_sec': elapsed_total,
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
intervals = max(1, duration // 30)
samples = []
for i in range(intervals):
time.sleep(30)
mem = psutil.virtual_memory()
cpu_pct = psutil.cpu_percent(interval=1)
sample = {
'time_sec': (i+1)*30,
'cpu_percent': cpu_pct,
'mem_percent': mem.percent,
'mem_used_gb': round(mem.used / (1024**3), 1),
'mem_total_gb': round(mem.total / (1024**3), 1)
}
samples.append(sample)
print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")
[p.join() for p in cpu_procs]
if mem_proc:
mem_proc.join()
samples = []
elapsed_total = 0
while elapsed_total < duration:
step = min(30, duration - elapsed_total)
time.sleep(step)
elapsed_total += step
mem = psutil.virtual_memory()
cpu_pct = psutil.cpu_percent(interval=1)
sample = {
'time_sec': elapsed_total,
'cpu_percent': cpu_pct,
'mem_percent': mem.percent,
'mem_used_gb': round(mem.used / (1024**3), 1),
'mem_total_gb': round(mem.total / (1024**3), 1)
}
samples.append(sample)
print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")
[p.join() for p in cpu_procs]
if mem_proc:
mem_proc.join()
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In
`@benchmark_runner/common/template_operations/templates/linstress/internal_data/linstress_vm_template.yaml`
around lines 78 - 96, The monitoring loop in the linstress template is
oversleeping for short runs because `intervals = max(1, duration // 30)` always
forces at least one 30-second wait, so `burn_cpu` and `burn_memory` can finish
before sampling starts. Update the loop in the monitoring section to use
elapsed-time tracking against the requested `duration` instead of a fixed
interval count, and only sleep for the remaining time needed before each sample.
Keep the sampling logic and `cpu_procs`/`mem_proc` joins in place, but make sure
the first sample is taken within the actual stress window even when `duration <
30`.


# Continuation of the main section: aggregate the per-worker entries that
# burn_cpu stored in result_dict, print a summary and write the JSON report.
total_ops = sum(r['ops'] for r in result_dict.values())
total_ops_per_sec = sum(r['ops_per_sec'] for r in result_dict.values())
# The average is taken over the number of workers that were launched.
avg_ops_per_sec = total_ops_per_sec / cpu_count if cpu_count > 0 else 0

print(f'CPU after: {psutil.cpu_percent(interval=1)}%')
print(f'Memory after: {psutil.virtual_memory().percent}%')
print(f'Total operations: {total_ops:,}')
print(f'Total throughput: {total_ops_per_sec:,.0f} ops/sec')
print(f'Avg per CPU: {avg_ops_per_sec:,.0f} ops/sec')

# Report layout: config (requested settings and machine size), throughput
# (totals plus one entry per worker, ordered by worker index) and the
# utilisation samples collected while the workers were running.
report = {
'config': {
'cpu_total': cpu_total,
'cpu_stressed': cpu_count,
'stress_cpu_percent': {{ stress_cpu }},
'stress_memory_percent': mem_target,
'duration_sec': duration,
'total_memory_gb': round(psutil.virtual_memory().total / (1024**3), 1)
},
'throughput': {
'total_ops': total_ops,
'total_ops_per_sec': round(total_ops_per_sec, 2),
'avg_ops_per_cpu': round(avg_ops_per_sec, 2),
'per_cpu': [{'cpu': i, 'ops': r['ops'], 'ops_per_sec': round(r['ops_per_sec'], 2)} for i, r in sorted(result_dict.items())]
},
'samples': samples
}

with open('/tmp/stress_report.json', 'w') as f:
json.dump(report, f, indent=2)
print('Report saved to /tmp/stress_report.json')
print('Done')
# Guest commands: install psutil on a best-effort basis (a dnf failure is
# ignored via "|| true"; the package is also listed under packages), then run
# the stress script written to /tmp/stress.py.
runcmd:
- export HOME=/root
- dnf install -y python3-psutil || true
- python3 /tmp/stress.py
---
# KubeVirt VirtualMachine for the linstress workload. It boots the container
# disk image given by fedora_container_disk and attaches the linstress
# cloud-init Secret as a cloudInitNoCloud volume.
# - The VM name and its kubevirt.io/vm label get a scale suffix only when scale
#   is set.
# - A nodeSelector on pin_node is rendered only when pin is true (string or
#   boolean form).
# - CPU topology and the memory request come from the sockets, cores, threads
#   and requests_memory render values.
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
{% if scale -%}
name: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }}
labels:
kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }}
{%- else -%}
name: linstress-{{ kind }}-{{ trunc_uuid }}
labels:
kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}
{%- endif %}
namespace: {{ namespace }}
spec:
running: true
template:
metadata:
labels:
{% if scale -%}
kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }}
{%- else -%}
kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}
{%- endif %}
spec:
{%- if pin == 'true' or pin == true %}
nodeSelector:
kubernetes.io/hostname: '{{ pin_node }}'
{%- endif %}
domain:
cpu:
sockets: {{ sockets }}
cores: {{ cores }}
threads: {{ threads }}
devices:
disks:
- disk:
bus: virtio
name: containerdisk
- disk:
bus: virtio
name: cloudinitdisk
interfaces:
- name: default
masquerade: {}
networkInterfaceMultiqueue: true
machine:
type: ""
resources:
requests:
memory: {{ requests_memory }}
terminationGracePeriodSeconds: 180
networks:
- name: default
pod: {}
volumes:
- name: containerdisk
containerDisk:
image: {{ fedora_container_disk }}
- name: cloudinitdisk
cloudInitNoCloud:
secretRef:
name: linstress-cloudinit-{{ trunc_uuid }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Render values for the linstress workload (presumably the defaults that a
# WORKLOAD_CONFIG override can replace - verify against the renderer).
# shared: values used for every run type.
#   stress_cpu and stress_memory are percentages and stress_duration is in
#   seconds, as consumed by the stress script in the VM template.
# run_type: VM sizing per run type (perf_ci vs default).
# NOTE(review): requests_cpu and odf_pvc are not referenced by the VM template
# visible in this change - confirm they are consumed elsewhere.
metadata:
name: linstress
template_data:
shared:
pin_node: {{ pin_node0 }}
odf_pvc: {{ odf_pvc }}
uuid: {{ uuid }}
fedora_container_disk: {{ fedora_container_disk }}
stress_cpu: 100
stress_memory: 50
stress_duration: 600
run_type:
perf_ci:
requests_memory: 8Gi
requests_cpu: 8
cores: 1
sockets: 8
threads: 1
default:
requests_memory: 1Gi
requests_cpu: 1
cores: 1
sockets: 2
threads: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Treat PowerShell errors as terminating so the script stops at the first one.
$ErrorActionPreference = 'Stop'

# Working directory, Python interpreter and generated script location.
$stressDir = 'C:\tools\stress'
$pythonExe = 'C:\Program Files\Python312\python.exe'
$stressScript = "$stressDir\stress.py"
# Location of the JSON report produced by the embedded Python script.
$reportFile = "$stressDir\stress_report.json"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win

Unused $reportFile variable; report path duplicated instead of reused.

Static analysis flags $reportFile as assigned but never used. The literal path C:\tools\stress\stress_report.json is hardcoded again at Lines 114 and 116, risking drift if $stressDir changes.

♻️ Proposed fix
-    with open(r'C:\tools\stress\stress_report.json', 'w') as f:
+    with open(r'$reportFile', 'w') as f:
         json.dump(report, f, indent=2)
-    print('Report saved to C:\\tools\\stress\\stress_report.json')
+    print('Report saved to $reportFile')
Based on learnings, PSScriptAnalyzer flagged: "The variable 'reportFile' is assigned but never used."

Also applies to: 114-116

🧰 Tools
🪛 PSScriptAnalyzer (1.25.0)

[warning] 6-6: The variable 'reportFile' is assigned but never used.

(PSUseDeclaredVarsMoreThanAssignments)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In
`@benchmark_runner/common/template_operations/templates/winstress/internal_data/01_run_stress_template.ps1`
at line 6, The $reportFile variable in 01_run_stress_template.ps1 is assigned
but never reused, while the report path is hardcoded again later in the script.
Update the logic around $reportFile so the same variable is used wherever the
stress report path is needed, including the report-writing and report-reading
spots in the script, and remove the duplicated literal path to keep it
consistent with $stressDir.

Source: Linters/SAST tools


# Create the working directory; -Force makes this a no-op when it already
# exists and Out-Null suppresses the directory object output.
New-Item -ItemType Directory -Force -Path $stressDir | Out-Null

# Python source of the stress script, held in an expandable (double-quoted)
# here-string that is written to disk further down.
$scriptContent = @"
import multiprocessing, time, psutil, json

def burn_cpu(d, result_dict, idx):
    """Spin on one CPU for d seconds and publish the achieved throughput.

    Args:
        d: Requested busy-loop duration in seconds.
        result_dict: Mapping shared with the parent process; the result is
            stored under idx.
        idx: Key identifying this worker in result_dict.

    The stored value is a dict with ops (loop iterations completed),
    elapsed (measured seconds) and ops_per_sec.
    """
    # perf_counter is monotonic and high resolution, so the measurement cannot
    # be skewed by wall-clock adjustments made while the loop is running.
    ops = 0
    start = time.perf_counter()
    end = start + d
    while time.perf_counter() < end:
        # NOTE(review): CPython appears to constant-fold 2**32, so an iteration
        # mostly costs the loop and the clock call - confirm that is the
        # intended unit of work.
        _ = 2**32
        ops += 1
    elapsed = time.perf_counter() - start
    result_dict[idx] = {
        'ops': ops,
        'elapsed': elapsed,
        # Guard against a zero-length measurement (d <= 0 or a coarse timer).
        'ops_per_sec': ops / elapsed if elapsed > 0 else 0.0,
    }

# Memory stress worker, started as a separate process by the main section.
# Allocates bytearrays until system-wide used memory should reach about
# target_percent of the total reported by psutil, then sleeps for duration
# seconds.
#   target_percent: desired overall memory utilisation in percent
#                   (presumably 0-100; verify against the caller)
#   duration: seconds to sleep after the allocation phase
# NOTE(review): indentation is not visible in this view, so the exact nesting of
# the statements below (for example whether the progress print runs per chunk)
# has to be checked against the real template file.
def burn_memory(target_percent, duration):
total = psutil.virtual_memory().total
target_bytes = int(total * target_percent / 100)
# Only the gap between the target and what is already in use gets allocated.
current_used = psutil.virtual_memory().used
alloc_bytes = target_bytes - current_used
if alloc_bytes <= 0:
print(f'Memory already at {psutil.virtual_memory().percent}%, skipping')
return
# Allocate in pieces of at most 100 MiB and keep every piece referenced from
# the blocks list.
chunk_size = 100 * 1024 * 1024
blocks = []
allocated = 0
while allocated < alloc_bytes:
size = min(chunk_size, alloc_bytes - allocated)
blocks.append(bytearray(size))
allocated += size
print(f'Allocated {allocated // (1024*1024)}MB / {alloc_bytes // (1024*1024)}MB ({psutil.virtual_memory().percent}%)')
print(f'Memory at {psutil.virtual_memory().percent}%, holding for {duration}s...')
time.sleep(duration)

if __name__ == '__main__':
    # Entry point of the generated stress script. The double-brace placeholders
    # are substituted when the template is rendered. The report path is filled
    # in by PowerShell from its reportFile variable when the surrounding
    # here-string is expanded, so the location is defined in one place only.
    stress_cpu = {{ stress_cpu }}
    duration = {{ stress_duration }}
    mem_target = {{ stress_memory }}
    report_path = r'$reportFile'

    cpu_total = multiprocessing.cpu_count()
    # Stress the requested share of the visible CPUs, but never fewer than one.
    cpu_count = max(1, int(cpu_total * stress_cpu / 100))

    print(f'CPU count: {cpu_total}')
    # The percentage is printed from a variable; an escaped template
    # placeholder here would print the placeholder text instead of the value.
    print(f'Stressing {cpu_count} CPUs ({stress_cpu}%) and {mem_target}% memory for {duration}s')
    print(f'Total memory: {psutil.virtual_memory().total // (1024**3)}GB')
    print(f'Memory before: {psutil.virtual_memory().percent}%')
    print(f'CPU before: {psutil.cpu_percent(interval=1)}%')

    mem_proc = None
    if mem_target > 0:
        mem_proc = multiprocessing.Process(target=burn_memory, args=(mem_target, duration))
        mem_proc.start()
        # Give the allocator a head start before the CPU workers begin.
        time.sleep(5)

    # Workers report their results through a manager-backed dict.
    manager = multiprocessing.Manager()
    result_dict = manager.dict()

    cpu_procs = [multiprocessing.Process(target=burn_cpu, args=(duration, result_dict, i)) for i in range(cpu_count)]
    for proc in cpu_procs:
        proc.start()

    # Sample every sample_period seconds, plus once at the end of the run.
    # Sleeps are computed against a monotonic deadline so that the blocking
    # cpu_percent window does not push later samples past the stress period,
    # and runs shorter than one period are still sampled while load is active.
    sample_period = 30
    cpu_window = 1
    stress_start = time.monotonic()
    samples = []
    mark = 0
    while mark < duration:
        mark = min(mark + sample_period, duration)
        # Start measuring cpu_window seconds early so the window ends at mark.
        pause = stress_start + mark - cpu_window - time.monotonic()
        if pause > 0:
            time.sleep(pause)
        mem = psutil.virtual_memory()
        cpu_pct = psutil.cpu_percent(interval=cpu_window)
        sample = {
            'time_sec': mark,
            'cpu_percent': cpu_pct,
            'mem_percent': mem.percent,
            'mem_used_gb': round(mem.used / (1024**3), 1),
            'mem_total_gb': round(mem.total / (1024**3), 1)
        }
        samples.append(sample)
        print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")

    for proc in cpu_procs:
        proc.join()
    if mem_proc:
        mem_proc.join()

    # Aggregate the per-worker entries stored by burn_cpu.
    total_ops = sum(r['ops'] for r in result_dict.values())
    total_ops_per_sec = sum(r['ops_per_sec'] for r in result_dict.values())
    avg_ops_per_sec = total_ops_per_sec / cpu_count if cpu_count > 0 else 0

    print(f'CPU after: {psutil.cpu_percent(interval=1)}%')
    print(f'Memory after: {psutil.virtual_memory().percent}%')
    print(f'Total operations: {total_ops:,}')
    print(f'Total throughput: {total_ops_per_sec:,.0f} ops/sec')
    print(f'Avg per CPU: {avg_ops_per_sec:,.0f} ops/sec')

    # Report layout: config (requested settings and machine size), throughput
    # (totals plus one entry per worker, ordered by worker index) and the
    # utilisation samples collected while the workers were running.
    report = {
        'config': {
            'cpu_total': cpu_total,
            'cpu_stressed': cpu_count,
            'stress_cpu_percent': stress_cpu,
            'stress_memory_percent': mem_target,
            'duration_sec': duration,
            'total_memory_gb': round(psutil.virtual_memory().total / (1024**3), 1)
        },
        'throughput': {
            'total_ops': total_ops,
            'total_ops_per_sec': round(total_ops_per_sec, 2),
            'avg_ops_per_cpu': round(avg_ops_per_sec, 2),
            'per_cpu': [{'cpu': i, 'ops': r['ops'], 'ops_per_sec': round(r['ops_per_sec'], 2)} for i, r in sorted(result_dict.items())]
        },
        'samples': samples
    }

    with open(report_path, 'w') as f:
        json.dump(report, f, indent=2)
    print(f'Report saved to {report_path}')
    print('Done')
"@

# Write the generated Python script to disk and run it with the configured
# interpreter.
Set-Content -Path $stressScript -Value $scriptContent -Force
Write-Host "Running stress test..."
& $pythonExe $stressScript
# The 'Stop' error preference does not apply to native executables, so a
# failing python.exe has to be detected through its exit code; otherwise the
# run would be reported as complete even though no report was produced.
if ($LASTEXITCODE -ne 0) {
    throw "Stress test failed: python exited with code $LASTEXITCODE"
}
Write-Host "Stress test complete"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# CDI DataVolume that provides the Windows disk image for this workload.
# The deleteAfterCompletion annotation is set to "false".
# NOTE(review): the object name is fixed; other Windows workload templates
# reportedly use the same name in the same namespace - confirm whether
# concurrent runs can collide before relying on it.
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
annotations:
cdi.kubevirt.io/storage.deleteAfterCompletion: "false"
name: windows-clone-dv

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check whether another workload template defines a DV with the same name
rg -n "windows-clone-dv" benchmark_runner/common/template_operations/templates

Repository: redhat-performance/benchmark-runner

Length of output: 1243


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect the relevant Windows templates for uniqueness/uuid usage
sed -n '1,220p' benchmark_runner/common/template_operations/templates/windows/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,240p' benchmark_runner/common/template_operations/templates/windows/internal_data/windows_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winstress/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winstress/internal_data/winstress_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winmssql/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winmssql/internal_data/winmssql_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winfio/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winfio/internal_data/winfio_vm_template.yaml

# Search for uuid usage in Windows templates
printf '\n=== uuid hits ===\n'
rg -n "uuid|windows-clone-dv" benchmark_runner/common/template_operations/templates/{windows,winstress,winmssql,winfio} -g '*.yaml'

Repository: redhat-performance/benchmark-runner

Length of output: 37077


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect Windows workload data templates for URL/uuid and whether the clone DV is intended to be shared
sed -n '1,220p' benchmark_runner/common/template_operations/templates/windows/windows_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winstress/winstress_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winmssql/winmssql_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winfio/winfio_data_template.yaml

printf '\n=== references to windows-clone-dv in Python code ===\n'
rg -n "windows-clone-dv|dataVolumeTemplates|source:\s*$|source:\n\s*pvc:" benchmark_runner -g '*.py' -g '*.yaml'

printf '\n=== any cleanup/delete logic for windows clone DV ===\n'
rg -n "deleteAfterCompletion|delete.*DataVolume|DataVolume.*delete|windows-clone" benchmark_runner -g '*.py' -g '*.yaml'

Repository: redhat-performance/benchmark-runner

Length of output: 3365


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Check whether namespace is run-specific or shared, and whether clone DV names are reused intentionally
rg -n "namespace:|name: windows-clone-dv|deleteAfterCompletion|uuid: \{\{ uuid \}\}" benchmark_runner/common/template_operations/templates -g '*.yaml' -g '*.py' -g '*.json'

printf '\n=== common template data / namespace sources ===\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/common_template.yaml
printf '\n---\n'
rg -n "namespace" benchmark_runner/main benchmark_runner/common/template_operations -g '*.py' -g '*.yaml'

Repository: redhat-performance/benchmark-runner

Length of output: 32316


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect workload namespace grouping for the Windows workloads
sed -n '120,220p' benchmark_runner/main/environment_variables.py
printf '\n---\n'
sed -n '430,470p' benchmark_runner/main/environment_variables.py

printf '\n=== windows-related workload names ===\n'
rg -n "\"(windows|winstress|winmssql|winfio)[^\"]*\"" benchmark_runner/main/environment_variables.py

Repository: redhat-performance/benchmark-runner

Length of output: 8725


Make the Windows clone DataVolume name unique

windows-clone-dv is shared by windows, winstress, winmssql, and winfio in the same benchmark-runner namespace, so concurrent Windows runs will contend for the same DataVolume object and can fail. Add a workload/run suffix such as {{ uuid }} or otherwise make this name unique.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In
`@benchmark_runner/common/template_operations/templates/winstress/internal_data/windows_dv_template.yaml`
at line 6, The DataVolume name in the Windows clone template is hardcoded and
shared across multiple Windows workloads, which can cause collisions during
concurrent runs. Update the name in windows_dv_template.yaml to be unique per
workload or per run by incorporating a suffix such as the existing template
uuid/identifier, and make sure the Windows clone path stays consistent in the
template that defines the DataVolume object.

# Namespace, image URL and volume size are supplied as render values. The image
# is fetched over HTTP into a ReadWriteMany, block-mode PVC on the storage
# class named below.
namespace: {{ namespace }}
spec:
source:
http:
url: {{ url }}
pvc:
accessModes:
- ReadWriteMany
resources:
requests:
storage: {{ storage }}
volumeMode: Block
storageClassName: ocs-storagecluster-ceph-rbd-virtualization
Loading