-
Notifications
You must be signed in to change notification settings - Fork 24
Add winstress_vm, linstress_vm workloads and WORKLOAD_CONFIG override #1252
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,196 @@ | ||
| apiVersion: v1 | ||
| kind: Secret | ||
| metadata: | ||
| name: linstress-cloudinit-{{ trunc_uuid }} | ||
| namespace: {{ namespace }} | ||
| stringData: | ||
| userdata: | | ||
| #cloud-config | ||
| user: fedora | ||
| password: fedora | ||
| chpasswd: { expire: False } | ||
| ssh_pwauth: true | ||
| {%- if ssh_public_key is defined and ssh_public_key %} | ||
| ssh_authorized_keys: | ||
| - "{{ ssh_public_key }}" | ||
| {%- endif %} | ||
| packages: | ||
| - python3-psutil | ||
| write_files: | ||
| - path: /tmp/stress.py | ||
| permissions: '0755' | ||
| content: | | ||
| import multiprocessing, time, psutil, json | ||
|
|
||
def burn_cpu(d, result_dict, idx):
    """Busy-loop on one CPU for d seconds and record the achieved throughput.

    Args:
        d: How long to spin, in seconds.
        result_dict: Mapping that receives this worker's result under idx
            (the caller passes a multiprocessing manager dict).
        idx: Key identifying this worker inside result_dict.

    The stored value is a dict with the keys 'ops' (loop iterations),
    'elapsed' (seconds actually spent) and 'ops_per_sec'.
    """
    ops = 0
    # Use the monotonic clock: the wall clock may be stepped by time
    # synchronisation while the guest is still booting, which would shorten
    # or stretch the stress window.
    start = time.monotonic()
    end = start + d
    while time.monotonic() < end:
        _ = 2**32
        ops += 1
    elapsed = time.monotonic() - start
    # A zero duration (or a coarse timer) can leave elapsed at 0; report a
    # rate of 0 instead of raising ZeroDivisionError inside the worker.
    ops_per_sec = ops / elapsed if elapsed > 0 else 0.0
    result_dict[idx] = {'ops': ops, 'elapsed': elapsed, 'ops_per_sec': ops_per_sec}
|
|
||
def burn_memory(target_percent, duration):
    """Allocate memory until system usage reaches target_percent, then hold it.

    Args:
        target_percent: Desired share of total system memory in use, 0-100.
        duration: Seconds to keep the allocation alive once the target is hit.

    Nothing is allocated when current usage already meets the target.
    """
    wanted_bytes = int(psutil.virtual_memory().total * target_percent / 100)
    alloc_bytes = wanted_bytes - psutil.virtual_memory().used
    if alloc_bytes > 0:
        step = 100 * 1024 * 1024  # grow in 100 MiB pieces
        held = []  # keeps every piece referenced so it is not freed
        allocated = 0
        while allocated < alloc_bytes:
            piece = min(step, alloc_bytes - allocated)
            held.append(bytearray(piece))
            allocated += piece
            print(f'Allocated {allocated // (1024*1024)}MB / {alloc_bytes // (1024*1024)}MB ({psutil.virtual_memory().percent}%)')
        print(f'Memory at {psutil.virtual_memory().percent}%, holding for {duration}s...')
        time.sleep(duration)
    else:
        print(f'Memory already at {psutil.virtual_memory().percent}%, skipping')
|
|
||
if __name__ == '__main__':
    # Entry point of the stress run: start one memory worker and a set of CPU
    # workers, sample system CPU/memory usage while they run, then print a
    # summary and save a JSON report.
    # The three placeholders below are filled in by the template renderer;
    # each one is bound to a variable exactly once and reused from there.
    cpu_total = multiprocessing.cpu_count()
    stress_cpu_pct = {{ stress_cpu }}
    cpu_count = max(1, int(cpu_total * stress_cpu_pct / 100))
    duration = {{ stress_duration }}
    mem_target = {{ stress_memory }}

    print(f'CPU count: {cpu_total}')
    print(f'Stressing {cpu_count} CPUs ({stress_cpu_pct}%) and {mem_target}% memory for {duration}s')
    print(f'Total memory: {psutil.virtual_memory().total // (1024**3)}GB')
    print(f'Memory before: {psutil.virtual_memory().percent}%')
    print(f'CPU before: {psutil.cpu_percent(interval=1)}%')

    mem_proc = None
    if mem_target > 0:
        mem_proc = multiprocessing.Process(target=burn_memory, args=(mem_target, duration))
        mem_proc.start()
        # Give the memory worker a head start before the CPU workers begin.
        time.sleep(5)

    manager = multiprocessing.Manager()
    result_dict = manager.dict()

    cpu_procs = [multiprocessing.Process(target=burn_cpu, args=(duration, result_dict, i)) for i in range(cpu_count)]
    # Taken just before the workers start so every sample below ends no later
    # than the workers do.
    monitor_start = time.monotonic()
    for p in cpu_procs:
        p.start()

    sample_period = 30
    intervals = max(1, duration // sample_period)
    samples = []
    for i in range(intervals):
        # Nominal sample time. Capped at the run length so a run shorter than
        # one period is still sampled while the workers are busy.
        time_sec = min((i + 1) * sample_period, duration)
        # cpu_percent(interval=1) blocks for one second, so wake up one second
        # early. Scheduling against the monotonic clock stops that second from
        # accumulating and pushing late samples past the end of the run.
        delay = monitor_start + time_sec - 1 - time.monotonic()
        if delay > 0:
            time.sleep(delay)
        mem = psutil.virtual_memory()
        cpu_pct = psutil.cpu_percent(interval=1)
        sample = {
            'time_sec': time_sec,
            'cpu_percent': cpu_pct,
            'mem_percent': mem.percent,
            'mem_used_gb': round(mem.used / (1024**3), 1),
            'mem_total_gb': round(mem.total / (1024**3), 1)
        }
        samples.append(sample)
        print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")

    for p in cpu_procs:
        p.join()
    if mem_proc:
        mem_proc.join()

    total_ops = sum(r['ops'] for r in result_dict.values())
    total_ops_per_sec = sum(r['ops_per_sec'] for r in result_dict.values())
    avg_ops_per_sec = total_ops_per_sec / cpu_count if cpu_count > 0 else 0

    print(f'CPU after: {psutil.cpu_percent(interval=1)}%')
    print(f'Memory after: {psutil.virtual_memory().percent}%')
    print(f'Total operations: {total_ops:,}')
    print(f'Total throughput: {total_ops_per_sec:,.0f} ops/sec')
    print(f'Avg per CPU: {avg_ops_per_sec:,.0f} ops/sec')

    report = {
        'config': {
            'cpu_total': cpu_total,
            'cpu_stressed': cpu_count,
            'stress_cpu_percent': stress_cpu_pct,
            'stress_memory_percent': mem_target,
            'duration_sec': duration,
            'total_memory_gb': round(psutil.virtual_memory().total / (1024**3), 1)
        },
        'throughput': {
            'total_ops': total_ops,
            'total_ops_per_sec': round(total_ops_per_sec, 2),
            'avg_ops_per_cpu': round(avg_ops_per_sec, 2),
            'per_cpu': [{'cpu': i, 'ops': r['ops'], 'ops_per_sec': round(r['ops_per_sec'], 2)} for i, r in sorted(result_dict.items())]
        },
        'samples': samples
    }

    with open('/tmp/stress_report.json', 'w') as f:
        json.dump(report, f, indent=2)
    print('Report saved to /tmp/stress_report.json')
    print('Done')
| runcmd: | ||
| - export HOME=/root | ||
| - dnf install -y python3-psutil || true | ||
| - python3 /tmp/stress.py | ||
| --- | ||
| apiVersion: kubevirt.io/v1 | ||
| kind: VirtualMachine | ||
| metadata: | ||
| {% if scale -%} | ||
| name: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }} | ||
| labels: | ||
| kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }} | ||
| {%- else -%} | ||
| name: linstress-{{ kind }}-{{ trunc_uuid }} | ||
| labels: | ||
| kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }} | ||
| {%- endif %} | ||
| namespace: {{ namespace }} | ||
| spec: | ||
| running: true | ||
| template: | ||
| metadata: | ||
| labels: | ||
| {% if scale -%} | ||
| kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }}-{{ scale }} | ||
| {%- else -%} | ||
| kubevirt.io/vm: linstress-{{ kind }}-{{ trunc_uuid }} | ||
| {%- endif %} | ||
| spec: | ||
| {%- if pin == 'true' or pin == true %} | ||
| nodeSelector: | ||
| kubernetes.io/hostname: '{{ pin_node }}' | ||
| {%- endif %} | ||
| domain: | ||
| cpu: | ||
| sockets: {{ sockets }} | ||
| cores: {{ cores }} | ||
| threads: {{ threads }} | ||
| devices: | ||
| disks: | ||
| - disk: | ||
| bus: virtio | ||
| name: containerdisk | ||
| - disk: | ||
| bus: virtio | ||
| name: cloudinitdisk | ||
| interfaces: | ||
| - name: default | ||
| masquerade: {} | ||
| networkInterfaceMultiqueue: true | ||
| machine: | ||
| type: "" | ||
| resources: | ||
| requests: | ||
| memory: {{ requests_memory }} | ||
| terminationGracePeriodSeconds: 180 | ||
| networks: | ||
| - name: default | ||
| pod: {} | ||
| volumes: | ||
| - name: containerdisk | ||
| containerDisk: | ||
| image: {{ fedora_container_disk }} | ||
| - name: cloudinitdisk | ||
| cloudInitNoCloud: | ||
| secretRef: | ||
| name: linstress-cloudinit-{{ trunc_uuid }} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| metadata: | ||
| name: linstress | ||
| template_data: | ||
| shared: | ||
| pin_node: {{ pin_node0 }} | ||
| odf_pvc: {{ odf_pvc }} | ||
| uuid: {{ uuid }} | ||
| fedora_container_disk: {{ fedora_container_disk }} | ||
| stress_cpu: 100 | ||
| stress_memory: 50 | ||
| stress_duration: 600 | ||
| run_type: | ||
| perf_ci: | ||
| requests_memory: 8Gi | ||
| requests_cpu: 8 | ||
| cores: 1 | ||
| sockets: 8 | ||
| threads: 1 | ||
| default: | ||
| requests_memory: 1Gi | ||
| requests_cpu: 1 | ||
| cores: 1 | ||
| sockets: 2 | ||
| threads: 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| $ErrorActionPreference = 'Stop' | ||
|
|
||
| $stressDir = 'C:\tools\stress' | ||
| $pythonExe = 'C:\Program Files\Python312\python.exe' | ||
| $stressScript = "$stressDir\stress.py" | ||
| $reportFile = "$stressDir\stress_report.json" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win
Unused `$reportFile` variable. Static analysis flags `$reportFile` as assigned but never used.
♻️ Proposed fix
- with open(r'C:\tools\stress\stress_report.json', 'w') as f:
+ with open(r'$reportFile', 'w') as f:
json.dump(report, f, indent=2)
- print('Report saved to C:\\tools\\stress\\stress_report.json')
+ print('Report saved to $reportFile')
Also applies to: 114-116
🧰 Tools
🪛 PSScriptAnalyzer (1.25.0)
[warning] 6-6: The variable 'reportFile' is assigned but never used. (PSUseDeclaredVarsMoreThanAssignments)
🤖 Prompt for AI Agents
Source: Linters/SAST tools |
||
|
|
||
| New-Item -ItemType Directory -Force -Path $stressDir | Out-Null | ||
|
|
||
| $scriptContent = @" | ||
| import multiprocessing, time, psutil, json | ||
|
|
||
def burn_cpu(d, result_dict, idx):
    """Busy-loop on one CPU for d seconds and record the achieved throughput.

    Args:
        d: How long to spin, in seconds.
        result_dict: Mapping that receives this worker's result under idx
            (the caller passes a multiprocessing manager dict).
        idx: Key identifying this worker inside result_dict.

    The stored value is a dict with the keys 'ops' (loop iterations),
    'elapsed' (seconds actually spent) and 'ops_per_sec'.
    """
    ops = 0
    # Use the monotonic clock: the wall clock may be stepped by time
    # synchronisation during the run, which would shorten or stretch the
    # stress window.
    start = time.monotonic()
    end = start + d
    while time.monotonic() < end:
        _ = 2**32
        ops += 1
    elapsed = time.monotonic() - start
    # A zero duration (or a coarse timer) can leave elapsed at 0; report a
    # rate of 0 instead of raising ZeroDivisionError inside the worker.
    ops_per_sec = ops / elapsed if elapsed > 0 else 0.0
    result_dict[idx] = {'ops': ops, 'elapsed': elapsed, 'ops_per_sec': ops_per_sec}
|
|
||
def burn_memory(target_percent, duration):
    """Allocate memory until system usage reaches target_percent, then hold it.

    Args:
        target_percent: Desired share of total system memory in use, 0-100.
        duration: Seconds to keep the allocation alive once the target is hit.

    Nothing is allocated when current usage already meets the target.
    """
    wanted_bytes = int(psutil.virtual_memory().total * target_percent / 100)
    alloc_bytes = wanted_bytes - psutil.virtual_memory().used
    if alloc_bytes > 0:
        step = 100 * 1024 * 1024  # grow in 100 MiB pieces
        held = []  # keeps every piece referenced so it is not freed
        allocated = 0
        while allocated < alloc_bytes:
            piece = min(step, alloc_bytes - allocated)
            held.append(bytearray(piece))
            allocated += piece
            print(f'Allocated {allocated // (1024*1024)}MB / {alloc_bytes // (1024*1024)}MB ({psutil.virtual_memory().percent}%)')
        print(f'Memory at {psutil.virtual_memory().percent}%, holding for {duration}s...')
        time.sleep(duration)
    else:
        print(f'Memory already at {psutil.virtual_memory().percent}%, skipping')
|
|
||
if __name__ == '__main__':
    # Entry point of the stress run: start one memory worker and a set of CPU
    # workers, sample system CPU/memory usage while they run, then print a
    # summary and save a JSON report.
    # The three placeholders below are filled in by the template renderer;
    # each one is bound to a variable exactly once and reused from there, so
    # no placeholder has to be escaped inside an f-string.
    cpu_total = multiprocessing.cpu_count()
    stress_cpu_pct = {{ stress_cpu }}
    cpu_count = max(1, int(cpu_total * stress_cpu_pct / 100))
    duration = {{ stress_duration }}
    mem_target = {{ stress_memory }}

    print(f'CPU count: {cpu_total}')
    print(f'Stressing {cpu_count} CPUs ({stress_cpu_pct}%) and {mem_target}% memory for {duration}s')
    print(f'Total memory: {psutil.virtual_memory().total // (1024**3)}GB')
    print(f'Memory before: {psutil.virtual_memory().percent}%')
    print(f'CPU before: {psutil.cpu_percent(interval=1)}%')

    mem_proc = None
    if mem_target > 0:
        mem_proc = multiprocessing.Process(target=burn_memory, args=(mem_target, duration))
        mem_proc.start()
        # Give the memory worker a head start before the CPU workers begin.
        time.sleep(5)

    manager = multiprocessing.Manager()
    result_dict = manager.dict()

    cpu_procs = [multiprocessing.Process(target=burn_cpu, args=(duration, result_dict, i)) for i in range(cpu_count)]
    # Taken just before the workers start so every sample below ends no later
    # than the workers do.
    monitor_start = time.monotonic()
    for p in cpu_procs:
        p.start()

    sample_period = 30
    intervals = max(1, duration // sample_period)
    samples = []
    for i in range(intervals):
        # Nominal sample time. Capped at the run length so a run shorter than
        # one period is still sampled while the workers are busy.
        time_sec = min((i + 1) * sample_period, duration)
        # cpu_percent(interval=1) blocks for one second, so wake up one second
        # early. Scheduling against the monotonic clock stops that second from
        # accumulating and pushing late samples past the end of the run.
        delay = monitor_start + time_sec - 1 - time.monotonic()
        if delay > 0:
            time.sleep(delay)
        mem = psutil.virtual_memory()
        cpu_pct = psutil.cpu_percent(interval=1)
        sample = {
            'time_sec': time_sec,
            'cpu_percent': cpu_pct,
            'mem_percent': mem.percent,
            'mem_used_gb': round(mem.used / (1024**3), 1),
            'mem_total_gb': round(mem.total / (1024**3), 1)
        }
        samples.append(sample)
        print(f"At {sample['time_sec']}s: CPU={cpu_pct}% MEM={mem.percent}% ({sample['mem_used_gb']}GB/{sample['mem_total_gb']}GB)")

    for p in cpu_procs:
        p.join()
    if mem_proc:
        mem_proc.join()

    total_ops = sum(r['ops'] for r in result_dict.values())
    total_ops_per_sec = sum(r['ops_per_sec'] for r in result_dict.values())
    avg_ops_per_sec = total_ops_per_sec / cpu_count if cpu_count > 0 else 0

    print(f'CPU after: {psutil.cpu_percent(interval=1)}%')
    print(f'Memory after: {psutil.virtual_memory().percent}%')
    print(f'Total operations: {total_ops:,}')
    print(f'Total throughput: {total_ops_per_sec:,.0f} ops/sec')
    print(f'Avg per CPU: {avg_ops_per_sec:,.0f} ops/sec')

    report = {
        'config': {
            'cpu_total': cpu_total,
            'cpu_stressed': cpu_count,
            'stress_cpu_percent': stress_cpu_pct,
            'stress_memory_percent': mem_target,
            'duration_sec': duration,
            'total_memory_gb': round(psutil.virtual_memory().total / (1024**3), 1)
        },
        'throughput': {
            'total_ops': total_ops,
            'total_ops_per_sec': round(total_ops_per_sec, 2),
            'avg_ops_per_cpu': round(avg_ops_per_sec, 2),
            'per_cpu': [{'cpu': i, 'ops': r['ops'], 'ops_per_sec': round(r['ops_per_sec'], 2)} for i, r in sorted(result_dict.items())]
        },
        'samples': samples
    }

    with open(r'C:\tools\stress\stress_report.json', 'w') as f:
        json.dump(report, f, indent=2)
    print('Report saved to C:\\tools\\stress\\stress_report.json')
    print('Done')
| "@ | ||
|
|
||
| Set-Content -Path $stressScript -Value $scriptContent -Force | ||
| Write-Host "Running stress test..." | ||
| & $pythonExe $stressScript | ||
| Write-Host "Stress test complete" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| apiVersion: cdi.kubevirt.io/v1beta1 | ||
| kind: DataVolume | ||
| metadata: | ||
| annotations: | ||
| cdi.kubevirt.io/storage.deleteAfterCompletion: "false" | ||
| name: windows-clone-dv | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Check whether another workload template defines a DV with the same name
rg -n "windows-clone-dv" benchmark_runner/common/template_operations/templatesRepository: redhat-performance/benchmark-runner Length of output: 1243 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect the relevant Windows templates for uniqueness/uuid usage
sed -n '1,220p' benchmark_runner/common/template_operations/templates/windows/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,240p' benchmark_runner/common/template_operations/templates/windows/internal_data/windows_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winstress/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winstress/internal_data/winstress_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winmssql/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winmssql/internal_data/winmssql_vm_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winfio/internal_data/windows_dv_template.yaml
printf '\n---\n'
sed -n '1,260p' benchmark_runner/common/template_operations/templates/winfio/internal_data/winfio_vm_template.yaml
# Search for uuid usage in Windows templates
printf '\n=== uuid hits ===\n'
rg -n "uuid|windows-clone-dv" benchmark_runner/common/template_operations/templates/{windows,winstress,winmssql,winfio} -g '*.yaml'Repository: redhat-performance/benchmark-runner Length of output: 37077 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect Windows workload data templates for URL/uuid and whether the clone DV is intended to be shared
sed -n '1,220p' benchmark_runner/common/template_operations/templates/windows/windows_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winstress/winstress_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winmssql/winmssql_data_template.yaml
printf '\n---\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/winfio/winfio_data_template.yaml
printf '\n=== references to windows-clone-dv in Python code ===\n'
rg -n "windows-clone-dv|dataVolumeTemplates|source:\s*$|source:\n\s*pvc:" benchmark_runner -g '*.py' -g '*.yaml'
printf '\n=== any cleanup/delete logic for windows clone DV ===\n'
rg -n "deleteAfterCompletion|delete.*DataVolume|DataVolume.*delete|windows-clone" benchmark_runner -g '*.py' -g '*.yaml'Repository: redhat-performance/benchmark-runner Length of output: 3365 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Check whether namespace is run-specific or shared, and whether clone DV names are reused intentionally
rg -n "namespace:|name: windows-clone-dv|deleteAfterCompletion|uuid: \{\{ uuid \}\}" benchmark_runner/common/template_operations/templates -g '*.yaml' -g '*.py' -g '*.json'
printf '\n=== common template data / namespace sources ===\n'
sed -n '1,220p' benchmark_runner/common/template_operations/templates/common_template.yaml
printf '\n---\n'
rg -n "namespace" benchmark_runner/main benchmark_runner/common/template_operations -g '*.py' -g '*.yaml'Repository: redhat-performance/benchmark-runner Length of output: 32316 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect workload namespace grouping for the Windows workloads
sed -n '120,220p' benchmark_runner/main/environment_variables.py
printf '\n---\n'
sed -n '430,470p' benchmark_runner/main/environment_variables.py
printf '\n=== windows-related workload names ===\n'
rg -n "\"(windows|winstress|winmssql|winfio)[^\"]*\"" benchmark_runner/main/environment_variables.py
Repository: redhat-performance/benchmark-runner
Length of output: 8725
Make the Windows clone DataVolume name unique
🤖 Prompt for AI Agents |
||
| namespace: {{ namespace }} | ||
| spec: | ||
| source: | ||
| http: | ||
| url: {{ url }} | ||
| pvc: | ||
| accessModes: | ||
| - ReadWriteMany | ||
| resources: | ||
| requests: | ||
| storage: {{ storage }} | ||
| volumeMode: Block | ||
| storageClassName: ocs-storagecluster-ceph-rbd-virtualization | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🎯 Functional Correctness | 🟠 Major | ⚡ Quick win
Monitoring loop can sleep past the actual stress duration.
`intervals = max(1, duration // 30)` forces at least one `time.sleep(30)` iteration even when `duration < 30`. In that case the `burn_cpu`/`burn_memory` workers finish long before the monitor wakes up, so the captured CPU/memory samples reflect the post-stress idle state rather than the actual stress window — producing misleading benchmark data for short-duration runs (a common case for quick/`test_ci` iterations). 🐛 Proposed fix using elapsed-time tracking instead of a fixed iteration count
📝 Committable suggestion
🤖 Prompt for AI Agents