Skip to content

Commit e07970c

Browse files
authored
Fix Batching Coordinator Task Explosion (#8832)
1 parent 475b5c7 commit e07970c

File tree

74 files changed

+4005
-56
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+4005
-56
lines changed

.clabot

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"contributors": [],
3+
"allowlist": [
4+
"github-actions[bot]",
5+
"dependabot[bot]"
6+
]
7+
}

.github/workflows/ci.yml

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ on:
55
branches:
66
- main
77
- main-version-*
8+
paths-ignore:
9+
- 'src/HotChocolate/AspNetCore/benchmarks/k6/performance-data.json'
810

911
concurrency:
1012
group: ci-new-2-${{ github.event.pull_request.number }}
@@ -257,9 +259,170 @@ jobs:
257259
flags: unittests
258260
fail_ci_if_error: true
259261

262+
performance-tests:
263+
name: "Performance Tests"
264+
needs: check-changes
265+
if: needs.check-changes.outputs.src_changes == 'true' && github.event.pull_request.draft == false
266+
runs-on: ubuntu-latest
267+
permissions:
268+
contents: write
269+
pull-requests: write
270+
271+
steps:
272+
- name: Checkout PR code
273+
uses: actions/checkout@v4
274+
with:
275+
fetch-depth: 0
276+
show-progress: false
277+
278+
- name: Setup .NET
279+
uses: actions/setup-dotnet@v4
280+
with:
281+
dotnet-version: |
282+
8.x
283+
9.x
284+
10.x
285+
286+
- name: Install k6
287+
run: |
288+
sudo gpg -k
289+
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D69
290+
echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
291+
sudo apt-get update
292+
sudo apt-get install k6
293+
294+
- name: Install jq
295+
run: sudo apt-get install -y jq
296+
297+
- name: Restore dependencies
298+
run: dotnet restore src/HotChocolate/AspNetCore/benchmarks/k6/eShop.slnx
299+
300+
- name: Start AppHost and wait for readiness
301+
working-directory: src/HotChocolate/AspNetCore/benchmarks/k6/Catalog.AppHost
302+
run: |
303+
echo "Starting AppHost..."
304+
dotnet run > /tmp/apphost.log 2>&1 &
305+
APPHOST_PID=$!
306+
echo "APPHOST_PID=$APPHOST_PID" >> $GITHUB_ENV
307+
308+
echo "Waiting for server to be ready..."
309+
for i in {1..30}; do
310+
if curl -s -o /dev/null -w "%{http_code}" http://localhost:5224/graphql -X POST \
311+
-H "Content-Type: application/json" \
312+
-d '{"query": "{ __typename }"}' | grep -q "200"; then
313+
echo "Server is ready!"
314+
break
315+
fi
316+
echo "Waiting... ($i/30)"
317+
sleep 2
318+
done
319+
320+
- name: Run performance tests and collect data
321+
working-directory: src/HotChocolate/AspNetCore/benchmarks/k6
322+
run: |
323+
chmod +x run-and-collect.sh
324+
./run-and-collect.sh performance-data-current.json
325+
326+
- name: Stop AppHost
327+
if: always()
328+
run: |
329+
if [ -n "$APPHOST_PID" ]; then
330+
kill $APPHOST_PID 2>/dev/null || true
331+
wait $APPHOST_PID 2>/dev/null || true
332+
fi
333+
334+
- name: Commit and push performance data to current branch
335+
working-directory: src/HotChocolate/AspNetCore/benchmarks/k6
336+
run: |
337+
# Copy the performance data to the tracked filename
338+
cp performance-data-current.json performance-data.json
339+
340+
# Configure git
341+
git config user.name "github-actions[bot]"
342+
git config user.email "github-actions[bot]@users.noreply.github.com"
343+
344+
# Add and commit the performance data
345+
git add performance-data.json
346+
347+
# Only commit if there are changes
348+
if ! git diff --staged --quiet; then
349+
git commit -m "Update performance data [skip ci]"
350+
git push origin HEAD:${{ github.head_ref }}
351+
else
352+
echo "No changes to performance data"
353+
fi
354+
355+
- name: Fetch baseline performance data from main
356+
run: |
357+
git fetch origin main:main
358+
if git show main:src/HotChocolate/AspNetCore/benchmarks/k6/performance-data.json > baseline-performance.json 2>/dev/null; then
359+
echo "Baseline data fetched successfully"
360+
else
361+
echo "No baseline data found on main branch"
362+
echo '{}' > baseline-performance.json
363+
fi
364+
365+
- name: Compare performance and generate report
366+
working-directory: src/HotChocolate/AspNetCore/benchmarks/k6
367+
run: |
368+
chmod +x compare-performance.sh
369+
./compare-performance.sh performance-data-current.json ../../../../baseline-performance.json performance-report.md
370+
371+
- name: Comment PR with performance report
372+
uses: actions/github-script@v7
373+
with:
374+
github-token: ${{ secrets.GITHUB_TOKEN }}
375+
script: |
376+
const fs = require('fs');
377+
const reportPath = 'src/HotChocolate/AspNetCore/benchmarks/k6/performance-report.md';
378+
379+
let report;
380+
try {
381+
report = fs.readFileSync(reportPath, 'utf8');
382+
} catch (error) {
383+
console.error('Failed to read performance report:', error);
384+
return;
385+
}
386+
387+
// Add timestamp and commit info to the report
388+
const timestamp = new Date().toUTCString();
389+
const commitSha = context.sha.substring(0, 7);
390+
const runNumber = context.runNumber;
391+
392+
const commentBody = `${report}\n\n---\n*Run #${runNumber} • Commit ${commitSha} • ${timestamp}*`;
393+
394+
// Always create a new comment
395+
await github.rest.issues.createComment({
396+
owner: context.repo.owner,
397+
repo: context.repo.repo,
398+
issue_number: context.issue.number,
399+
body: commentBody,
400+
});
401+
402+
- name: Upload performance data as artifact
403+
uses: actions/upload-artifact@v4
404+
if: always()
405+
with:
406+
name: performance-data
407+
path: |
408+
src/HotChocolate/AspNetCore/benchmarks/k6/performance-data-current.json
409+
src/HotChocolate/AspNetCore/benchmarks/k6/performance-report.md
410+
/tmp/apphost.log
411+
retention-days: 30
412+
413+
- name: Check for performance regression
414+
working-directory: src/HotChocolate/AspNetCore/benchmarks/k6
415+
run: |
416+
# Fail the build if there's a significant performance regression
417+
if grep -q "⚠️ \*\*Performance regression detected" performance-report.md; then
418+
echo "::warning::Performance regression detected! Please review the performance report."
419+
# Uncomment the next line to fail the build on regression
420+
# exit 1
421+
fi
422+
260423
ci-status-check:
261424
name: "CI Status Check"
262-
needs: [library-tests, website-tests]
425+
needs: [library-tests, website-tests, performance-tests]
263426
if: always()
264427
runs-on: ubuntu-latest
265428
steps:
@@ -268,4 +431,5 @@ jobs:
268431
if: |
269432
always() &&
270433
(needs.library-tests.result == 'failure' ||
271-
needs.website-tests.result == 'failure')
434+
needs.website-tests.result == 'failure' ||
435+
needs.performance-tests.result == 'failure')

src/All.slnx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@
3535
<Project Path="HotChocolate/ApolloFederation/test/ApolloFederation.Tests/HotChocolate.ApolloFederation.Tests.csproj" />
3636
</Folder>
3737
<Folder Name="/HotChocolate/AspNetCore/" />
38+
<Folder Name="/HotChocolate/AspNetCore/benchmarks/" />
39+
<Folder Name="/HotChocolate/AspNetCore/benchmarks/k6/">
40+
<Project Path="HotChocolate/AspNetCore/benchmarks/k6/Catalog.AppHost/eShop.Catalog.AppHost.csproj" />
41+
<Project Path="HotChocolate/AspNetCore/benchmarks/k6/Catalog.API/eShop.Catalog.API.csproj" />
42+
<Project Path="HotChocolate/AspNetCore/benchmarks/k6/Catalog.ServiceDefaults/eShop.Catalog.ServiceDefaults.csproj" />
43+
</Folder>
3844
<Folder Name="/HotChocolate/AspNetCore/src/">
3945
<Project Path="HotChocolate/AspNetCore/src/AspNetCore.Authorization.Opa/HotChocolate.AspNetCore.Authorization.Opa.csproj" />
4046
<Project Path="HotChocolate/AspNetCore/src/AspNetCore.Authorization/HotChocolate.AspNetCore.Authorization.csproj" />

src/Directory.Packages.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
<PackageVersion Include="Aspire.Hosting.RabbitMQ" Version="9.4.1" />
1010
<PackageVersion Include="Aspire.Hosting.Redis" Version="9.4.1" />
1111
<PackageVersion Include="Aspire.Hosting" Version="9.4.1" />
12+
<PackageVersion Include="Aspire.AppHost.Sdk" Version="1.9.0" />
1213
<PackageVersion Include="Azure.Storage.Blobs" Version="12.23.0" />
1314
<PackageVersion Include="Basic.Reference.Assemblies.Net100" Version="1.8.3" />
1415
<PackageVersion Include="Basic.Reference.Assemblies.Net80" Version="1.8.3" />

src/GreenDonut/src/GreenDonut.Abstractions/Batch.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ public abstract class Batch
3838
/// </summary>
3939
public abstract BatchStatus Status { get; }
4040

41+
/// <summary>
42+
/// Gets a high-resolution timestamp representing when this batch was first created.
43+
/// This value is used to enforce maximum batch age timeouts to prevent starvation.
44+
/// </summary>
45+
public abstract long CreatedTimestamp { get; }
46+
4147
/// <summary>
4248
/// Gets a high-resolution timestamp from representing the last time an item was added to this batch.
4349
/// This value is used for recency checks in scheduling decisions.

src/GreenDonut/src/GreenDonut/Batch.cs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ internal sealed class Batch<TKey> : Batch where TKey : notnull
1212
private Func<Batch<TKey>, CancellationToken, ValueTask> _dispatch = null!;
1313
private CancellationToken _ct = CancellationToken.None;
1414
private int _status = Enqueued;
15-
private long _timestamp;
15+
private long _createdTimestamp;
16+
private long _modifiedTimestamp;
17+
private CancellationTokenRegistration _promiseCancellationRegistration;
1618

1719
public bool IsScheduled { get; set; }
1820

@@ -22,7 +24,9 @@ internal sealed class Batch<TKey> : Batch where TKey : notnull
2224

2325
public override BatchStatus Status => (BatchStatus)_status;
2426

25-
public override long ModifiedTimestamp => _timestamp;
27+
public override long CreatedTimestamp => _createdTimestamp;
28+
29+
public override long ModifiedTimestamp => _modifiedTimestamp;
2630

2731
public override bool Touch()
2832
{
@@ -36,7 +40,7 @@ public Promise<TValue> GetOrCreatePromise<TValue>(TKey key, bool allowCachePropa
3640
// as long as there are components interacting with this batch its good to
3741
// keep it in enqueued state.
3842
Interlocked.Exchange(ref _status, Enqueued);
39-
_timestamp = Stopwatch.GetTimestamp();
43+
_modifiedTimestamp = Stopwatch.GetTimestamp();
4044

4145
if (_items.TryGetValue(key, out var value))
4246
{
@@ -55,13 +59,19 @@ public Promise<TValue> GetPromise<TValue>(TKey key)
5559
=> (Promise<TValue>)_items[key];
5660

5761
public override async Task DispatchAsync()
58-
=> await _dispatch(this, _ct);
62+
{
63+
await _dispatch(this, _ct);
64+
_promiseCancellationRegistration.Dispose();
65+
}
5966

6067
internal void Initialize(Func<Batch<TKey>, CancellationToken, ValueTask> dispatch, CancellationToken ct)
6168
{
6269
_status = Enqueued;
6370
_dispatch = dispatch;
6471
_ct = ct;
72+
_createdTimestamp = Stopwatch.GetTimestamp();
73+
_modifiedTimestamp = _createdTimestamp;
74+
_promiseCancellationRegistration = ct.Register(TryCancelPromises);
6575
}
6676

6777
internal void ClearUnsafe()
@@ -72,5 +82,16 @@ internal void ClearUnsafe()
7282
_status = Enqueued;
7383
_dispatch = null!;
7484
_ct = CancellationToken.None;
85+
_createdTimestamp = 0;
86+
_modifiedTimestamp = 0;
87+
_promiseCancellationRegistration.Dispose();
88+
}
89+
90+
internal void TryCancelPromises()
91+
{
92+
foreach (var item in _items.Values)
93+
{
94+
item.TryCancel();
95+
}
7596
}
7697
}

src/GreenDonut/src/GreenDonut/Instrumentation/AggregateDataLoaderDiagnosticEventListener.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,46 @@ public override void BatchItemError<TKey>(
6060
}
6161
}
6262

63+
/// <inheritdoc />
64+
public override IDisposable RunBatchDispatchCoordinator()
65+
{
66+
var scopes = new IDisposable[listeners.Length];
67+
68+
for (var i = 0; i < listeners.Length; i++)
69+
{
70+
scopes[i] = listeners[i].RunBatchDispatchCoordinator();
71+
}
72+
73+
return new AggregateEventScope(scopes);
74+
}
75+
76+
/// <inheritdoc />
77+
public override void BatchDispatchError(Exception error)
78+
{
79+
for (var i = 0; i < listeners.Length; i++)
80+
{
81+
listeners[i].BatchDispatchError(error);
82+
}
83+
}
84+
85+
/// <inheritdoc />
86+
public override void BatchEvaluated(int openBatches)
87+
{
88+
for (var i = 0; i < listeners.Length; i++)
89+
{
90+
listeners[i].BatchEvaluated(openBatches);
91+
}
92+
}
93+
94+
/// <inheritdoc />
95+
public override void BatchDispatched(int dispatchedBatches, bool inParallel)
96+
{
97+
for (var i = 0; i < listeners.Length; i++)
98+
{
99+
listeners[i].BatchDispatched(dispatchedBatches, inParallel);
100+
}
101+
}
102+
63103
private sealed class AggregateEventScope(IDisposable[] scopes) : IDisposable
64104
{
65105
public void Dispose()

src/GreenDonut/src/GreenDonut/Instrumentation/DataLoaderDiagnosticEventListener.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ public virtual void BatchItemError<TKey>(
4646
where TKey : notnull
4747
{ }
4848

49+
/// <inheritdoc />
50+
public virtual IDisposable RunBatchDispatchCoordinator()
51+
=> EmptyScope;
52+
53+
/// <inheritdoc />
54+
public virtual void BatchDispatchError(Exception error)
55+
{ }
56+
57+
/// <inheritdoc />
58+
public virtual void BatchEvaluated(int openBatches)
59+
{ }
60+
61+
/// <inheritdoc />
62+
public virtual void BatchDispatched(int dispatchedBatches, bool inParallel)
63+
{ }
64+
4965
private sealed class EmptyActivityScope : IDisposable
5066
{
5167
public void Dispose() { }

0 commit comments

Comments
 (0)