Skip to content

Commit ae12dfc

Browse files
committed
benchmark: fix calibrate-n option handling
Fix broken parsing for --cv-threshold and --max-increases so that the value after the equals sign is used. Use the configured CV threshold when checking overall stability, and report the configured thresholds in the calibrate-n output. Also handle invalid numeric values for --runs and --cv-threshold consistently with the other numeric options.

Signed-off-by: Luan Muniz <luan@luanmuniz.com.br>
1 parent ccc2d7c commit ae12dfc

1 file changed

Lines changed: 32 additions & 21 deletions

File tree

benchmark/calibrate-n.js

Lines changed: 32 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@ const { styleText } = require('node:util');
88
const DEFAULT_RUNS = 30; // Number of runs for each n value
99
const CV_THRESHOLD = 0.05; // 5% coefficient of variation threshold
1010
const MAX_N_INCREASE = 6; // Maximum number of times to increase n (10**6)
11+
const MAX_CV_THRESHOLD = 0.10; // 10% coefficient of variation threshold for individual configurations
1112
const INCREASE_FACTOR = 10; // Factor by which to increase n
13+
const START_N = 10; // Starting n value (10 iterations)
1214

1315
const args = process.argv.slice(2);
1416
if (args.length === 0) {
@@ -19,7 +21,7 @@ Options:
1921
--runs=N Number of runs for each n value (default: ${DEFAULT_RUNS})
2022
--cv-threshold=N Target coefficient of variation threshold (default: ${CV_THRESHOLD})
2123
--max-increases=N Maximum number of n increases to try (default: ${MAX_N_INCREASE})
22-
--start-n=N Initial n value to start with (default: autodetect)
24+
--start-n=N Initial n value to start with (default: ${START_N})
2325
--increase=N Factor by which to increase n (default: ${INCREASE_FACTOR})
2426
2527
Example:
@@ -34,25 +36,33 @@ let benchmarkPath;
3436
let runs = DEFAULT_RUNS;
3537
let cvThreshold = CV_THRESHOLD;
3638
let maxIncreases = MAX_N_INCREASE;
37-
let startN = 10;
39+
let startN = START_N;
3840
let increaseFactor = INCREASE_FACTOR;
3941

4042
for (const arg of args) {
4143
if (arg.startsWith('--runs=')) {
4244
runs = parseInt(arg.substring(7), 10);
45+
if (isNaN(runs)) {
46+
console.error(`Error: Invalid value for --runs. Using default: ${DEFAULT_RUNS}`);
47+
runs = DEFAULT_RUNS;
48+
}
4349
} else if (arg.startsWith('--cv-threshold=')) {
44-
cvThreshold = parseFloat(arg.substring(14));
50+
cvThreshold = parseFloat(arg.substring(15));
51+
if (isNaN(cvThreshold)) {
52+
console.error(`Error: Invalid value for --cv-threshold. Using default: ${CV_THRESHOLD}`);
53+
cvThreshold = CV_THRESHOLD;
54+
}
4555
} else if (arg.startsWith('--max-increases=')) {
46-
maxIncreases = parseInt(arg.substring(15), 10);
56+
maxIncreases = parseInt(arg.substring(16), 10);
4757
if (isNaN(maxIncreases)) {
4858
console.error(`Error: Invalid value for --max-increases. Using default: ${MAX_N_INCREASE}`);
4959
maxIncreases = MAX_N_INCREASE;
5060
}
5161
} else if (arg.startsWith('--start-n=')) {
5262
startN = parseInt(arg.substring(10), 10);
5363
if (isNaN(startN)) {
54-
console.error(`Error: Invalid value for --start-n. Using default: 10`);
55-
startN = 10;
64+
console.error(`Error: Invalid value for --start-n. Using default: ${START_N}`);
65+
startN = START_N;
5666
}
5767
} else if (arg.startsWith('--increase=')) {
5868
increaseFactor = parseInt(arg.substring(11), 10);
@@ -125,6 +135,7 @@ async function main(n = startN) {
125135
let bestN = n;
126136
let bestCV = Infinity;
127137
let bestGroupStats = null;
138+
const cvThresholdPercentage = (cvThreshold * 100).toFixed(2);
128139

129140
console.log(`
130141
--------------------------------------------------------
@@ -136,12 +147,12 @@ that produces consistent benchmark results without wasting time.
136147
How it works:
137148
1. Run the benchmark multiple times with a specific n value
138149
2. Group results by configuration
139-
3. If overall CV is above 5% or any configuration has CV above 10%, increase n and try again
150+
3. If overall CV is above ${cvThresholdPercentage}% or any configuration has CV above ${MAX_CV_THRESHOLD * 100}%, increase n and try again
140151
141152
Configuration:
142153
- Starting n: ${n.toLocaleString()} iterations
143154
- Runs per n value: ${runs}
144-
- Target CV threshold: ${cvThreshold * 100}% (lower CV = more stable results)
155+
- Target CV threshold: ${cvThresholdPercentage}% (lower CV = more stable results)
145156
- Max increases: ${maxIncreases}
146157
- Increase factor: ${increaseFactor}x`);
147158

@@ -195,23 +206,23 @@ Configuration:
195206

196207
if (groupStats.length > 0) {
197208
// Check if any configuration has CV > MAX_CV_THRESHOLD (too unstable)
198-
const tooUnstableConfigs = groupStats.filter((g) => g.stats.cv > 0.10);
209+
const tooUnstableConfigs = groupStats.filter((g) => g.stats.cv > MAX_CV_THRESHOLD);
199210

200211
const avgCV = groupStats.reduce((sum, g) => sum + g.stats.cv, 0) / groupStats.length;
201212
console.log(`\nOverall average CV: ${(avgCV * 100).toFixed(2)}%`);
202213

203-
const isOverallStable = avgCV < CV_THRESHOLD;
214+
const isOverallStable = avgCV < cvThreshold;
204215
const hasVeryUnstableConfigs = tooUnstableConfigs.length > 0;
205216

206-
// Check if overall CV is below CV_THRESHOLD and no configuration has CV > 10%
217+
// Check if overall CV is below cvThreshold and no configuration has CV > MAX_CV_THRESHOLD
207218
if (isOverallStable && !hasVeryUnstableConfigs) {
208-
console.log(styleText(['bold', 'green'], ` ✓ Overall CV is below 5% and no configuration has CV above 10%`));
219+
console.log(styleText(['bold', 'green'], ` ✓ Overall CV is below ${cvThresholdPercentage}% and no configuration has CV above ${MAX_CV_THRESHOLD * 100}%`));
209220
} else {
210221
if (!isOverallStable) {
211-
console.log(styleText(['bold', 'red'], ` ✗ Overall CV (${(avgCV * 100).toFixed(2)}%) is above 5%`));
222+
console.log(styleText(['bold', 'red'], ` ✗ Overall CV (${(avgCV * 100).toFixed(2)}%) is above ${cvThresholdPercentage}%`));
212223
}
213224
if (hasVeryUnstableConfigs) {
214-
console.log(styleText(['bold', 'red'], ` ✗ ${tooUnstableConfigs.length} configuration(s) have CV above 10%`));
225+
console.log(styleText(['bold', 'red'], ` ✗ ${tooUnstableConfigs.length} configuration(s) have CV above ${MAX_CV_THRESHOLD * 100}%`));
215226
}
216227
}
217228

@@ -226,7 +237,7 @@ Configuration:
226237
bestGroupStats.push({
227238
conf: group.conf,
228239
stats: stats,
229-
isStable: stats.cv <= 0.10,
240+
isStable: stats.cv <= MAX_CV_THRESHOLD,
230241
});
231242
}
232243
}
@@ -237,15 +248,15 @@ Configuration:
237248
}
238249

239250
// Check if we've reached acceptable stability based on new criteria
240-
// 1. Overall CV should be below CV_THRESHOLD
241-
// 2. No configuration should have a CV greater than 10%
251+
// 1. Overall CV should be below cvThreshold
252+
// 2. No configuration should have a CV greater than MAX_CV_THRESHOLD
242253
const avgCV = groupStats.length > 0 ?
243254
groupStats.reduce((sum, g) => sum + g.stats.cv, 0) / groupStats.length : Infinity;
244-
const hasUnstableConfig = groupStats.some((g) => g.stats.cv > 0.10);
245-
const isOverallStable = avgCV < CV_THRESHOLD;
255+
const hasUnstableConfig = groupStats.some((g) => g.stats.cv > MAX_CV_THRESHOLD);
256+
const isOverallStable = avgCV < cvThreshold;
246257

247258
if (isOverallStable && !hasUnstableConfig) {
248-
console.log(`\n✓ Found optimal n=${n} (Overall CV=${(avgCV * 100).toFixed(2)}% < 5% and no configuration has CV > 10%)`);
259+
console.log(`\n✓ Found optimal n=${n} (Overall CV=${(avgCV * 100).toFixed(2)}% < ${cvThresholdPercentage}% and no configuration has CV > ${MAX_CV_THRESHOLD * 100}%)`);
249260
console.log('\nFinal CV for each configuration:');
250261
groupStats.forEach((g) => {
251262
console.log(` ${JSON.stringify(groupedResults[g.confKey].conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
@@ -271,7 +282,7 @@ Configuration:
271282
if (g.conf) {
272283
console.log(` ${JSON.stringify(g.conf)}: ${(g.stats.cv * 100).toFixed(2)}%`);
273284
if (g.stats.cv > cvThreshold) {
274-
console.log(` ⚠️ This configuration is above the target threshold of ${cvThreshold * 100}%`);
285+
console.log(` ⚠️ This configuration is above the target threshold of ${cvThresholdPercentage}%`);
275286
}
276287
}
277288
});

0 commit comments

Comments
 (0)