Skip to content

Commit f7854bc

Browse files
committed
scheduler: fixed a bug where the bandwidth of reserved cores were not taken into account
1 parent a55a0fd commit f7854bc

File tree

9 files changed

+148
-43
lines changed

9 files changed

+148
-43
lines changed

.changelog/26768.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
scheduler: fixed a bug where the bandwidth of reserved cores were not taken into account
3+
```

client/client.go

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,8 +1816,19 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp
18161816
}
18171817

18181818
// update config with total cpu compute if it was detected
1819-
if cpu := response.NodeResources.Processors.TotalCompute(); cpu > 0 {
1820-
newConfig.CpuCompute = cpu
1819+
totalCompute := response.NodeResources.Processors.TotalCompute()
1820+
usableCompute := response.NodeResources.Processors.UsableCompute()
1821+
if totalCompute > 0 {
1822+
if newConfig.CpuCompute != totalCompute {
1823+
newConfig.CpuCompute = totalCompute
1824+
nodeHasChanged = true
1825+
}
1826+
1827+
reservedCompute := int64(totalCompute - usableCompute)
1828+
if newConfig.Node.ReservedResources.Cpu.CpuShares != reservedCompute {
1829+
newConfig.Node.ReservedResources.Cpu.CpuShares = reservedCompute
1830+
nodeHasChanged = true
1831+
}
18211832
}
18221833
}
18231834

@@ -3323,8 +3334,8 @@ func (c *Client) setGaugeForDiskStats(hStats *hoststats.HostStats, baseLabels []
33233334
// setGaugeForAllocationStats proxies metrics for allocation specific statistics
33243335
func (c *Client) setGaugeForAllocationStats(baseLabels []metrics.Label) {
33253336
node := c.GetConfig().Node
3326-
total := node.NodeResources
3327-
res := node.ReservedResources
3337+
3338+
available := node.Comparable()
33283339
allocated := c.getAllocatedResources(node)
33293340

33303341
// Emit allocated
@@ -3342,20 +3353,15 @@ func (c *Client) setGaugeForAllocationStats(baseLabels []metrics.Label) {
33423353
}
33433354

33443355
// Emit unallocated
3345-
unallocatedMem := total.Memory.MemoryMB - res.Memory.MemoryMB - allocated.Flattened.Memory.MemoryMB
3346-
unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB
3347-
3348-
// The UsableCompute function call already subtracts and accounts for any
3349-
// reserved CPU within the client configuration. Therefore, we do not need
3350-
// to subtract that here.
3351-
unallocatedCpu := int64(total.Processors.Topology.UsableCompute()) - allocated.Flattened.Cpu.CpuShares
3356+
unallocatedMem := available.Flattened.Memory.MemoryMB - allocated.Flattened.Memory.MemoryMB
3357+
unallocatedDisk := available.Shared.DiskMB - allocated.Shared.DiskMB
3358+
unallocatedCpu := available.Flattened.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares
33523359

33533360
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels)
33543361
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels)
33553362
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels)
33563363

3357-
totalComparable := total.Comparable()
3358-
for _, n := range totalComparable.Flattened.Networks {
3364+
for _, n := range available.Flattened.Networks {
33593365
// Determined the used resources
33603366
var usedMbits int
33613367
totalIdx := allocated.Flattened.Networks.NetIndex(n)

client/client_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,56 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) {
16881688

16891689
}
16901690

1691+
func TestClient_UpdateNodeFromFingerprintCalculatesReservedResources(t *testing.T) {
1692+
ci.Parallel(t)
1693+
1694+
client, cleanup := TestClient(t, func(c *config.Config) {
1695+
// Parsed version of the client reserved field
1696+
c.Node.ReservedResources.Cpu = structs.NodeReservedCpuResources{
1697+
CpuShares: 100,
1698+
ReservedCpuCores: []uint16{0},
1699+
}
1700+
})
1701+
defer cleanup()
1702+
1703+
// Set up a basic topology where the first core is reserved and
1704+
// an additionally 100 MHz are reserved
1705+
basicTopology := structs.MockBasicTopology()
1706+
basicTopology.Cores[0].Disable = true
1707+
basicTopology.OverrideWitholdCompute = 100
1708+
client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{
1709+
// overrides the detected hardware in TestClient
1710+
NodeResources: &structs.NodeResources{
1711+
Memory: structs.NodeMemoryResources{MemoryMB: 1024},
1712+
Processors: structs.NodeProcessorResources{
1713+
Topology: basicTopology,
1714+
},
1715+
},
1716+
})
1717+
1718+
// initial check
1719+
conf := client.GetConfig()
1720+
1721+
expectedReservedResources := &structs.NodeReservedResources{
1722+
Cpu: structs.NodeReservedCpuResources{
1723+
// set by fingerprinting callback, once the total compute has been detected, it should have converted the
1724+
// client reserved configuration into the effective reserved bandwidth (1 core * 3500 MHz + 100 MHz)
1725+
CpuShares: 3_600,
1726+
ReservedCpuCores: []uint16{0},
1727+
},
1728+
Memory: structs.NodeReservedMemoryResources{
1729+
MemoryMB: 256,
1730+
},
1731+
Disk: structs.NodeReservedDiskResources{
1732+
DiskMB: 4096,
1733+
},
1734+
Networks: structs.NodeReservedNetworkResources{
1735+
ReservedHostPorts: "22",
1736+
},
1737+
}
1738+
must.Eq(t, expectedReservedResources, conf.Node.ReservedResources)
1739+
}
1740+
16911741
// TestClient_UpdateNodeFromFingerprintKeepsConfig asserts manually configured
16921742
// network interfaces take precedence over fingerprinted ones.
16931743
func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) {

client/lib/numalib/topology.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,17 @@ func (st *Topology) NumECores() int {
282282
return total
283283
}
284284

285-
// UsableCores returns the number of logical cores usable by the Nomad client
285+
// AllCores returns the set of logical cores detected. The UsableCores will
286+
// be equal to or less than this value.
287+
func (st *Topology) AllCores() *idset.Set[hw.CoreID] {
288+
result := idset.Empty[hw.CoreID]()
289+
for _, cpu := range st.Cores {
290+
result.Insert(cpu.ID)
291+
}
292+
return result
293+
}
294+
295+
// UsableCores returns the set of logical cores usable by the Nomad client
286296
// for running tasks. Nomad must subtract off any reserved cores (reserved.cores)
287297
// and/or must mask the cpuset to the one set in config (config.reservable_cores).
288298
func (st *Topology) UsableCores() *idset.Set[hw.CoreID] {

nomad/structs/funcs.go

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -189,10 +189,8 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
189189
return false, "cores", used, nil
190190
}
191191

192-
// Check that the node resources (after subtracting reserved) are a
193-
// super set of those that are being allocated
194-
available := node.NodeResources.Comparable()
195-
available.Subtract(node.ReservedResources.Comparable())
192+
// Check that the node resources are a super set of those that are being allocated
193+
available := node.Comparable()
196194
if superset, dimension := available.Superset(used); !superset {
197195
return false, dimension, used, nil
198196
}
@@ -232,20 +230,12 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi
232230
}
233231

234232
func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) {
235-
reserved := node.ReservedResources.Comparable()
236-
res := node.NodeResources.Comparable()
237-
238233
// Determine the node availability
239-
nodeCpu := float64(res.Flattened.Cpu.CpuShares)
240-
nodeMem := float64(res.Flattened.Memory.MemoryMB)
241-
if reserved != nil {
242-
nodeCpu -= float64(reserved.Flattened.Cpu.CpuShares)
243-
nodeMem -= float64(reserved.Flattened.Memory.MemoryMB)
244-
}
234+
available := node.Comparable()
245235

246236
// Compute the free percentage
247-
freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / nodeCpu)
248-
freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / nodeMem)
237+
freePctCpu = 1 - (float64(util.Flattened.Cpu.CpuShares) / float64(available.Flattened.Cpu.CpuShares))
238+
freePctRam = 1 - (float64(util.Flattened.Memory.MemoryMB) / float64(available.Flattened.Memory.MemoryMB))
249239
return freePctCpu, freePctRam
250240
}
251241

nomad/structs/funcs_test.go

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ func node2k() *Node {
106106
ID: 1,
107107
Grade: numalib.Performance,
108108
BaseSpeed: 1000,
109+
}, {
110+
ID: 2,
111+
Grade: numalib.Performance,
112+
BaseSpeed: 1000,
113+
Disable: true,
114+
}, {
115+
ID: 3,
116+
Grade: numalib.Performance,
117+
BaseSpeed: 1000,
118+
Disable: true,
109119
}},
110120
OverrideWitholdCompute: 1000, // set by client reserved field
111121
},
@@ -137,7 +147,10 @@ func node2k() *Node {
137147
},
138148
ReservedResources: &NodeReservedResources{
139149
Cpu: NodeReservedCpuResources{
140-
CpuShares: 1000,
150+
// set by fingerprinting callback, topology of 1000 MHz * 4 cores (4000 MHz), of which 2 cores are reserved
151+
// plus 1000 MHz of reserved amount of CPU, effectively a total of 3000 MHz of reserved CPU
152+
CpuShares: 3000,
153+
ReservedCpuCores: []uint16{2, 3},
141154
},
142155
Memory: NodeReservedMemoryResources{
143156
MemoryMB: 1024,
@@ -201,9 +214,10 @@ func TestAllocsFit(t *testing.T) {
201214
must.Eq(t, 1024, used.Flattened.Memory.MemoryMB)
202215

203216
// Should not fit second allocation
204-
fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
217+
fit, dim, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false)
205218
must.NoError(t, err)
206219
must.False(t, fit)
220+
must.Eq(t, "cpu", dim)
207221
must.Eq(t, 2000, used.Flattened.Cpu.CpuShares)
208222
must.Eq(t, 2048, used.Flattened.Memory.MemoryMB)
209223

@@ -649,8 +663,23 @@ func TestScoreFitBinPack(t *testing.T) {
649663
Cores: []numalib.Core{{
650664
ID: 0,
651665
Grade: numalib.Performance,
652-
BaseSpeed: 4096,
666+
BaseSpeed: 2048,
667+
}, {
668+
ID: 1,
669+
Grade: numalib.Performance,
670+
BaseSpeed: 2048,
671+
}, {
672+
ID: 2,
673+
Grade: numalib.Performance,
674+
BaseSpeed: 2048,
675+
Disable: true,
676+
}, {
677+
ID: 3,
678+
Grade: numalib.Performance,
679+
BaseSpeed: 2048,
680+
Disable: true,
653681
}},
682+
OverrideWitholdCompute: 2048, // set by client reserved field
654683
},
655684
},
656685
Memory: NodeMemoryResources{
@@ -661,7 +690,10 @@ func TestScoreFitBinPack(t *testing.T) {
661690
node.NodeResources.Compatibility()
662691
node.ReservedResources = &NodeReservedResources{
663692
Cpu: NodeReservedCpuResources{
664-
CpuShares: 2048,
693+
// set by fingerprinting callback, topology of 2048 MHz * 4 cores (8192 MHz), of which 2 cores are reserved
694+
// plus 2048 MHz of reserved amount of CPU, effectively a total of 6144 MHz of reserved CPU
695+
CpuShares: 6144,
696+
ReservedCpuCores: []uint16{2, 3},
665697
},
666698
Memory: NodeReservedMemoryResources{
667699
MemoryMB: 4096,

nomad/structs/numa.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,10 @@ func (r *NodeProcessorResources) TotalCompute() int {
172172
}
173173
return int(r.Topology.TotalCompute())
174174
}
175+
176+
func (r *NodeProcessorResources) UsableCompute() int {
177+
if r == nil || r.Topology == nil {
178+
return 0
179+
}
180+
return int(r.Topology.UsableCompute())
181+
}

nomad/structs/structs.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,6 +2247,19 @@ func (n *Node) Canonicalize() {
22472247
}
22482248
}
22492249

2250+
// Comparable returns a comparable version of the node's resources available for scheduling.
2251+
// This conversion can be lossy so care must be taken when using it.
2252+
func (n *Node) Comparable() *ComparableResources {
2253+
if n == nil {
2254+
return nil
2255+
}
2256+
2257+
resources := n.NodeResources.Comparable()
2258+
resources.Subtract(n.ReservedResources.Comparable())
2259+
2260+
return resources
2261+
}
2262+
22502263
func (n *Node) Copy() *Node {
22512264
if n == nil {
22522265
return nil
@@ -3244,16 +3257,16 @@ func (n *NodeResources) Comparable() *ComparableResources {
32443257
return nil
32453258
}
32463259

3247-
usableCores := n.Processors.Topology.UsableCores().Slice()
3248-
reservableCores := helper.ConvertSlice(usableCores, func(id hw.CoreID) uint16 {
3260+
allCores := n.Processors.Topology.AllCores().Slice()
3261+
cores := helper.ConvertSlice(allCores, func(id hw.CoreID) uint16 {
32493262
return uint16(id)
32503263
})
32513264

32523265
c := &ComparableResources{
32533266
Flattened: AllocatedTaskResources{
32543267
Cpu: AllocatedCpuResources{
32553268
CpuShares: int64(n.Processors.Topology.TotalCompute()),
3256-
ReservedCores: reservableCores,
3269+
ReservedCores: cores,
32573270
},
32583271
Memory: AllocatedMemoryResources{
32593272
MemoryMB: n.Memory.MemoryMB,

scheduler/feasible/preemption.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,7 @@ func (p *Preemptor) Copy() *Preemptor {
162162

163163
// SetNode sets the node
164164
func (p *Preemptor) SetNode(node *structs.Node) {
165-
nodeRemainingResources := node.NodeResources.Comparable()
166-
167-
// Subtract the reserved resources of the node
168-
if c := node.ReservedResources.Comparable(); c != nil {
169-
nodeRemainingResources.Subtract(c)
170-
}
171-
p.nodeRemainingResources = nodeRemainingResources
165+
p.nodeRemainingResources = node.Comparable()
172166
}
173167

174168
// SetCandidates initializes the candidate set from which preemptions are chosen

0 commit comments

Comments
 (0)