Skip to content

Commit 67b0f24

Browse files
committed
Implementing maxAvailableComponentSets for resource models
Signed-off-by: mszacillo <[email protected]>
1 parent 0fde1d2 commit 67b0f24

File tree

2 files changed

+569
-10
lines changed

2 files changed

+569
-10
lines changed

pkg/estimator/client/general.go

Lines changed: 262 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"math"
23+
"sort"
2324

2425
corev1 "k8s.io/api/core/v1"
2526
"k8s.io/apimachinery/pkg/api/resource"
@@ -54,7 +55,7 @@ func (ge *GeneralEstimator) MaxAvailableReplicas(_ context.Context, clusters []*
5455
}
5556

5657
func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) int32 {
57-
//Note: resourceSummary must be deep-copied before using in the function to avoid modifying the original data structure.
58+
// Note: resourceSummary must be deep-copied before using in the function to avoid modifying the original data structure.
5859
resourceSummary := cluster.Status.ResourceSummary.DeepCopy()
5960
if resourceSummary == nil {
6061
return 0
@@ -149,8 +150,7 @@ func (ge *GeneralEstimator) maxAvailableComponentSets(cluster *clusterv1alpha1.C
149150
}
150151

151152
if features.FeatureGate.Enabled(features.CustomizedClusterResourceModeling) && len(cluster.Status.ResourceSummary.AllocatableModelings) > 0 {
152-
num, err := getMaximumSetsBasedOnResourceModels(cluster, components)
153-
if err != nil {
153+
if num, err := getMaximumSetsBasedOnResourceModels(cluster, components, podBound); err != nil {
154154
klog.Warningf("Failed to get maximum sets based on resource models, skipping: %v", err)
155155
} else if num < maxSets {
156156
maxSets = num
@@ -160,13 +160,265 @@ func (ge *GeneralEstimator) maxAvailableComponentSets(cluster *clusterv1alpha1.C
160160
return int32(maxSets) // #nosec G115: integer overflow conversion int64 -> int32
161161
}
162162

163-
// getMaximumSetsBasedOnResourceModels is a placeholder for future implementation.
164-
// It should refine the maximum sets based on cluster resource models, similar
165-
// to getMaximumReplicasBasedOnResourceModels but adapted to full component sets.
166-
func getMaximumSetsBasedOnResourceModels(_ *clusterv1alpha1.Cluster, _ []workv1alpha2.Component) (int64, error) {
167-
// TODO: implement logic based on cluster.Spec.ResourceModels
168-
// For now, just return MaxInt64 so it never reduces the upper bound.
169-
return math.MaxInt64, nil
163+
// getMaximumSetsBasedOnResourceModels computes the maximum number of full sets that can be
164+
// placed on a cluster using the cluster's ResourceModels. It expands one set into
165+
// replica kinds (demand + count) and performs a first-fit-decreasing placement onto model-grade nodes.
166+
// `upperBound` caps the search. We can set this using the podBound (allowedPods / podsPerSet)
167+
func getMaximumSetsBasedOnResourceModels(
168+
cluster *clusterv1alpha1.Cluster,
169+
components []workv1alpha2.Component,
170+
upperBound int64,
171+
) (int64, error) {
172+
if upperBound <= 0 {
173+
return 0, nil
174+
}
175+
176+
// Build model nodes from Spec.ResourceModels and Status.AllocatableModelings
177+
nodes, err := buildModelNodes(cluster)
178+
if err != nil {
179+
return -1, err
180+
}
181+
if len(nodes) == 0 {
182+
return 0, nil
183+
}
184+
185+
// Compressed one-set: per-kind (identical replicas grouped)
186+
oneSetKinds := expandKindsOneSet(components)
187+
if len(oneSetKinds) == 0 {
188+
// No pods in a set -> nothing to schedule under models
189+
return 0, nil
190+
}
191+
192+
// Use cluster "available" totals (allocatable - allocated - allocating) for normalized scoring.
193+
// This reflects what the cluster can actually accept *now*.
194+
totals := availableResourceMap(cluster.Status.ResourceSummary)
195+
196+
// Binary search on #sets within [0, upperBound]
197+
lo, hi := int64(0), upperBound
198+
for lo < hi {
199+
mid := (lo + hi + 1) / 2
200+
if modelsFeasibleCompressed(mid, oneSetKinds, nodes, totals) {
201+
lo = mid
202+
} else {
203+
hi = mid - 1
204+
}
205+
}
206+
return lo, nil
207+
}
208+
209+
// ----- Models helpers -----
210+
211+
// modelNode holds remaining capacity for a given node across all resource types
212+
type modelNode struct {
213+
cap map[corev1.ResourceName]int64
214+
}
215+
216+
// buildModelNodes constructs identical nodes for each model grade using its Min vector,
217+
// repeated `AllocatableModelings[i].Count` times.
218+
func buildModelNodes(cluster *clusterv1alpha1.Cluster) ([]modelNode, error) {
219+
if len(cluster.Spec.ResourceModels) == 0 {
220+
return nil, fmt.Errorf("resource model is inapplicable as no grades are defined")
221+
}
222+
223+
if len(cluster.Spec.ResourceModels) > len(cluster.Status.ResourceSummary.AllocatableModelings) {
224+
// Shouldn’t happen - status is malformed
225+
return nil, fmt.Errorf("resource model/status mismatch: %d grades in spec, %d in status",
226+
len(cluster.Spec.ResourceModels), len(cluster.Status.ResourceSummary.AllocatableModelings))
227+
}
228+
229+
// Convert Spec.ResourceModels to a map of resource -> []MinByGrade
230+
minMap := convertToResourceModelsMinMap(cluster.Spec.ResourceModels)
231+
232+
// Build nodes for each grade index i
233+
var nodes []modelNode
234+
for i := 0; i < len(cluster.Spec.ResourceModels); i++ {
235+
count := cluster.Status.ResourceSummary.AllocatableModelings[i].Count
236+
if count == 0 {
237+
continue
238+
}
239+
240+
// Capacity vector for this grade = Min boundary of each resource at grade i (normalized)
241+
capTemplate := make(map[corev1.ResourceName]int64, len(minMap))
242+
for resName, mins := range minMap {
243+
if i >= len(mins) {
244+
// Model shape mismatch; treat as missing resource for this grade
245+
return nil, fmt.Errorf("resource model is inapplicable as missing resource %q in grade %d", string(resName), i)
246+
}
247+
capTemplate[resName] = quantityAsInt64(mins[i])
248+
}
249+
250+
// Append `count` identical nodes of this grade
251+
for n := 0; n < count; n++ {
252+
// Copy capTemplate to each node
253+
capCopy := make(map[corev1.ResourceName]int64, len(capTemplate))
254+
for k, v := range capTemplate {
255+
capCopy[k] = v
256+
}
257+
nodes = append(nodes, modelNode{cap: capCopy})
258+
}
259+
}
260+
return nodes, nil
261+
}
262+
263+
// replicaKind represents a single type of component, including replica demand and count
264+
type replicaKind struct {
265+
dem map[corev1.ResourceName]int64 // per-replica demand
266+
count int64 // how many replicas
267+
score float64 // ordering heuristic (higher first)
268+
}
269+
270+
// expandKindsOneSet flattens components into a slice of unique replica kinds.
271+
// Each entry holds the per-replica demand and how many replicas of that kind a set needs.
272+
func expandKindsOneSet(components []workv1alpha2.Component) []replicaKind {
273+
kinds := make([]replicaKind, 0, len(components))
274+
for _, c := range components {
275+
if c.ReplicaRequirements == nil || c.ReplicaRequirements.ResourceRequest == nil {
276+
continue
277+
}
278+
// normalize per-replica demand
279+
base := make(map[corev1.ResourceName]int64, len(c.ReplicaRequirements.ResourceRequest))
280+
for name, qty := range c.ReplicaRequirements.ResourceRequest {
281+
base[name] = quantityAsInt64(qty)
282+
}
283+
// skip zero-demand or non-positive replica count
284+
if allZero(base) || c.Replicas <= 0 {
285+
continue
286+
}
287+
288+
k := replicaKind{
289+
dem: base,
290+
count: int64(c.Replicas),
291+
// score is filled later once we know cluster-wide totals
292+
}
293+
kinds = append(kinds, k)
294+
}
295+
return kinds
296+
}
297+
298+
// modelsFeasibleCompressed checks if the given # of copies of `oneSetKinds` can be placed onto `nodes`
299+
// using first-fit decreasing but placing **batches** of identical replicas at once.
300+
// The ordering heuristic uses a normalized "max utilization ratio" to avoid unit bias.
301+
func modelsFeasibleCompressed(
302+
sets int64,
303+
oneSetKinds []replicaKind,
304+
nodes []modelNode,
305+
totals map[corev1.ResourceName]int64, // cluster-wide totals from ResourceSummary.Allocatable or "available"
306+
) bool {
307+
if sets <= 0 {
308+
return true
309+
}
310+
if len(oneSetKinds) == 0 {
311+
return true
312+
}
313+
314+
// working copy of node capacities
315+
work := make([]modelNode, len(nodes))
316+
for i := range nodes {
317+
capCopy := make(map[corev1.ResourceName]int64, len(nodes[i].cap))
318+
for k, v := range nodes[i].cap {
319+
capCopy[k] = v
320+
}
321+
work[i] = modelNode{cap: capCopy}
322+
}
323+
324+
// scale counts by #sets and compute normalized scores
325+
items := make([]replicaKind, len(oneSetKinds))
326+
for i, k := range oneSetKinds {
327+
items[i] = replicaKind{
328+
dem: k.dem,
329+
count: k.count * sets,
330+
}
331+
items[i].score = demandScoreNormalized(items[i].dem, totals)
332+
}
333+
334+
// sort decreasing by normalized "max utilization ratio"
335+
sort.Slice(items, func(i, j int) bool { return items[i].score > items[j].score })
336+
337+
// greedy first-fit with batch placement
338+
for idx := range items {
339+
if items[idx].count <= 0 {
340+
continue
341+
}
342+
remaining := items[idx].count
343+
for n := range work {
344+
if remaining == 0 {
345+
break
346+
}
347+
fit := maxFit(work[n].cap, items[idx].dem) // how many replicas of this kind fit on node n
348+
if fit <= 0 {
349+
continue
350+
}
351+
place := fit
352+
if place > remaining {
353+
place = remaining
354+
}
355+
consumeMul(work[n].cap, items[idx].dem, place)
356+
remaining -= place
357+
}
358+
if remaining > 0 {
359+
// couldn't place all replicas of this kind -> infeasible
360+
return false
361+
}
362+
}
363+
return true
364+
}
365+
366+
// demandScoreNormalized returns the "max utilization ratio" of a demand vector against total capacities.
367+
// If a resource is missing/zero in total, treat it as maximally constrained.
368+
func demandScoreNormalized(
369+
demand map[corev1.ResourceName]int64,
370+
total map[corev1.ResourceName]int64,
371+
) float64 {
372+
var maxRatio float64
373+
for res, req := range demand {
374+
if req <= 0 {
375+
continue
376+
}
377+
totalCap := float64(total[res])
378+
if totalCap <= 0 {
379+
return math.MaxFloat64
380+
}
381+
ratio := float64(req) / totalCap
382+
if ratio > maxRatio {
383+
maxRatio = ratio
384+
}
385+
}
386+
return maxRatio
387+
}
388+
389+
// maxFit returns how many copies of `dem` fit in `cap` simultaneously.
390+
func maxFit(capacity map[corev1.ResourceName]int64, dem map[corev1.ResourceName]int64) int64 {
391+
var limit int64 = math.MaxInt64
392+
for k, req := range dem {
393+
if req <= 0 {
394+
continue
395+
}
396+
avail := capacity[k]
397+
if avail <= 0 {
398+
return 0
399+
}
400+
bound := avail / req
401+
if bound < limit {
402+
limit = bound
403+
}
404+
}
405+
if limit == math.MaxInt64 {
406+
return 0
407+
}
408+
return limit
409+
}
410+
411+
// consumeMul subtracts mult * dem from cap.
412+
func consumeMul(capacity map[corev1.ResourceName]int64, dem map[corev1.ResourceName]int64, mult int64) {
413+
if mult <= 0 {
414+
return
415+
}
416+
for k, req := range dem {
417+
if req <= 0 {
418+
continue
419+
}
420+
capacity[k] -= req * mult
421+
}
170422
}
171423

172424
// podsInSet computes the total number of pods in the CRD

0 commit comments

Comments
 (0)