Commit dda27d0

fix evict policy (#4127)

* fix evict policy
* fix test schedule

1 parent: fa1856c

2 files changed: +19 −9

lmdeploy/pytorch/paging/scheduler.py

Lines changed: 14 additions & 4 deletions
@@ -247,7 +247,11 @@ def _reorder_waiting():
     def _schedule_decoding(self, prealloc_size: int = 0):
         """Schedule decoding."""
 
-        running = self.running
+        def _reorder_running():
+            """Reorder running."""
+            return sorted(self.running, key=lambda seq: seq.arrive_time)
+
+        running = _reorder_running()
         assert len(running) != 0
 
         eviction_helper = self.eviction_helper
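
The functional change in this hunk: `_schedule_decoding` no longer walks `self.running` in insertion order; `_reorder_running` sorts the queue by `arrive_time`, so the oldest sequence is scheduled first (FCFS). A minimal sketch of the ordering rule, where `Seq` is a hypothetical stand-in for `SchedulerSequence`:

    from dataclasses import dataclass

    # Hypothetical stand-in for SchedulerSequence; only arrive_time matters here.
    @dataclass
    class Seq:
        seq_id: int
        arrive_time: float

    running = [Seq(2, 30.0), Seq(0, 10.0), Seq(1, 20.0)]

    # Same sort key as _reorder_running: earliest arrival first.
    ordered = sorted(running, key=lambda seq: seq.arrive_time)
    assert [s.seq_id for s in ordered] == [0, 1, 2]
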
@@ -271,9 +275,9 @@ def __evict_for_seq(seq: SchedulerSequence, num_required_blocks: int):
             return eviction_helper.evict_for_seq(seq, evictable, prealloc_size)
 
         # 1. running
-        for seq in running:
+        while len(running) > 0:
             # token + n
-
+            seq = running.pop(0)
             num_required_blocks = self.block_manager.num_required_blocks(seq, prealloc_size)
             if len(seq.logical_blocks) + num_required_blocks > self.block_manager.num_gpu_blocks:
                 # Reach max gpu cache size.
@@ -285,7 +289,13 @@ def __evict_for_seq(seq: SchedulerSequence, num_required_blocks: int):
                 seq.set_step(0)
                 continue
 
-            if not __evict_for_seq(seq, num_required_blocks):
+            while not __evict_for_seq(seq, num_required_blocks):
+                if len(running) == 0:
+                    break
+                seq_preempted = running.pop(-1)
+                self._set_message_status(seq_preempted, MessageStatus.WAITING)
+
+            if self.block_manager.get_num_free_gpu_blocks() < num_required_blocks:
                 self._set_message_status(seq, MessageStatus.WAITING)
                 continue
 
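Taken together, the last two hunks replace the single eviction attempt with an evict-or-preempt loop: sequences are popped from the front of the arrival-ordered queue, and whenever `__evict_for_seq` cannot free enough blocks, the youngest running sequence is popped from the tail and demoted to WAITING, until the current sequence fits or no candidates remain. A self-contained sketch of that policy with toy stand-ins for the block manager and eviction helper (none of these names are the real lmdeploy API):

    # Toy model of the new decoding schedule; every name is illustrative.
    class ToyScheduler:

        def __init__(self, num_free_blocks):
            self.num_free_blocks = num_free_blocks
            self.waiting = []

        def _evict_for_seq(self, seq):
            # The real helper evicts hanging/waiting caches; this toy
            # version only reports whether the sequence already fits.
            return seq['required'] <= self.num_free_blocks

        def schedule_decoding(self, running):
            running = sorted(running, key=lambda s: s['arrive_time'])
            scheduled = []
            while len(running) > 0:
                seq = running.pop(0)  # oldest sequence first
                # Preempt the youngest running sequences until `seq` fits.
                while not self._evict_for_seq(seq):
                    if len(running) == 0:
                        break
                    victim = running.pop(-1)  # youngest arrival
                    self.num_free_blocks += victim['allocated']
                    self.waiting.append(victim)
                if self.num_free_blocks < seq['required']:
                    self.waiting.append(seq)  # still no room: demote it too
                    continue
                self.num_free_blocks -= seq['required']
                scheduled.append(seq)
            return scheduled

    # Mirrors the updated test below: the older seq1 preempts the younger seq2.
    sched = ToyScheduler(num_free_blocks=0)
    seq1 = {'name': 'seq1', 'arrive_time': 1.0, 'required': 2, 'allocated': 0}
    seq2 = {'name': 'seq2', 'arrive_time': 2.0, 'required': 5, 'allocated': 4}
    done = sched.schedule_decoding([seq2, seq1])
    assert [s['name'] for s in done] == ['seq1']
    assert [s['name'] for s in sched.waiting] == ['seq2']
    assert sched.num_free_blocks == 2
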
tests/pytorch/paging/test_scheduler.py

Lines changed: 5 additions & 5 deletions
@@ -175,9 +175,9 @@ def test_evict(self, scheduler, block_size, num_gpu_blocks, num_cpu_blocks):
         seq2.update_token_ids(torch.tensor([1] * block_size))
         assert len(scheduler.running) == 2
         scheduler.schedule(is_prefill=False)
-        # seq1: 1 waiting cpu
-        # seq2: 4 running gpu
+        # seq1: 2 running gpu
+        # seq2: 4 waiting cpu
         # seq3: 3 nan
-        assert seq1.status == MessageStatus.WAITING
-        assert seq2.status == MessageStatus.RUNNING
-        assert block_manager.get_num_free_gpu_blocks() == 0
+        assert seq1.status == MessageStatus.RUNNING
+        assert seq2.status == MessageStatus.WAITING
+        assert block_manager.get_num_free_gpu_blocks() == 2
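
Note how the expectations flip under the new policy: before, the younger seq2 kept running on GPU while seq1 sat waiting on CPU; with FCFS ordering the older seq1 stays RUNNING and seq2 is preempted to WAITING, and in this test's configuration the blocks reclaimed from seq2, minus the two that seq1 occupies, leave 2 GPU blocks free.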
