Skip to content

Commit 2a3a642

Browse files
colesbury authored and miss-islington committed
gh-145615: Fix mimalloc page leak in the free-threaded build (gh-145626)
Fix three issues that caused mimalloc pages to be leaked until the owning thread exited: 1. In _PyMem_mi_page_maybe_free(), move pages out of the full queue when relying on QSBR to defer freeing the page. Pages in the full queue are never searched by mi_page_queue_find_free_ex(), so a page left there is unusable for allocations. 2. Move _PyMem_mi_page_clear_qsbr() from _mi_page_free_collect() to _mi_page_thread_free_collect() where it only fires when all blocks on the page are free (used == 0). The previous placement was too broad: it cleared QSBR state whenever local_free was non-NULL, but _mi_page_free_collect() is called from non-allocation paths (e.g., page visiting in mi_heap_visit_blocks) where the page is not being reused. 3. In _PyMem_mi_page_maybe_free(), use the page's heap tld to find the correct thread state for QSBR list insertion instead of PyThreadState_GET(). During stop-the-world pauses, the function may process pages belonging to other threads, so the current thread state is not necessarily the owner of the page. (cherry picked from commit d76df75f51e662fd15ebe00e107058841de94860) Co-authored-by: Sam Gross <colesbury@gmail.com>
1 parent 7c624d4 commit 2a3a642

File tree

5 files changed

+40
-10
lines changed

5 files changed

+40
-10
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed a memory leak in the :term:`free-threaded build` where mimalloc pages
2+
could become permanently unreclaimable until the owning thread exited.

Objects/mimalloc/heap.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
100100
// note: this will free retired pages as well.
101101
bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
102102
if (!freed && collect == MI_ABANDON) {
103-
_mi_page_abandon(page, pq);
103+
// _PyMem_mi_page_maybe_free may have moved the page to a different
104+
// page queue, so we need to re-fetch the correct queue.
105+
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
106+
_mi_page_abandon(page, &heap->pages[bin]);
104107
}
105108
}
106109
else if (collect == MI_ABANDON) {

Objects/mimalloc/page.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,13 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
213213

214214
// update counts now
215215
page->used -= count;
216+
217+
if (page->used == 0) {
218+
// The page may have had a QSBR goal set from a previous point when it
219+
// was all-free. That goal is no longer valid because the page was
220+
// allocated from and then freed again by other threads.
221+
_PyMem_mi_page_clear_qsbr(page);
222+
}
216223
}
217224

218225
void _mi_page_free_collect(mi_page_t* page, bool force) {
@@ -225,9 +232,6 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
225232

226233
// and the local free list
227234
if (page->local_free != NULL) {
228-
// any previous QSBR goals are no longer valid because we reused the page
229-
_PyMem_mi_page_clear_qsbr(page);
230-
231235
if mi_likely(page->free == NULL) {
232236
// usual case
233237
page->free = page->local_free;

Objects/mimalloc/segment.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,6 +1286,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
12861286
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
12871287
#ifdef Py_GIL_DISABLED
12881288
page->qsbr_goal = 0;
1289+
mi_assert_internal(page->qsbr_node.next == NULL);
12891290
#endif
12901291
segment->abandoned--;
12911292
slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
@@ -1361,6 +1362,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
13611362
// if everything free by now, free the page
13621363
#ifdef Py_GIL_DISABLED
13631364
page->qsbr_goal = 0;
1365+
mi_assert_internal(page->qsbr_node.next == NULL);
13641366
#endif
13651367
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
13661368
}

Objects/obmalloc.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
149149
}
150150
return false;
151151
}
152+
153+
static _PyThreadStateImpl *
154+
tstate_from_heap(mi_heap_t *heap)
155+
{
156+
return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
157+
}
152158
#endif
153159

154160
static bool
@@ -157,23 +163,35 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
157163
#ifdef Py_GIL_DISABLED
158164
assert(mi_page_all_free(page));
159165
if (page->use_qsbr) {
160-
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
161-
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
166+
struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
167+
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
162168
_PyMem_mi_page_clear_qsbr(page);
163169
_mi_page_free(page, pq, force);
164170
return true;
165171
}
166172

173+
// gh-145615: since we are not freeing this page yet, we want to
174+
// make it available for allocations. Note that the QSBR goal and
175+
// linked list node remain set even if the page is later used for
176+
// an allocation. We only detect and clear the QSBR goal when the
177+
// page becomes empty again (used == 0).
178+
if (mi_page_is_in_full(page)) {
179+
_mi_page_unfull(page);
180+
}
181+
167182
_PyMem_mi_page_clear_qsbr(page);
168183
page->retire_expire = 0;
169184

170-
if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
171-
page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
185+
if (should_advance_qsbr_for_page(qsbr, page)) {
186+
page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
172187
}
173188
else {
174-
page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
189+
page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
175190
}
176191

192+
// We may be freeing a page belonging to a different thread during a
193+
// stop-the-world event. Find the _PyThreadStateImpl for the page.
194+
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
177195
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
178196
return false;
179197
}
@@ -190,7 +208,8 @@ _PyMem_mi_page_reclaimed(mi_page_t *page)
190208
if (page->qsbr_goal != 0) {
191209
if (mi_page_all_free(page)) {
192210
assert(page->qsbr_node.next == NULL);
193-
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
211+
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
212+
assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
194213
page->retire_expire = 0;
195214
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
196215
}

0 commit comments

Comments
 (0)