From 99ff621b299c043ff89069e50c5c1604b92e4716 Mon Sep 17 00:00:00 2001 From: Nathan Bellalou Date: Sun, 19 Apr 2026 17:20:36 +0300 Subject: [PATCH 1/2] UCT/CUDA_IPC: Keep lru invariant - region in cache <-> region in lru Modify initial design of keeping in lru only regions candidates to eviction - and thus removing from lru regions with refcount > 0. This caused a failure when regions with refcount > 0 were removed from cache on cache miss and were assumed to also be in lru --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 16 ++------ test/gtest/uct/cuda/test_cuda_ipc_md.cc | 54 ++++++++++++++++++++----- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index 1c48f022562..910968b4943 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -238,9 +238,7 @@ static void uct_cuda_ipc_cache_evict_lru(uct_cuda_ipc_cache_t *cache) } if (region->refcount > 0) { - /* In-use region -- pull off LRU, it will be re-added on release */ - ucs_list_del(®ion->lru_list); - region->in_lru = 0; + /* In-use region -- keep on LRU, revisit on next eviction pass.*/ continue; } @@ -590,10 +588,6 @@ ucs_status_t uct_cuda_ipc_unmap_memhandle(pid_t pid, ucs_sys_ns_t pid_ns, if (!region->refcount && !cache_enabled) { ucs_assert(region->mapped_addr == mapped_addr); uct_cuda_ipc_cache_region_destroy(cache, region); - } else if (!region->in_lru) { - /* Region becomes an eviction candidate -- add to LRU tail */ - ucs_list_add_tail(&cache->lru_list, ®ion->lru_list); - region->in_lru = 1; } pthread_rwlock_unlock(&cache->lock); @@ -648,12 +642,10 @@ UCS_PROFILE_FUNC(ucs_status_t, uct_cuda_ipc_map_memhandle, UCS_PGT_REGION_FMT, cache->name, (void *)key->d_bptr, key->b_len, UCS_PGT_REGION_ARG(®ion->super)); - /* Move to LRU tail (most recently used) */ - if (region->in_lru) { - ucs_list_del(®ion->lru_list); - } + /* Move to LRU tail (most recently used). Region is always on LRU + * while alive, so unconditional del/add_tail is safe. */ + ucs_list_del(®ion->lru_list); ucs_list_add_tail(&cache->lru_list, ®ion->lru_list); - region->in_lru = 1; *mapped_addr = region->mapped_addr; ucs_assert(region->refcount < UINT64_MAX); diff --git a/test/gtest/uct/cuda/test_cuda_ipc_md.cc b/test/gtest/uct/cuda/test_cuda_ipc_md.cc index 4a8b38f59a1..05340a5ad92 100644 --- a/test/gtest/uct/cuda/test_cuda_ipc_md.cc +++ b/test/gtest/uct/cuda/test_cuda_ipc_md.cc @@ -302,22 +302,31 @@ class test_cuda_ipc_cache_lru : public ucs::test { void release_region(uct_cuda_ipc_cache_region_t *region) { ASSERT_GE(region->refcount, 1UL); region->refcount--; - if (!region->in_lru) { - ucs_list_add_tail(&m_cache->lru_list, ®ion->lru_list); - region->in_lru = 1; - } } void reacquire_region(uct_cuda_ipc_cache_region_t *region) { /* Move to LRU tail (most recently used) */ - if (region->in_lru) { - ucs_list_del(®ion->lru_list); - } + ucs_list_del(®ion->lru_list); ucs_list_add_tail(&m_cache->lru_list, ®ion->lru_list); - region->in_lru = 1; region->refcount++; } + void destroy_region(uct_cuda_ipc_cache_region_t *region) { + ucs_status_t status = ucs_pgtable_remove(&m_cache->pgtable, + ®ion->super); + ASSERT_EQ(UCS_OK, status); + + ASSERT_TRUE(region->in_lru); + ucs_list_del(®ion->lru_list); + region->in_lru = 0; + + ASSERT_GT(m_cache->num_regions, 0UL); + m_cache->num_regions--; + m_cache->total_size -= region->key.b_len; + + ucs_free(region); + } + void evict_lru() { uct_cuda_ipc_cache_region_t *region, *tmp; @@ -328,9 +337,6 @@ class test_cuda_ipc_cache_lru : public ucs::test { } if (region->refcount > 0) { - /* In-use -- pull off LRU, will be re-added on release */ - ucs_list_del(®ion->lru_list); - region->in_lru = 0; continue; } @@ -530,3 +536,29 @@ UCS_TEST_F(test_cuda_ipc_cache_lru, unlimited) { EXPECT_TRUE(pgtable_has(i)); } } + +UCS_TEST_F(test_cuda_ipc_cache_lru, stale_destroy_while_in_use) { + /* Regression test for Bug A: evict_lru must not pull in-use regions off + * the LRU. Otherwise a subsequent destroy (e.g. stale-buffer_id branch + * in map_memhandle) hits an assertion / list corruption because the + * region is alive in the pgtable but not on the LRU. */ + create_cache(0 /* force eviction on every insert */, SIZE_MAX); + + uct_cuda_ipc_cache_region_t *r1 = insert_region(0); + ASSERT_EQ(1UL, r1->refcount); + ASSERT_EQ(1, r1->in_lru); + + evict_lru(); + + /* Core assertion: in-use region must stay on LRU after eviction */ + EXPECT_EQ(1, r1->in_lru); + EXPECT_EQ(1UL, m_cache->num_regions); + EXPECT_TRUE(pgtable_has(0)); + + /* Simulate the stale-buffer_id destroy path */ + destroy_region(r1); + + EXPECT_EQ(0UL, m_cache->num_regions); + EXPECT_EQ(0UL, m_cache->total_size); + EXPECT_TRUE(ucs_list_is_empty(&m_cache->lru_list)); +} From 5dea95470b3cb329a86b62ecaa228095b5e74e04 Mon Sep 17 00:00:00 2001 From: Nathan Bellalou Date: Mon, 20 Apr 2026 11:50:25 +0300 Subject: [PATCH 2/2] UCT/CUDA_IPC: PR fixes --- src/uct/cuda/cuda_ipc/cuda_ipc_cache.c | 2 +- test/gtest/uct/cuda/test_cuda_ipc_md.cc | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c index 910968b4943..2e80a8adcf4 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_cache.c @@ -238,7 +238,7 @@ static void uct_cuda_ipc_cache_evict_lru(uct_cuda_ipc_cache_t *cache) } if (region->refcount > 0) { - /* In-use region -- keep on LRU, revisit on next eviction pass.*/ + /* In-use region -- keep on LRU, revisit on next eviction pass */ continue; } diff --git a/test/gtest/uct/cuda/test_cuda_ipc_md.cc b/test/gtest/uct/cuda/test_cuda_ipc_md.cc index 05340a5ad92..35c943b88c1 100644 --- a/test/gtest/uct/cuda/test_cuda_ipc_md.cc +++ b/test/gtest/uct/cuda/test_cuda_ipc_md.cc @@ -311,9 +311,10 @@ class test_cuda_ipc_cache_lru : public ucs::test { region->refcount++; } - void destroy_region(uct_cuda_ipc_cache_region_t *region) { + void destroy_region(uct_cuda_ipc_cache_region_t *region) + { ucs_status_t status = ucs_pgtable_remove(&m_cache->pgtable, - ®ion->super); + ®ion->super); ASSERT_EQ(UCS_OK, status); ASSERT_TRUE(region->in_lru); @@ -538,10 +539,9 @@ UCS_TEST_F(test_cuda_ipc_cache_lru, unlimited) { } UCS_TEST_F(test_cuda_ipc_cache_lru, stale_destroy_while_in_use) { - /* Regression test for Bug A: evict_lru must not pull in-use regions off - * the LRU. Otherwise a subsequent destroy (e.g. stale-buffer_id branch - * in map_memhandle) hits an assertion / list corruption because the - * region is alive in the pgtable but not on the LRU. */ + /* Check that evict_lru does not pull in-use regions off + * the LRU. This could cause a failure if region is destroyed + * while not in LRU. */ create_cache(0 /* force eviction on every insert */, SIZE_MAX); uct_cuda_ipc_cache_region_t *r1 = insert_region(0);