Skip to content

Commit 4fd4eb4

Browse files
committed
UCP/WIREUP: Relax peer-failure criteria for same-worker endpoints
When an endpoint is wired to the same UCP worker (same unpacked address UUID), there is no independent remote peer for cross-worker RMA. Skip requiring UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE and UCT_MD_FLAG_INVALIDATE_RMA in RMA BW wireup criteria in that case. In ucp_request_get_invalidation_map(), return an empty invalidation map for UCP_EP_CONFIG_KEY_FLAG_SELF so RMA BW lanes without MD invalidate support (e.g. cuda_copy) remain valid for same-worker / memtype EPs with peer-failure error handling. Extend ucp_wireup_fill_peer_err_criteria() and ucp_wireup_fill_aux_criteria() with worker + unpacked address so auxiliary and other lane selection paths apply the same rule.
1 parent 1d04d8e commit 4fd4eb4

2 files changed

Lines changed: 49 additions & 17 deletions

File tree

src/ucp/core/ucp_request.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ static ucp_md_map_t ucp_request_get_invalidation_map(ucp_ep_h ep)
379379
ucp_lane_index_t i;
380380
ucp_md_map_t inv_map;
381381

382+
/* Same-worker EPs (loopback, memtype): no cross-worker RMA to invalidate. */
383+
if (key->flags & UCP_EP_CONFIG_KEY_FLAG_SELF) {
384+
return 0;
385+
}
386+
382387
for (i = 0, inv_map = 0;
383388
(key->rma_bw_lanes[i] != UCP_NULL_LANE) && (i < UCP_MAX_LANES); i++) {
384389
lane = key->rma_bw_lanes[i];

src/ucp/wireup/select.c

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,9 +1042,18 @@ static double ucp_wireup_rma_score_func(const ucp_worker_iface_t *wiface,
10421042
}
10431043

10441044
static void ucp_wireup_fill_peer_err_criteria(ucp_wireup_criteria_t *criteria,
1045-
unsigned ep_init_flags)
1045+
unsigned ep_init_flags,
1046+
ucp_worker_h worker,
1047+
const ucp_unpacked_address_t *unpacked_addr)
10461048
{
10471049
if (ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE) {
1050+
/* No independent remote worker when connecting an EP to itself (loopback,
1051+
* memtype EPs, etc.): peer-failure iface caps and MD invalidation are
1052+
* relaxed in wireup; see ucp_request_get_invalidation_map(). */
1053+
if ((unpacked_addr != NULL) && (unpacked_addr->uuid == worker->uuid)) {
1054+
return;
1055+
}
1056+
10481057
criteria->local_iface_flags.mandatory |=
10491058
UCT_IFACE_FLAG_ERRHANDLE_PEER_FAILURE;
10501059
/* transport selection procedure will check additionally for KA or EP check
@@ -1068,7 +1077,9 @@ static double ucp_wireup_aux_score_func(const ucp_worker_iface_t *wiface,
10681077

10691078
static void ucp_wireup_fill_aux_criteria(ucp_wireup_criteria_t *criteria,
10701079
unsigned ep_init_flags,
1071-
uint64_t mandatory_flags)
1080+
uint64_t mandatory_flags,
1081+
ucp_worker_h worker,
1082+
const ucp_unpacked_address_t *unpacked_addr)
10721083
{
10731084
criteria->title = "auxiliary";
10741085
criteria->local_md_flags = 0;
@@ -1092,7 +1103,8 @@ static void ucp_wireup_fill_aux_criteria(ucp_wireup_criteria_t *criteria,
10921103
criteria->tl_rsc_flags = UCP_TL_RSC_FLAG_AUX; /* Can use aux transports */
10931104
criteria->lane_type = UCP_LANE_TYPE_LAST;
10941105

1095-
ucp_wireup_fill_peer_err_criteria(criteria, ep_init_flags);
1106+
ucp_wireup_fill_peer_err_criteria(criteria, ep_init_flags, worker,
1107+
unpacked_addr);
10961108
}
10971109

10981110
static void ucp_wireup_criteria_init(ucp_wireup_criteria_t *criteria)
@@ -1185,7 +1197,9 @@ ucp_wireup_add_rma_lanes(const ucp_wireup_select_params_t *select_params,
11851197
UCT_IFACE_FLAG_PENDING, 0);
11861198
}
11871199
criteria.calc_score = ucp_wireup_rma_score_func;
1188-
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags);
1200+
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags,
1201+
select_params->ep->worker,
1202+
select_params->address);
11891203

11901204
tl_bitmap = ucp_tl_bitmap_max;
11911205
ucs_memory_type_for_each(mem_type) {
@@ -1239,7 +1253,9 @@ ucp_wireup_add_amo_lanes(const ucp_wireup_select_params_t *select_params,
12391253
criteria.calc_score = ucp_wireup_amo_score_func;
12401254
ucp_wireup_init_select_flags(&criteria.local_iface_flags,
12411255
UCT_IFACE_FLAG_PENDING, 0);
1242-
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags);
1256+
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags,
1257+
select_params->ep->worker,
1258+
select_params->address);
12431259
ucp_context_uct_atomic_iface_flags(context, &criteria.remote_atomic_flags);
12441260

12451261
/* We can use only non-p2p resources or resources which are explicitly
@@ -1487,9 +1503,10 @@ ucp_wireup_add_am_lane(const ucp_wireup_select_params_t *select_params,
14871503
UCT_IFACE_FLAG_AM_BCOPY, 0);
14881504
ucp_wireup_init_select_flags(&criteria.remote_iface_flags,
14891505
UCP_ADDR_IFACE_FLAG_AM_SYNC, 0);
1490-
ucp_wireup_fill_peer_err_criteria(&criteria,
1491-
ucp_wireup_ep_init_flags(select_params,
1492-
select_ctx));
1506+
ucp_wireup_fill_peer_err_criteria(
1507+
&criteria,
1508+
ucp_wireup_ep_init_flags(select_params, select_ctx),
1509+
select_params->ep->worker, select_params->address);
14931510

14941511
if (ucs_test_all_flags(ucp_ep_get_context_features(select_params->ep),
14951512
UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
@@ -1925,7 +1942,8 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
19251942
UCP_ADDR_IFACE_FLAG_AM_SYNC, 0);
19261943
ucp_wireup_init_select_flags(&bw_info.criteria.local_iface_flags,
19271944
UCT_IFACE_FLAG_AM_BCOPY, 0);
1928-
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags);
1945+
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags,
1946+
ep->worker, select_params->address);
19291947

19301948
if (ucs_test_all_flags(ucp_ep_get_context_features(ep),
19311949
UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
@@ -2090,7 +2108,8 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
20902108
bw_info.criteria.calc_score = ucp_wireup_rma_bw_score_func;
20912109
ucp_wireup_init_select_flags(&bw_info.criteria.local_iface_flags,
20922110
UCT_IFACE_FLAG_PENDING, 0);
2093-
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags);
2111+
ucp_wireup_fill_peer_err_criteria(&bw_info.criteria, ep_init_flags,
2112+
ep->worker, select_params->address);
20942113

20952114
if (ucs_test_all_flags(ucp_ep_get_context_features(ep),
20962115
UCP_FEATURE_TAG | UCP_FEATURE_WAKEUP)) {
@@ -2139,9 +2158,11 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,
21392158

21402159
/* If error handling is requested and we have RNDV, we require memory
21412160
* invalidation support to provide correct data integrity in case of error.
2161+
* Skip for endpoints wired to the same worker (loopback / memtype EPs).
21422162
*/
21432163
if ((ep_init_flags & UCP_EP_INIT_ERR_MODE_PEER_FAILURE) &&
2144-
ucp_context_rndv_is_enabled(context)) {
2164+
ucp_context_rndv_is_enabled(context) &&
2165+
(select_params->address->uuid != ep->worker->uuid)) {
21452166
bw_info.criteria.local_md_flags |= UCT_MD_FLAG_INVALIDATE_RMA;
21462167
}
21472168

@@ -2290,7 +2311,8 @@ ucp_wireup_select_wireup_msg_lane(ucp_worker_h worker,
22902311
const ucp_address_entry_t *address_list,
22912312
const ucp_wireup_lane_desc_t *lane_descs,
22922313
ucp_lane_index_t num_lanes,
2293-
ucp_lane_index_t am_lane)
2314+
ucp_lane_index_t am_lane,
2315+
const ucp_unpacked_address_t *unpacked_addr)
22942316
{
22952317
ucp_context_h context = worker->context;
22962318
ucp_lane_index_t p2p_lane = UCP_NULL_LANE;
@@ -2307,7 +2329,8 @@ ucp_wireup_select_wireup_msg_lane(ucp_worker_h worker,
23072329
}
23082330

23092331
ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags,
2310-
UCP_ADDR_IFACE_FLAG_CB_ASYNC);
2332+
UCP_ADDR_IFACE_FLAG_CB_ASYNC, worker,
2333+
unpacked_addr);
23112334
for (lane = 0; lane < num_lanes; ++lane) {
23122335
if (lane_descs[lane].rsc_index == UCP_NULL_RESOURCE) {
23132336
continue;
@@ -2421,7 +2444,8 @@ ucp_wireup_add_keepalive_lane(const ucp_wireup_select_params_t *select_params,
24212444
/* Keepalive can also use auxiliary transports */
24222445
criteria.tl_rsc_flags = UCP_TL_RSC_FLAG_AUX;
24232446
criteria.lane_type = UCP_LANE_TYPE_KEEPALIVE;
2424-
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags);
2447+
ucp_wireup_fill_peer_err_criteria(&criteria, ep_init_flags, worker,
2448+
select_params->address);
24252449

24262450
status = ucp_wireup_select_transport(select_ctx, select_params, &criteria,
24272451
*tl_bitmap, UINT64_MAX, UINT64_MAX,
@@ -2770,7 +2794,8 @@ ucp_wireup_construct_lanes(const ucp_wireup_select_params_t *select_params,
27702794
select_ctx),
27712795
select_params->address->address_list,
27722796
select_ctx->lane_descs,
2773-
key->num_lanes, key->am_lane);
2797+
key->num_lanes, key->am_lane,
2798+
select_params->address);
27742799
}
27752800

27762801
for (i = 0; key->rma_bw_lanes[i] != UCP_NULL_LANE; i++) {
@@ -2875,7 +2900,8 @@ ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags,
28752900

28762901
/* Select auxiliary transport that supports async active message callback */
28772902
ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags,
2878-
UCP_ADDR_IFACE_FLAG_CB_ASYNC);
2903+
UCP_ADDR_IFACE_FLAG_CB_ASYNC, ep->worker,
2904+
remote_address);
28792905
status = ucp_wireup_select_transport(&select_ctx, &select_params, &criteria,
28802906
ucp_tl_bitmap_max, UINT64_MAX,
28812907
UINT64_MAX, UINT64_MAX, 0,
@@ -2886,7 +2912,8 @@ ucp_wireup_select_aux_transport(ucp_ep_h ep, unsigned ep_init_flags,
28862912

28872913
/* Fallback to an auxiliary transport without async active message callback
28882914
* requirement */
2889-
ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags, 0);
2915+
ucp_wireup_fill_aux_criteria(&criteria, ep_init_flags, 0, ep->worker,
2916+
remote_address);
28902917
return ucp_wireup_select_transport(&select_ctx, &select_params, &criteria,
28912918
ucp_tl_bitmap_max, UINT64_MAX,
28922919
UINT64_MAX, UINT64_MAX, 1, select_info);

0 commit comments

Comments
 (0)