From dafc07e38c7c2af1dec371276dec08da39e1636a Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Wed, 12 Sep 2018 08:53:32 -0700 Subject: [PATCH] Revert "prov/psm2: Avoid long delay in psm2_ep_close" This reverts commit 7741df0db37085c1a49c05185c9b3f8170981661. Sporadic assertion failures have been observed inside psm2_ep_disconnect2(). Disable the patch until the issue is fixed. Signed-off-by: Jianxin Xiong --- prov/psm2/src/psmx2_av.c | 79 ++++++++++++++++++------------------------ prov/psm2/src/psmx2_trx_ctxt.c | 32 +++-------------- 2 files changed, 39 insertions(+), 72 deletions(-) diff --git a/prov/psm2/src/psmx2_av.c b/prov/psm2/src/psmx2_av.c index 815407405..63c6e5beb 100644 --- a/prov/psm2/src/psmx2_av.c +++ b/prov/psm2/src/psmx2_av.c @@ -32,40 +32,6 @@ #include "psmx2.h" -static void psmx2_set_epaddr_context(struct psmx2_trx_ctxt *trx_ctxt, - psm2_epid_t epid, psm2_epaddr_t epaddr) -{ - struct psmx2_epaddr_context *context; - - context = (void *)psm2_epaddr_getctxt(epaddr); - if (context) { - if (context->trx_ctxt != trx_ctxt || context->epid != epid) { - FI_WARN(&psmx2_prov, FI_LOG_AV, - "trx_ctxt or epid doesn't match\n"); - context = NULL; - } - } - - if (context) - return; - - context = malloc(sizeof *context); - if (!context) { - FI_WARN(&psmx2_prov, FI_LOG_AV, - "cannot allocate context\n"); - return; - } - - context->trx_ctxt = trx_ctxt; - context->epid = epid; - context->epaddr = epaddr; - psm2_epaddr_setctxt(epaddr, context); - - psmx2_lock(&trx_ctxt->peer_lock, 2); - dlist_insert_before(&context->entry, &trx_ctxt->peer_list); - psmx2_unlock(&trx_ctxt->peer_lock, 2); -} - /* * SEP address query protocol: * @@ -118,8 +84,6 @@ int psmx2_am_sep_handler(psm2_am_token_t token, psm2_amarg_t *args, struct psmx2_fid_sep *sep; struct psmx2_sep_query *req; struct psmx2_fid_av *av; - psm2_epaddr_t src_epaddr; - psm2_epid_t src_epid; psm2_epid_t *epids; psm2_epid_t *buf = NULL; int buflen; @@ -129,15 +93,6 @@ int psmx2_am_sep_handler(psm2_am_token_t token, psm2_amarg_t *args, cmd = PSMX2_AM_GET_OP(args[0].u32w0); domain = trx_ctxt->domain; - /* - * the implicit connection to the AM source needs also to be disconnected - * to avoid long delay inside psm2_ep_close. make sure the source is added - * to the peer list. - */ - psm2_am_get_source(token, &src_epaddr); - psm2_epaddr_to_epid(src_epaddr, &src_epid); - psmx2_set_epaddr_context(trx_ctxt, src_epid, src_epaddr); - switch (cmd) { case PSMX2_AM_REQ_SEP_QUERY: sep_id = args[0].u32w1; @@ -226,6 +181,40 @@ static inline double psmx2_conn_timeout(int sec) return sec * 1e9; } +static void psmx2_set_epaddr_context(struct psmx2_trx_ctxt *trx_ctxt, + psm2_epid_t epid, psm2_epaddr_t epaddr) +{ + struct psmx2_epaddr_context *context; + + context = (void *)psm2_epaddr_getctxt(epaddr); + if (context) { + if (context->trx_ctxt != trx_ctxt || context->epid != epid) { + FI_WARN(&psmx2_prov, FI_LOG_AV, + "trx_ctxt or epid doesn't match\n"); + context = NULL; + } + } + + if (context) + return; + + context = malloc(sizeof *context); + if (!context) { + FI_WARN(&psmx2_prov, FI_LOG_AV, + "cannot allocate context\n"); + return; + } + + context->trx_ctxt = trx_ctxt; + context->epid = epid; + context->epaddr = epaddr; + psm2_epaddr_setctxt(epaddr, context); + + psmx2_lock(&trx_ctxt->peer_lock, 2); + dlist_insert_before(&context->entry, &trx_ctxt->peer_list); + psmx2_unlock(&trx_ctxt->peer_lock, 2); +} + int psmx2_epid_to_epaddr(struct psmx2_trx_ctxt *trx_ctxt, psm2_epid_t epid, psm2_epaddr_t *epaddr) { diff --git a/prov/psm2/src/psmx2_trx_ctxt.c b/prov/psm2/src/psmx2_trx_ctxt.c index 6dd3196e5..709ced94f 100644 --- a/prov/psm2/src/psmx2_trx_ctxt.c +++ b/prov/psm2/src/psmx2_trx_ctxt.c @@ -124,10 +124,6 @@ void psmx2_trx_ctxt_disconnect_peers(struct psmx2_trx_ctxt *trx_ctxt) struct psmx2_epaddr_context *peer; struct dlist_entry peer_list; psm2_amarg_t arg; - psm2_epaddr_t *epaddrs; - psm2_error_t *errors; - int peer_count = 0; - int i = 0; arg.u32w0 = PSMX2_AM_REQ_TRX_CTXT_DISCONNECT; @@ -137,36 +133,17 @@ void psmx2_trx_ctxt_disconnect_peers(struct psmx2_trx_ctxt *trx_ctxt) dlist_foreach_safe(&trx_ctxt->peer_list, item, tmp) { dlist_remove(item); dlist_insert_before(item, &peer_list); - peer_count++; } psmx2_unlock(&trx_ctxt->peer_lock, 2); - if (!peer_count) - return; - - epaddrs = malloc(peer_count * sizeof(*epaddrs)); - errors = malloc(peer_count * sizeof(*errors)); - dlist_foreach_safe(&peer_list, item, tmp) { peer = container_of(item, struct psmx2_epaddr_context, entry); - if (epaddrs) - epaddrs[i++] = peer->epaddr; - if (psmx2_env.disconnect) { - FI_INFO(&psmx2_prov, FI_LOG_CORE, "epaddr: %p\n", peer->epaddr); - psm2_am_request_short(peer->epaddr, PSMX2_AM_TRX_CTXT_HANDLER, - &arg, 1, NULL, 0, 0, NULL, NULL); - } + FI_INFO(&psmx2_prov, FI_LOG_CORE, "epaddr: %p\n", peer->epaddr); + psm2_am_request_short(peer->epaddr, PSMX2_AM_TRX_CTXT_HANDLER, + &arg, 1, NULL, 0, 0, NULL, NULL); psm2_epaddr_setctxt(peer->epaddr, NULL); free(peer); } - - /* disconnect locally to avoid long delay inside psm2_ep_close() */ - if (epaddrs && errors) - psm2_ep_disconnect2(trx_ctxt->psm2_ep, peer_count, epaddrs, NULL, - errors, PSM2_EP_DISCONNECT_FORCE, 0); - - free(errors); - free(epaddrs); } static const char *psmx2_usage_flags_to_string(int usage_flags) @@ -203,7 +180,8 @@ void psmx2_trx_ctxt_free(struct psmx2_trx_ctxt *trx_ctxt, int usage_flags) dlist_remove(&trx_ctxt->entry); psmx2_unlock(&trx_ctxt->domain->trx_ctxt_lock, 1); - psmx2_trx_ctxt_disconnect_peers(trx_ctxt); + if (psmx2_env.disconnect) + psmx2_trx_ctxt_disconnect_peers(trx_ctxt); if (trx_ctxt->am_initialized) psmx2_am_fini(trx_ctxt); -- 2.14.4