...
Code Block |
---|
lnet_ni lnet_get_best_ni(local_net, cur_ni, md_cpt) { local_net = get_local_net(peer_net) for each ni in local_net { health_value = lnet_local_ni_health(ni) /* select the best health value */ if (health_value < best_health_value) continue distance = get_distance(md_cpt, dev_cpt) /* select the shortest distance to the MD */ if (distance < lnet_numa_range) distance = lnet_numa_range if (distance > shortest_distance) continue else if distance < shortest_distance shortest_distance = distance /* select based on the most available credits */ else if ni_credits < best_credits continue /* if all is equal select based on round robin */ else if ni_credits == best_credits if best_ni->ni_seq <= ni->ni_seq continue } } /* lnet_select_pathway() will be modified to add a peer_nid parameter. This parameter indicates that the peer_ni is predetermined, and is identified by the NID provided. The peer_nid parameter is the next-hop NID, which can be the final destination or the next-hop router. If that peer_NID is not healthy then another peer_NID is selected as per the current algorithm. This will force the algorithm to prefer the peer_ni which was selected in the initial message sending. The peer_ni NID is stored in the message. This new parameter extends the concept of the src_nid, which is provided to lnet_select_pathway() to inform it that the local NI is predetermined. */ /* on resend */ enum lnet_error_type { LNET_LOCAL_NI_DOWN, /* don't use this NI until you get an UP */ LNET_LOCAL_NI_UP, /* start using this NI */ LNET_LOCAL_NI_SEND_TIMOUT, /* demerit this NI so it's not selected immediately, provided there are other healthy interfaces */ LNET_PEER_NI_NO_LISTENER, /* there is no remote listener. demerit the peer_ni and try another NI */ LNET_PEER_NI_ADDR_ERROR, /* The address for the peer_ni is wrong. 
Don't use this peer_NI */ LNET_PEER_NI_UNREACHABLE, /* temporarily don't use the peer NI */ LNET_PEER_NI_CONNECT_ERROR, /* temporarily don't use the peer NI */ LNET_PEER_NI_CONNECTION_REJECTED /* temporarily don't use the peer NI */ }; static int lnet_handle_send_failure_locked(msg, local_nid, status) { switch (status) /* LNET_LOCAL_NI_DOWN can be received without a message being sent. In this case msg == NULL and it is sufficient to update the health of the local NI */ case LNET_LOCAL_NI_DOWN: LASSERT(!msg); local_ni = lnet_get_local_ni(local_nid) if (!local_ni) return /* flag local NI down */ lnet_set_local_ni_health(DOWN) break; case LNET_LOCAL_NI_UP: LASSERT(!msg); local_ni = lnet_get_local_ni(local_nid) if (!local_ni) return /* flag local NI up */ lnet_set_local_ni_health(UP) /* This NI will be a candidate for selection in the next message send */ break; ... } static int lnet_complete_msg_locked(msg, cpt) { status = msg->msg_ev.status if (status != 0) rc = lnet_handle_send_failure_locked(msg, status) if rc == 0 return /* continue as currently done */ } |
...