...
| Code Block |
|---|
static inline int
lnet_get_peer_net_health(struct lnet_peer_net *lpn)
{
	/*
	 * Report the health value stored on the peer net. It holds the
	 * highest health value of all lpnis on this peer net and is
	 * maintained whenever an lpni health value is updated.
	 */
	const int health = lpn->lpn_health;

	return health;
}
/*
 * Select the best local NI to use for sending to @peer.
 *
 * Walks the peer's peer_nets and keeps the best (peer_net, local net)
 * pair seen so far. Health is compared first (higher wins); priority
 * is compared only when health is equal (lower value wins). The NI is
 * then picked from the winning pair by lnet_find_best_ni_on_spec_net().
 */
struct lnet_ni *
lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt)
{
	...
	/*
	 * All criteria are defaulted at the beginning of the loop such that
	 * the first peer_net is selected and then overwritten if there is a
	 * better peer_net found
	 */
	list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
		...
		struct lnet_net *net;

		/* get the health of a peer net */
		peer_net_health = lnet_get_peer_net_health(peer_net);
		if (peer_net_health < best_peer_net_health)
			continue;
		else if (peer_net_health > best_peer_net_health)
			best_peer_net_health = peer_net_health;
		/* consider only highest priority peer_net */
		else if (peer_net_prio > best_peer_net_prio)
			continue;
		else if (peer_net_prio < best_peer_net_prio)
			best_peer_net_prio = peer_net_prio;

		/* skip peer nets for which no local net was found */
		net = lnet_get_net_locked(peer_net->lpn_net_id);
		if (!net)
			continue;
		net_prio = net->net_priority;
		/*
		 * look only at the Nets with the highest priority and disregard
		 * nets which have lower priority. Nets with equal priority are
		 * examined and the best_ni is selected from amongst them.
		 */
		net_health = lnet_get_local_net_health(net);
		if (net_health < best_net_health)
			continue;
		else if (net_health > best_net_health)
			best_net_health = net_health;
		else if (net_prio > best_net_prio)
			continue;
		else if (net_prio < best_net_prio)
			best_net_prio = net_prio;

		/*
		 * Record the pair only once both the peer net and the local
		 * net have passed every check above.
		 */
		best_net = net;
		best_peer_net = peer_net;
		...
	}
	if (!best_net || !best_peer_net)
		goto fail;
	best_ni = lnet_find_best_ni_on_spec_net(NULL, peer,
						best_peer_net, md_cpt, false);
	...
}
/*
 * Select the NI with the highest priority.
 *
 * A candidate flagged as fatal is skipped. Otherwise health is compared
 * first (higher wins) and NI priority is compared when health is equal
 * (lower value wins). The shortest distance seen is tracked along the
 * way because it is tested later.
 */
static struct lnet_ni *
lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *best_ni,
		 struct lnet_peer *peer, struct lnet_peer_net *peer_net,
		 int md_cpt)
{
	...
	ni_prio = ni->ni_priority;
	if (ni_fatal) {
		continue;
	} else if (ni_healthv < best_healthv) {
		continue;
	} else if (ni_healthv > best_healthv) {
		best_healthv = ni_healthv;
		if (distance < shortest_distance)
			shortest_distance = distance;
	/*
	 * if this NI is lower in priority than the one already set then
	 * discard it otherwise use it and set the best priority so far to
	 * this NI's. keep track of the shortest distance because it is
	 * tested later
	 */
	} else if (ni_prio > best_ni_prio) {
		continue;
	} else if (ni_prio < best_ni_prio) {
		best_ni_prio = ni_prio;
		if (distance < shortest_distance)
			shortest_distance = distance;
	}
	...
}
/*
 * When a UDSP rule associates local NIs with remote NIs, the NIDs of those
 * local NIs are flattened into a list kept on the associated peer_NI. When
 * selecting a peer NI, the peer NI with the corresponding preferred local NI
 * is selected.
 *
 * Callers in this document use the boolean result to tell whether @nid is
 * on @lpni's preferred list (body elided here).
 */
bool
lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
{
...
}
/*
 * Select the peer NI with the highest priority first and then check
 * if it's preferred.
 *
 * A candidate with lower health than the best so far is skipped. Among
 * the rest, priority is compared (lower value wins). Only when priority
 * is equal is the preferred-NID check applied.
 */
static struct lnet_peer_ni *
lnet_select_peer_ni(struct lnet_send_data *sd, struct lnet_peer *peer,
		    struct lnet_peer_net *peer_net)
{
	...
	ni_is_pref = lnet_peer_is_pref_nid_locked(lpni, best_ni->ni_nid);
	lpni_prio = lpni->lpni_priority;
	if (lpni_healthv < best_lpni_healthv)
		continue;
	/*
	 * select the NI with the highest priority.
	 */
	else if (lpni_prio > best_lpni_prio)
		continue;
	else if (lpni_prio < best_lpni_prio)
		best_lpni_prio = lpni_prio;
	/*
	 * select the NI which has the best_ni's NID in its preferred list
	 */
	else if (!preferred && ni_is_pref)
		preferred = true;
	...
}
/*
 * Look up the gateway to use for reaching @dst_nid.
 *
 * Iterates the destination peer's peer_nets and, within each, its
 * peer NIs, skipping entries whose priority value is higher than the
 * best seen so far (lower value wins). A route lookup is done for each
 * peer NI that survives the priority check.
 */
static int
lnet_handle_find_routed_path(struct lnet_send_data *sd,
			     lnet_nid_t dst_nid,
			     struct lnet_peer_ni **gw_lpni,
			     struct lnet_peer **gw_peer)
{
	...
	lpni = lnet_find_peer_ni_locked(dst_nid);
	peer = lpni->lpni_net->lpn_peer;
	list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
		peer_net_priority = peer_net->lpn_priority;
		if (peer_net_priority > peer_net_best_priority)
			continue;
		else if (peer_net_priority < peer_net_best_priority)
			peer_net_best_priority = peer_net_priority;

		/* restart the peer NI walk for this peer net */
		lpni = NULL;
		while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net,
							    lpni)) != NULL) {
			/* find best gw for this lpni */
			lpni_prio = lpni->lpni_priority;
			if (lpni_prio > lpni_best_prio)
				continue;
			else if (lpni_prio < lpni_best_prio)
				lpni_best_prio = lpni_prio;
			/*
			 * lnet_find_route_locked will be changed to consider
			 * the list of gw NIDs on the lpni
			 */
			gw = lnet_find_route_locked(NULL, lpni, sd->sd_rtr_nid);
			...
			/*
			 * if gw is MR then select best_NI. Increment the
			 * sequence number of the gw NI for Round Robin
			 * selection.
			 */
			...
		}
	}
	...
}
/*
 * NOTE(review): the statements below repeat the tail of
 * lnet_select_peer_ni() and have no function header in this listing;
 * confirm where this fragment belongs.
 *
 * For Non-MR peers we always want to use the preferred NID because
 * if we don't the non-MR peer will have problems when it receives
 * messages from a different NI other than the one it's expecting.
 * However, for MR cases we need to adhere to the rule that health
 * always trumps all other criteria. In the preferred NIDs case, if
 * we have a healthier peer-NI which doesn't have the local_ni on its
 * preferred list, then we should choose it.
 *
 * This scenario is handled here: lnet_handle_send_case_locked()
 */
	ni_is_pref = lnet_peer_is_pref_nid_locked(lpni, best_ni->ni_nid);
	lpni_prio = lpni->lpni_priority;
	if (lpni_healthv < best_lpni_healthv)
		continue;
	/*
	 * select the NI with the highest priority.
	 */
	else if (lpni_prio > best_lpni_prio)
		continue;
	else if (lpni_prio < best_lpni_prio)
		best_lpni_prio = lpni_prio;
	/*
	 * select the NI which has the best_ni's NID in its preferred list
	 */
	else if (!preferred && ni_is_pref)
		preferred = true;
	...
}
/*
 * Look up the gateway to use for reaching @dst_nid.
 *
 * First pass: iterate the destination peer's peer_nets and their peer
 * NIs, skipping entries whose priority value is higher than the best
 * seen so far (lower value wins), and remember the surviving peer NI in
 * best_lpni. Second step: do a single route lookup for best_lpni.
 */
static int
lnet_handle_find_routed_path(struct lnet_send_data *sd,
			     lnet_nid_t dst_nid,
			     struct lnet_peer_ni **gw_lpni,
			     struct lnet_peer **gw_peer)
{
	...
	/*
	 * NOTE(review): the lookup result is dereferenced unchecked;
	 * confirm it cannot be NULL here.
	 */
	lpni = lnet_find_peer_ni_locked(dst_nid);
	peer = lpni->lpni_net->lpn_peer;
	list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
		peer_net_priority = peer_net->lpn_priority;
		if (peer_net_priority > peer_net_best_priority)
			continue;
		else if (peer_net_priority < peer_net_best_priority)
			peer_net_best_priority = peer_net_priority;

		/* restart the peer NI walk for this peer net */
		lpni = NULL;
		while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net,
							    lpni)) != NULL) {
			/* find best gw for this lpni */
			lpni_prio = lpni->lpni_priority;
			if (lpni_prio > lpni_best_prio)
				continue;
			else if (lpni_prio < lpni_best_prio)
				lpni_best_prio = lpni_prio;
			best_lpni = lpni;
			...
		}
	}
	...
	/*
	 * lnet_find_route_locked will be changed to consider the list of
	 * gw NIDs on the lpni
	 */
	gw = lnet_find_route_locked(NULL, best_lpni, sd->sd_rtr_nid);
	...
	/*
	 * if gw is MR then select best_NI. Increment the sequence number of
	 * the gw NI for Round Robin selection.
	 */
} |
Selection Algorithm Notes
When examining the peer_net, we need to examine its health. The health of a peer_net can be derived from the health of the NIs in that peer_net. We can keep a health value in the peer_net, which is set to the best health value of all the peer_NIs in that peer_net. When we are selecting the peer_net in lnet_find_best_ni_on_local_net(), we test that health value. This logic can be implemented for local networks as well. The loop will then select the best pair of peer and local nets; the best_ni is then selected from the best network outside the loop.
For Non-MR peers we always want to use the preferred NID because, if we don't, the non-MR peer will have problems when it receives messages from a different NI other than the one it's expecting. However, for MR cases we need to adhere to the rule that health always trumps all other criteria. In the preferred NIDs case, if we have a healthier peer-NI which doesn't have the local_ni on its preferred list, then we should choose it.
We need to select the best peer_net (highest priority). Then from that peer_net we select the peer_ni with the highest priority, and if that peer_ni has a list of preferred routers, we select the route to use from this list. For remote peer-nis we never ding the health value because we never send messages directly to them. So if there is a failure to send, we ding the router's NI. The only exception to this rule is the REPLY/ACK case. If we don't receive the REPLY/ACK, whose fault is it: the remote peer's or the router's? To make things simple, we always blame the router.
The distance criterion in the selection of the best local_ni should be ranked below the priority assigned by the admin.
User Space Design
UDSP Marshaling
...
| Code Block |
|---|
/* each NID range is defined as net_id and an ip range */
struct lnet_ud_nid_descr {
__u32 ud_net_id;
list_head ud_ip_range;
}
/* UDSP action types */
enum lnet_udsp_action_type {
	/* no action */
	EN_LNET_UDSP_ACTION_NONE = 0,
	/* rule carries a priority (see udsp_priority in struct lnet_udsp) */
	EN_LNET_UDSP_ACTION_PRIORITY = 1,
};
/*
* a UDSP rule can have up to three user defined NID descriptors
* - src: defines the local NID range for the rule
* - dst: defines the peer NID range for the rule
* - rte: defines the router NID range for the rule
*
* An action union defines the action to take when the rule
* is matched
*/
struct lnet_udsp {
list_head udsp_on_list;
__u32 idx;
enum lnet_udsp_action_type udsp_action_type;
lnet_ud_nid_descr *udsp_src;
lnet_ud_nid_describe union defines the action to take when the rule
* is matched
*/
struct lnet_udsp {
list_head udsp_on_list*udsp_dst;
lnet_ud_nid_descr *udsp_rte;
union udsp_action {
__u32 udsp_priority;
};
} |
Marshaled Structures
| Code Block |
|---|
/*
 * One numeric range expression.
 * NOTE(review): presumably low bound, high bound and stride; confirm
 * against the NID range syntax.
 */
struct lnet_range_expr {
	__u32 re_lo;
	__u32 re_hi;
	__u32 re_stride;
}; |
Marshaled Structures
| Code Block |
|---|
struct cfs_range_expru32 re_stride; }; struct lnet_ioctl_udsp_descr_hdr { /* * The literals SRC, DST and RTE are encoded * here. */ __u16 ud_descr_type; __u16 ud_descr_count; }; struct lnet_ioctl_udsp_descr { struct listlnet_head re_link; __u32 re_lo; __u32 re_hi; __u32 re_stride; }; struct lnet_ioctl_udsp {ioctl_udsp_descr_hdr ud_hdr; }; struct lnet_ioctl_udsp { struct libcfs_ioctl_hdr iou_hdr; __u32 iou_idx; enum lnet_udsp_action_typeu32 iou_action_type union action iou_action { __u32 priority; } __u32 iou_src_dot_expr_count; __u32 iou_dst_dot_expr_count; __u32 iou_rte_dot_expr_count; char iou_bulk[0]; }; |
...