Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  1. iterate over all the networks that a peer can be reached on and select the best local network
    1. The remote network with the highest priority is examined
      1. Network Rule
    2. The local network with the highest priority is selected
      1. Network Rule
  2. If the peer is a remote peer and has no local networks,
    1. then select the remote peer network with the highest priority
      1. Network Rule
    2. Select the highest priority remote peer_ni on the network selected
      1. NID Rule
    3. Now that the peer's network and NI are decided select the router in round robin from the peer NI's preferred router list
      1. Router Rule
  3. Otherwise for local peers, select the peer_ni from the peer.
    1. highest priority peer NI is selected
      1. NID Rule
  4. Select the local NI to send from
    1. If the peer has a set of preferred local NIs select the highest priority from them. Otherwise select in round robin
      1. NID Pair Rule

Kernel Design

Anchor
InKernelStructures
InKernelStructures
In Kernel Structures

Code Block
/*
 * lnet structure which will keep a list of UDSPs.
 * Only the member added for UDSP support is shown; the existing members
 * are elided with "...".
 */
struct lnet {
	...
	/* head of the list of configured UDSPs */
	struct list_head ln_udsp_list;
	...
};

/* each NID range is defined as net_id and an ip range */
struct lnet_ud_nid_descr {
	__u32 ud_net_id;
	list_head ud_ip_range;
}

/*
 * UDSP action types.
 * Stored in lnet_udsp::udsp_action_type to say which action a rule
 * carries.
 */
enum lnet_udsp_action_type {
	EN_LNET_UDSP_ACTION_PRIORITY = 0,
	EN_LNET_UDSP_ACTION_NONE = 1,
};

 /*
 * a UDSP rule can have up to three user defined NID descriptors
 * 		- src: defines the local NID range for the rule
 * 		- dst: defines the peer NID range for the rule
 * 		- rte: defines the router NID range for the rule
 *
 * An action union defines the action to take when the rule
 * is matched
 */ 
struct lnet_udsp {
	list_head udsp_on_list;
	__u32 idx;
	lnet_ud_nid_descr *udsp_src;
	lnet_ud_nid_describe *udsp_dst;
	lnet_ud_nid_descr *udsp_rte;
	enum lnet_udsp_action_type udsp_action_type;
	union udsp_action {
		__u32 udsp_priority;
	};
}

/* The rules are flattened in the LNet structures as shown below */
struct lnet_net {
...
	/*
	 * defines the relative priority of this net compared to others in the
	 * system. lnet_find_best_ni_on_local_net() skips a net whose value is
	 * numerically greater than the best seen so far, i.e. a lower value
	 * is preferred.
	 */
	__u32 net_priority;
...
};


/* Remote network descriptor (excerpt); only the UDSP addition is shown. */
struct lnet_remotenet {
...
	/*
	 * defines the relative priority of the remote net compared to other
	 * remote nets
	 * NOTE(review): presumably a lower value is preferred, as for
	 * net_priority -- confirm against the selection code.
	 */
	__u32 lrn_priority;
...
};

/* Local network interface descriptor (excerpt); only the UDSP addition is shown. */
struct lnet_ni {
...
	/*
	 * defines the relative priority of this NI compared to other NIs in
	 * the net. lnet_get_best_ni() discards an NI whose value is
	 * numerically greater than the best seen so far, i.e. a lower value
	 * is preferred.
	 */
	__u32 ni_priority;
...
};

/* Peer network interface descriptor (excerpt); only the UDSP additions are shown. */
struct lnet_peer_ni {
...
	/*
	 * defines the relative peer_ni priority compared to other peer_nis in
	 * the peer. lnet_select_peer_ni() skips a peer NI whose value is
	 * numerically greater than the best seen so far, i.e. a lower value
	 * is preferred.
	 */
	__u32 lpni_priority;

	/*
	 * defines the list of local NID(s) (>=1) which should be used as the
	 * source.
	 * NOTE(review): presumably 'nid' holds a single preferred NID and
	 * 'nids' an array when there are several; the discriminator is not
	 * shown in this excerpt -- confirm.
	 */
	union lpni_pref {
		lnet_nid_t nid;
		lnet_nid_t *nids;
	} lpni_pref;

	/*
	 * defines the list of router NID(s) to be used when sending to this
	 * peer NI if the peer NI is remote
	 */
	lnet_nid_t *lpni_rte_nids;
...
};

/*
 * UDSPs will be passed to the kernel via IOCTL.
 * Every command must carry a distinct number so that LNetCtl() can
 * dispatch on it; two commands sharing a number would produce duplicate
 * case labels.
 */
#define IOC_LIBCFS_ADD_UDSP _IOWR(IOC_LIBCFS_TYPE, 106, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_DEL_UDSP _IOWR(IOC_LIBCFS_TYPE, 107, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_UDSP _IOWR(IOC_LIBCFS_TYPE, 108, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_UDSP_SIZE _IOWR(IOC_LIBCFS_TYPE, 109, IOCTL_CONFIG_SIZE)

Kernel IOCTL Handling

Code Block
/*
 * api-ni.c will be modified to handle adding a UDSP
 * All UDSP operations are done under mutex and exclusive spin
 * lock to avoid constructs changing during application of the
 * policies.
 *
 * Each UDSP case below casts 'arg' to struct lnet_ioctl_config_udsp,
 * calls the matching helper while holding the_lnet.ln_api_mutex and
 * returns the helper's result.
 *
 * NOTE(review): only the mutex is visible in this excerpt; the spin lock
 * is presumably taken inside the helpers -- confirm.
 */
int
LNetCtl(unsigned int cmd, void *arg)
{
...
	case IOC_LIBCFS_ADD_UDSP: {
		struct lnet_ioctl_config_udsp *config_udsp = arg;
		mutex_lock(&the_lnet.ln_api_mutex);
		/*
		 * add and do initial flattening of the UDSP into
		 * internal structures.
		 */
		rc = lnet_add_and_flatten_udsp(config_udsp);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_DEL_UDSP: {
		struct lnet_ioctl_config_udsp *del_udsp = arg;
		mutex_lock(&the_lnet.ln_api_mutex);
		/*
		 * delete the rule identified by index
		 */
		rc = lnet_del_udsp(del_udsp->udsp_idx);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_UDSP_SIZE: {
		struct lnet_ioctl_config_udsp *get_udsp = arg;
		mutex_lock(&the_lnet.ln_api_mutex);
		/*
		 * get the UDSP size specified by idx
		 * NOTE(review): the helper is named "_num" although it is
		 * described as returning a size -- confirm the intended name.
		 */
		rc = lnet_get_udsp_num(get_udsp);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}

	case IOC_LIBCFS_GET_UDSP: {
		struct lnet_ioctl_config_udsp *get_udsp = arg;
		mutex_lock(&the_lnet.ln_api_mutex);
		/*
		 * get the udsp at index provided. Return -ENOENT if
		 * no more UDSPs to get
		 *
		 * A GET request must not add a rule, so the lookup helper is
		 * called here rather than the add helper.
		 */
		rc = lnet_get_udsp(get_udsp, get_udsp->udsp_idx);
		mutex_unlock(&the_lnet.ln_api_mutex);
		return rc;
	}
...
}

IOC_LIBCFS_ADD_UDSP

The handler for IOC_LIBCFS_ADD_UDSP will perform the following operations:

...

  1. A local network interface is added.
  2. A remote peer/peer_net/peer_ni is added

IOC_LIBCFS_DEL_UDSP

The handler for IOC_LIBCFS_DEL_UDSP will

...

When the updated rule set is applied all traces of deleted or modified rules are removed from the LNet constructs.

IOC_LIBCFS_GET_UDSP_SIZE

Return the size of the UDSP specified by index.

IOC_LIBCFS_GET_UDSP

The handler for IOC_LIBCFS_GET_UDSP will serialize the rules on the UDSP list.

...

TODO: Another option is to have IOC_LIBCFS_GET_UDSP_NUM, which gets the total size needed for all UDSPs, so that user space can make one call to get all the UDSPs. However, this complicates the marshaling function. User space would also need to handle the case where the combined size of the UDSPs is too large for one call. The above proposal needs more iterations to get all the UDSPs, but the code should be simpler. Since the number of UDSPs is expected to be small, the above proposal should be fine.

Kernel Selection Algorithm Modifications

Code Block
/*
 * select an NI from the Nets with highest priority
 *
 * Walks every peer net of 'peer', skips those with no matching local net,
 * and keeps the best NI found on the net(s) with the numerically lowest
 * net_priority.
 */
struct lnet_ni *
lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt)
{
...
	list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
	...
		struct lnet_net *net;
		net = lnet_get_net_locked(peer_net->lpn_net_id);
		if (!net)
			continue;

		/*
		 * look only at the NIs with the highest priority and disregard
		 * nets which have lower priority. Nets with equal priority are
		 * examined and the best_ni is selected from amongst them.
		 */
		net_prio = net->net_priority;
		if (net_prio > best_net_prio)
			continue;
		else if (net_prio < best_net_prio) {
			/* a better net was found: restart the NI selection */
			best_net_prio = net_prio;
			best_ni = NULL;
		}
		/*
		 * NOTE(review): 'best_peer_net' is not set anywhere in this
		 * excerpt; the loop variable 'peer_net' may be what is
		 * intended here -- confirm.
		 */
		best_ni = lnet_find_best_ni_on_spec_net(best_ni, peer,
							best_peer_net, md_cpt, false);
	...
	}
...
}

/*
 * select the NI with the highest priority
 *
 * The excerpt shows the per-NI filter applied inside the (elided) loop
 * over the NIs: fatal NIs are skipped, then health is compared, and only
 * when health is equal is ni_priority compared. A numerically lower
 * ni_priority is preferred.
 */
static struct lnet_ni *
lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *best_ni,
		 struct lnet_peer *peer, struct lnet_peer_net *peer_net,
		 int md_cpt)
{
...
	ni_prio = ni->ni_priority;

	if (ni_fatal) {
		continue;
	} else if (ni_healthv < best_healthv) {
		continue;
	} else if (ni_healthv > best_healthv) {
		best_healthv = ni_healthv;
		if (distance < shortest_distance)
			shortest_distance = distance;
	} else if (ni_prio > best_ni_prio) {
		/*
		 * if this NI is lower in priority than the one already set
		 * then discard it
		 */
		continue;
	} else if (ni_prio < best_ni_prio) {
		/* otherwise use it and set the best priority so far to this NI's */
		best_ni_prio = ni_prio;
	}

...
}

/*
 * When a UDSP rule associates local NIs with remote NIs, the list of local NI NIDs
 * is flattened to a list in the associated peer_NI. When selecting a peer NI, the
 * peer NI with the corresponding preferred local NI is selected.
 *
 * The body is elided in this excerpt. Presumably returns true when 'nid' is one of
 * the preferred local NIDs recorded in 'lpni' -- confirm against the implementation.
 */
bool
lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
{
...
}

/*
 * select the peer NI with the highest priority first and then the
 * preferred one
 *
 * The excerpt shows the per-peer-NI filter applied inside the (elided)
 * loop: health is compared first, then lpni_priority (a numerically
 * lower value is preferred), and finally whether the peer NI lists
 * best_ni's NID as preferred.
 */
static struct lnet_peer_ni *
lnet_select_peer_ni(struct lnet_send_data *sd, struct lnet_peer *peer,
		    struct lnet_peer_net *peer_net)
{
...
	ni_is_pref = lnet_peer_is_pref_nid_locked(lpni, best_ni->ni_nid);

	lpni_prio = lpni->lpni_priority;

	if (lpni_healthv < best_lpni_healthv) {
		continue;
	} else if (lpni_prio > best_lpni_prio) {
		/* select the NI with the highest priority. */
		continue;
	} else if (lpni_prio < best_lpni_prio) {
		best_lpni_prio = lpni_prio;
	} else if (!preferred && ni_is_pref) {
		/*
		 * select the NI which has the best_ni's NID in its
		 * preferred list
		 */
		preferred = true;
	}
...
}

User Space Design

UDSP Marshaling

After a UDSP is parsed in user space it needs to be marshaled and sent to the kernel. The kernel will de-marshal the data and store it in its own data structures. The UDSP is formed of the following pieces of information:

...

Gliffy Diagram
nameDataFlow
pagePin2

DLC APIs

The DLC library will provide the outlined APIs to expose a way to create, delete and show rules.

...