...
| Code Block |
|---|
/*
 * lustre_lnet_network_descr
 *	Describes a network or a set of networks.
 *	nw_id - the base network name (ex: o2ib) or a full network id
 *		(ex: o2ib3)
 *	nw_expr - expression describing the variable part of the network ID
 *		ex: tcp* - all tcp networks
 *		ex: tcp[1-5] - resolves to tcp1, tcp2, tcp3, tcp4 and tcp5
 */
struct lustre_lnet_network_descr {
	__u32			 nw_id;
	struct cfs_expr_list	*nw_expr;
};
/*
 * lustre_lnet_network_rule
 *	Network rule: the net which matches the rule is assigned the
 *	priority defined in the rule.
 *	nwr_link - link on rule list
 *	nwr_descr - network descriptor
 *	nwr_priority - priority of the rule
 *	nwr_id - ID of the rule, assigned while deserializing if not already
 *		 assigned
 */
struct lustre_lnet_network_rule {
	struct list_head			nwr_link;
	struct lustre_lnet_network_descr	nwr_descr;
	__u32					nwr_priority;
	__u32					nwr_id;
};
/*
 * lustre_lnet_nidr_range_descr
 *	Describes a range of NIDs.
 *	nidr_expr - expression describing the IP part of the NID
 *	nidr_nw - a description of the network
 */
struct lustre_lnet_nidr_range_descr {
	struct list_head			nidr_expr;
	struct lustre_lnet_network_descr	nidr_nw;
};
/*
 * lustre_lnet_nidr_range_rule
 *	Rule applied to a range of NIDs.
 *	nidr_link - link on the rule list
 *	nidr_descr - descriptor of the nid range
 *	priority - priority of the rule
 */
struct lustre_lnet_nidr_range_rule {
	struct list_head			nidr_link;
	struct lustre_lnet_nidr_range_descr	nidr_descr;
	int					priority;
};
/*
 * lustre_lnet_p2p_rule
 *	Peer to peer rule, pairing a source nid range with a destination
 *	nid range.
 *	p2p_link - link on the rule list
 *	p2p_src_descr - source nid range
 *	p2p_dst_descr - destination nid range
 *	priority - priority of the rule
 */
struct lustre_lnet_p2p_rule {
	struct list_head			p2p_link;
	struct lustre_lnet_nidr_range_descr	p2p_src_descr;
	struct lustre_lnet_nidr_range_descr	p2p_dst_descr;
	int					priority;
}; |
...
| Code Block |
|---|
/*
 * lnet_sel_rule_type
 *	Identifies which kind of selection rule is being described.
 */
enum lnet_sel_rule_type {
	LNET_SEL_RULE_NET = 0,	/* net rule */
	LNET_SEL_RULE_NID = 1,	/* NID rule */
	LNET_SEL_RULE_P2P = 2	/* peer to peer rule */
};
/*
 * lnet_expr
 *	Three 32-bit values describing one expression.
 *	NOTE(review): presumably a numeric range from ex_lo to ex_hi stepped
 *	by ex_stride - confirm against the serialization code.
 */
struct lnet_expr {
	__u32	ex_lo;
	__u32	ex_hi;
	__u32	ex_stride;
};
/*
 * lnet_net_descr
 *	Network descriptor holding a network id and one expression by value.
 *	NOTE(review): looks like the pointer-free counterpart of
 *	struct lustre_lnet_network_descr - confirm.
 */
struct lnet_net_descr {
	__u32			nsd_net_id;
	struct lnet_expr	nsd_expr;
};
/*
 * lnet_nid_descr
 *	NID descriptor: four expressions plus a network descriptor.
 *	NOTE(review): nir_ip presumably holds one expression per octet of the
 *	IP part of the NID - confirm.
 */
struct lnet_nid_descr {
	struct lnet_expr	nir_ip[4];
	struct lnet_net_descr	nir_net;
};
struct lnet_ioctl_net_rule {
struct lnet_net_descr nsr_descr
__u32 nsr_prio;
__u32 nsr_id
};
/*
 * lnet_ioctl_nid_rule
 *	NID rule: a NID descriptor together with two 32-bit fields.
 *	NOTE(review): nir_prio and nir_id are presumably the rule priority
 *	and the rule ID - confirm.
 */
struct lnet_ioctl_nid_rule {
	struct lnet_nid_descr	nir_descr;
	__u32			nir_prio;
	__u32			nir_id;
};
sturct lnet_ioctl_net_p2p_rule {
struct lnet_nid_descr p2p_src_descr;
struct lnet_nid_descr p2p_dst_descr;
__u32 p2p_prio;
__u32 p2p_id;
};
/*
 * lnet_ioctl_rule_blk
 *	Describes a set of rules of the same type to transfer to the kernel.
 *	rule_hdr - header information describing the total size of the
 *		   transfer
 *	rule_type - type of rules included
 *	rule_size - size of each individual rule. Can be used to check
 *		    backwards compatibility
 *	rule_count - number of rules included in the bulk
 *	rule_bulk - pointer to the user space allocated memory containing
 *		    the rules
 */
struct lnet_ioctl_rule_blk {
	struct libcfs_ioctl_hdr	rule_hdr;
	enum lnet_sel_rule_type	rule_type;
	__u32			rule_size;
	__u32			rule_count;
	__user void		*rule_bulk;
}; |
Serialization/Deserialization
...
| Gliffy Diagram | ||||
|---|---|---|---|---|
|
The rest of the rules will look very similar to the above, except that the list of rules included in the memory pointed to by rule_bulk is going to contain the pertinent structure format.
On the receiving end the process is reversed to rebuild the linked lists.
Common functions that can be called from user space and kernel space will be created to serialize and deserialize the rules:
| Code Block |
|---|
TBD/* -* int lnet_sel_rule_serialize() int lnet_sel_rule_deserialize() |
Policy Application
Two new IOCTLs will need to be added: IOC_LIBCFS_ADD_RULES and IOC_LIBCFS_GET_RULES.
The handler for the IOC_LIBCFS_ADD_RULES will perform the following operations:
- Rebuild the rules
- Iterate through all the local networks and apply the rules
- Iterate through all the peers and apply the rules.
- Store the rules
Application of the rules will be done under api_mutex_lock and the exclusive lnet_net_lock to avoid having the peer or local net lists changed while the rules are being applied.
There will be different lists one for each rule type. The rules are iterated and applied whenever:
- A local network interface is added.
- A remote peer/peer_net/peer_ni is added
...
* allocate enough memory to store the serialized rules. Fail if not enough memory available.
* Otherwise serialize the rules and return a pointer to the bulk and the size allocated.
* It is the responsibility of the caller of this API to free the memory allocated for the
* bulk rules.
* rules [IN] - rules to be serialized
* rule_type [IN] - rule type to be serialized
* bulk_size [OUT] - size of memory allocated.
* bulk [OUT] - allocated block of memory where the serialized rules are stored.
*/
int lnet_sel_rule_serialize(struct list_head *rules,
			    enum lnet_sel_rule_type rule_type,
			    __u32 *bulk_size,
			    void **bulk);
/*
* lnet_sel_rule_deserialize()
* Given a bulk of rule_type rules, deserialize and append rules to the linked
* list passed in. Each rule is assigned an ID > 0 if an ID is not already assigned
* bulk [IN] - memory block containing serialized rules
* bulk_size [IN] - size of bulk memory block
* rule_type [IN] - type of rule to deserialize
* rules [OUT] - linked list to append the deserialized rules to
*/
int lnet_sel_rule_deserialize(void *bulk, __u32_bulk_size, enum lnet_sel_rule_type rule_type, struct list_head *rules); |
Policy IOCTL Handling
Three new IOCTLs will need to be added: IOC_LIBCFS_ADD_RULES, IOC_LIBCFS_DEL_RULES, and IOC_LIBCFS_GET_RULES.
IOC_LIBCFS_ADD_RULES
The handler for the IOC_LIBCFS_ADD_RULES will perform the following operations:
- Call lnet_sel_rule_deserialize()
- Iterate through all the local networks and apply the rules
- Iterate through all the peers and apply the rules.
- Splice the new list into the existing rules, resolving any conflicts in the process. New rules always trump old rules (no pun intended).
Application of the rules will be done under api_mutex_lock and the exclusive lnet_net_lock to avoid having the peer or local net lists changed while the rules are being applied.
There will be different lists one for each rule type. The rules are iterated and applied whenever:
- A local network interface is added.
- A remote peer/peer_net/peer_ni is added
IOC_LIBCFS_DEL_RULES
The handler for IOC_LIBCFS_DEL_RULES will delete the rule whose ID matches the ID of the rule passed in; if no ID is passed in, the rule which exactly matches the rule passed in is deleted.
There will be no other actions taken on rule removal. Once the rule has been applied it will remain applied until the objects it has been applied to are removed.
IOC_LIBCFS_GET_RULES
The handler for the IOC_LIBCFS_GET_RULES will call lnet_sel_rule_serialize() on the master linked list for the type of the rule identified in struct lnet_ioctl_rule_blk.
It fills as many rules as can fit in the bulk by examining the result of (rule_hdr.ioc_len - sizeof(struct lnet_ioctl_rule_blk)) / rule_size. That many rules are serialized and placed in the bulk memory block. The IOCTL returns ENOSPC if the given bulk memory block is not large enough to hold all the rules. It assigns the number of rules serialized to rule_count. The userspace process can make another call with the number of rules to skip set in rule_count. The handler will skip the indicated number of rules and fill the new bulk memory with the remaining rules. This process can be repeated until all the rules are returned to userspace.
In userspace the rules are printed in the same YAML format as they are parsed in.
Policy Application
Net Rule
The net which matches the rule will be assigned the priority defined in the rule.
NID Rule
The local_ni or the peer_ni which matches the NID in the rule will be assigned the priority defined in the rule.
Peer to Peer Rule
NIDs of local_nis matching the source NID pattern in the peer to peer rule will be added to a list on the peer_nis whose NIDs match the destination NID pattern.
Selection Algorithm Integration
Currently the selection algorithm performs its job in the following general steps:
- determine the best network to communicate to the destination peer by looking at all the LNet networks the peer is on.
- for each network go through all the local NIs and keep track of the best_ni based on:
- NUMA distance
- available credits
- round robin
- As you visit each network select the best_ni from the network with the highest priority. Skip any networks which are lower priority than the "active" one. If there are multiple networks with the same priority then the best_ni is selected from amongst them using the stated criteria.
- Once the best_ni has been selected, select the best peer_ni available by going through the list of the peer_nis on the selected network. Select the peer_ni based on:
- if the NID of the best_ni is on the preferred local NID list of the peer_ni. It is placed there through the application of the peer to peer rules.
- available credits
- round robin