...
| Code Block |
|---|
/*
 * lnet structure which will keep a list of UDSPs.
 * Only the member relevant to UDSPs is listed; "..." stands for the
 * members that are not shown in this excerpt.
 */
struct lnet {
...
/*
 * head of the list of UDSPs
 * NOTE(review): presumably struct lnet_udsp entries are linked here
 * through their udsp_on_list member - confirm.
 */
list_head ln_udsp_list;
...
}
/* each NID range is defined as net_id and an ip range */
struct lnet_ud_nid_descr {
/* net id part of the NID range */
__u32 ud_net_id;
/*
 * ip range part of the NID range
 * NOTE(review): the element type stored on this list is not shown in
 * this excerpt - presumably range expressions, confirm.
 */
list_head ud_ip_range;
}
/*
 * UDSP action types
 * The value is stored in a rule's action type field
 * (udsp_action_type in struct lnet_udsp).
 */
enum lnet_udsp_action_type {
	/* no action is associated with the rule */
	EN_LNET_UDSP_ACTION_NONE = 0,
	/*
	 * priority action
	 * NOTE(review): presumably selects the udsp_priority member of
	 * the action union in struct lnet_udsp - confirm.
	 */
	EN_LNET_UDSP_ACTION_PRIORITY = 1,
};
/*
 * a UDSP rule can have up to three user defined NID descriptors
 * - src: defines the local NID range for the rule
 * - dst: defines the peer NID range for the rule
 * - rte: defines the router NID range for the rule
 *
 * An action union defines the action to take when the rule
 * is matched
 */
struct lnet_udsp {
	/*
	 * list linkage for this rule
	 * NOTE(review): presumably links the rule on lnet.ln_udsp_list -
	 * confirm.
	 */
	list_head udsp_on_list;
	/* index of the rule */
	__u32 idx;
	/* local NID range descriptor */
	lnet_ud_nid_descr *udsp_src;
	/* peer NID range descriptor */
	lnet_ud_nid_descr *udsp_dst;
	/* router NID range descriptor */
	lnet_ud_nid_descr *udsp_rte;
	/* type of the action to take when the rule is matched */
	enum lnet_udsp_action_type udsp_action_type;
	/* action to take when the rule is matched */
	union udsp_action {
		__u32 udsp_priority;
	};
};
/*
 * The rules are flattened in the LNet structures as shown below.
 * Only the members which hold the flattened rule data are listed;
 * "..." stands for the members that are not shown in this excerpt.
 */
struct lnet_net {
...
/*
 * defines the relative priority of this net compared to others in
 * the system
 */
__u32 net_priority;
...
}
struct lnet_ni {
	...
	/*
	 * defines the relative priority of this NI compared to other NIs
	 * in the net
	 */
	__u32 ni_priority;
	...
}
struct lnet_peer_net {
	...
	/*
	 * defines the relative priority of this peer net compared to
	 * others in the system
	 */
	__u32 lpn_priority;
	...
}
struct lnet_peer_ni {
...
/* defines the relative peer_ni priority compared to other peer_nis in the peer */
__u32 lpni_priority;
/*
 * defines the list of local NID(s) (>=1) which should be used as the source
 * NOTE(review): presumably 'nid' is used when there is exactly one
 * preferred NID and 'nids' points to an array otherwise - confirm.
 * The element count for 'nids' is not shown in this excerpt.
 */
union lpni_pref {
lnet_nid_t nid;
lnet_nid_t *nids;
}
/*
 * defines the list of router NID(s) to be used when sending to this peer NI
 * if the peer NI is remote
 * NOTE(review): the length of this list is not shown in this excerpt.
 */
lnet_nid_t *lpni_rte_nids;
...
}
/*
 * UDSPs will be passed to the kernel via IOCTL.
 * Each command must use its own command number, otherwise two commands
 * encode to the same ioctl value and cannot be told apart.
 */
#define IOC_LIBCFS_ADD_UDSP _IOWR(IOC_LIBCFS_TYPE, 106, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_DEL_UDSP _IOWR(IOC_LIBCFS_TYPE, 107, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_UDSP _IOWR(IOC_LIBCFS_TYPE, 108, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_UDSP_SIZE _IOWR(IOC_LIBCFS_TYPE, 109, IOCTL_CONFIG_SIZE) |
...
| Code Block |
|---|
/*
 * One range expression. An address is expressed as a list of these
 * (four for an IP address; other address types can use a different
 * number). This structure is part of the marshalled data, so the order
 * and width of its members must not change.
 * NOTE(review): presumably the expression matches the values from re_lo
 * to re_hi in steps of re_stride - confirm.
 */
struct lnet_range_expr {
__u32 re_lo;
__u32 re_hi;
__u32 re_stride;
};
struct lnet_ioctl_udsp_descr_hdr {
/*
* The literals SRC, DST and RTE are encoded
* here.
*/
__u16u32 ud_descr_type;
__u16u32 ud_descr_count;
};
/* each matching expression in the UDSP is described with this */
struct lnet_ioctl_udsp_descr {
struct lnet_ioctl_udsp_descr_hdr udiud_src_hdr;
union action iou_action {
__u32 priority;
}
char iud_bulk[0];
};
/* top level structure used to pass one marshalled UDSP via IOCTL */
struct lnet_ioctl_udsp {
	struct libcfs_ioctl_hdr iou_hdr;
	/* index of the rule */
	__u32 iou_idx;
	/* type of the action stored in iou_action */
	__u32 iou_action_type;
	union {
		__u32 priority;
	} iou_action;
	/*
	 * number of expressions describing the SRC, DST and RTE address
	 * respectively
	 */
	__u32 iou_src_dot_expr_count;
	__u32 iou_dst_dot_expr_count;
	__u32 iou_rte_dot_expr_count;
	/* marshalled matching expressions follow the structure */
	char iou_bulk[0];
}; |
The address is expressed as a list of lnet_range_expr. These need to be marshalled. For an IP address there are 4 of these structures. Other types of addresses can have a different number. As an example, gemini will only have one. The corresponding iou_[src|dst|rte]_dot_expr_count is set to the number of expressions describing the address. Each expression is then flattened in the structure. They have to be flattened in the order defined: SRC, DST, RTE. The marshalled structure will look like this:
| Code Block |
|---|
[lnet_ioctl_udsp | lnet_ioctl_udsp_descr | lnet_range_expr ] |
The matching expressions need to follow this exact order: SRC, DST, RTE.
It's worth noting that lnet_ioctl_udsp_descr_hdr.ud_descr_type is a 32 bit field which gets set to the literal SRC, DST or RTE depending on what it's describing. Using a 4 byte value that contains ASCII letters, which serve as magic values, can help in rebuilding a system's information in case of corruption.
The kernel will receive the marshalled data and will form its internal structures. The functions to marshal and unmarshal should be straightforward. Note that user space and kernel space use the same structures. These structures will be defined in a common location. For this reason the functions to marshal and unmarshal will have the same interface and the same logic. However, because they are needed for both kernel and user space, they will need to be duplicated in two locations: one in the kernel path to be compiled in the kernel, and the other in user space to be compiled for use in user space utilities.
Marshalling and
...
unmarshalling functions
Common functions that can be called from user space and kernel space will be created to marshal and de-marshal the UDSPs:
| Code Block |
|---|
/* * lnet_get_udsp_size() * Given the UDSP return the size needed to flattenstore the marshalled UDSP */ int lnet_get_udsp_size(struct lnet_udsp *udsp); /* * lnet_udsp_marshal() * Marshal the UDSP pointed to by udsp into the memory block that is provided. In order for this * API to work in both Kernel and User space the bulk pointer needs to be passed in. When this API * is called in the kernel, it is expected that the bulk memory is allocated in userspace. This API * is intended to be called from the kernel to marshal the rules before sending it to user space. * It will also be called from user space to marshal the udsp before sending to the kernel. * udsp [IN] - udsp to marshal * bulk_size [IN] - size of bulk. * bulk [OUT] - allocated block of memory where the serialized rules are stored. */ int lnet_udsp_marshal(struct lnet_udsp *udsp, __u32 *bulk_size, void __user *bulk); /* * lnet_udsp_demarshal() * Given a bulk containing a single UDSP, demarshal and populate the udsp structure provided * bulk [IN] - memory block containing serialized rules * bulk_size [IN] - size of bulk memory block * udsp [OUT] - preallocated struct lnet_udsp */ int *lnet_udsp_demarshal(void __user *bulk, __u32_bulk_size, struct lnet_udsp *usdp); |
...
Client sets A and B are all configured on the same LNet network, for example o2ib. The servers are on a different LNet network, o2ib2. But due to the underlying network topology it is more efficient to route traffic from Client set A over Router set A and Client set B over Router set B. The green links are wider than the red links. UDSPs can be configured on the clients to specify the preferred set of router NIDs.
Fine Grained Routing
TODO: needs to be filled out.
Node Types
Based on
| Jira | ||||||
|---|---|---|---|---|---|---|
|
...