Multi-Rail HLD
Logic to check the uniqueness of the NET, lnet_net_unique(), needs to be changed to check that the same NI is not being added twice in the same lnet_net
Each NI needs to have its own set of credits. Currently the LND uses the same set of credits for everything
Once the LND is started correctly, add the ni to the lnet_nets ni list.
Go over all lnet_nets and shutdown each NI on the lnet_net list.
The assert that checks whether ln_nis_zombie is empty is bad, since an NI from a previous single NI shutdown could still be on the zombie list; it should be moved inside the section protected by lnet_net_lock.
lnet_peer_tables_cleanup() - the entire peer structure will be redone
There is no longer a 1:1 mapping between net and ni, so each call site needs to be evaluated to see what the code should be doing there. In many cases, the lookup of an lnet_ni can be replaced with the lookup of an lnet_net.
Follow all places where this is called
!@+-< lnet_net2ni_locked
+-< lnet_net2ni
| +-< kiblnd_passive_connect – lookup of a single NID
| | +-< kiblnd_cm_callback
| +-< lnet_accept – lookup of a single NID
| | +-< lnet_acceptor
| +-< lnet_dyn_del_ni – takes a netid as a parameter, and would be more accurately called lnet_dyn_del_net(); it implements the IOC_LIBCFS_DEL_NET ioctl.
| | +-< lnet_dyn_unconfigure
| | | +-< lnet_ioctl
| +-< LNetCtl – there are two calls, both used to get the lnd for the ni. Any ni on the net works for that, as does a lookup of the lnet_net, provided it has an lnd pointer (all lnet_ni attached to an lnet_net use the same LND).
| | +-< lnet_ioctl
| | +-< ptlrpc_expire_one_request
| | | +-< ptlrpc_check_set
| | | +-< ptlrpc_expired_set
| | | +-< ctx_refresh_timeout
| | +-< ptlrpc_uuid_to_peer
| | | +-< ptlrpc_uuid_to_connection
+-< lnet_islocalnet – used to decide whether the network is local (direct-attached); a lookup of the lnet_net works for this.
| +-< lnet_add_route
| | +-< LNetCtl
| | | +-< lnet_ioctl
| | | +-< ptlrpc_expire_one_request
| | | +-< ptlrpc_uuid_to_peer
| | +-< lnet_parse_route
| | | +-< lnet_parse_route_tbs
+-< lnet_send – the call to lnet_net2ni_locked() serves as the current version of the local ni selection algorithm.
| +-< lnet_parse_get
| | +-< lnet_parse_local
| | | +-< lnet_parse
| | | +-< delayed_msg_process
| +-< LNetPut
| | +-< srpc_post_active_rdma
| | | +-< srpc_post_active_rqtbuf
| | | +-< srpc_do_bulk
| | | +-< srpc_send_reply
| | +-< ptl_send_buf
| | | +-< ptlrpc_send_reply
| | | +-< ptl_send_rpc
| | +-< ptlrpc_start_bulk_transfer
| | | +-< target_bulk_io
| +-< LNetGet
| | +-< lnet_ping
| | | +-< LNetCtl
| | +-< lnet_ping_router_locked
| | | +-< lnet_router_checker
| | +-< srpc_post_active_rdma
| | | +-< srpc_post_active_rqtbuf
| | | +-< srpc_do_bulk
| | | +-< srpc_send_reply
| | +-< ptlrpc_start_bulk_transfer
| | | +-< target_bulk_io
| +-< lnet_complete_msg_locked
| | +-< lnet_finalize
| | | +-< kgnilnd_tx_done
| | | +-< kgnilnd_setup_rdma
| | | +-< kgnilnd_recv
| | | +-< kiblnd_tx_done
| | | +-< kiblnd_reply
| | | +-< kiblnd_recv
# @| | | +-< ksocknal_destroy_conn
# @| | | +-< ksocknal_tx_done
# @| | | +-< ksocknal_process_receive
# @| | | +-< lnet_ni_recv
# @| | | +-< lnet_ni_send
# @| | | +-< lnet_post_send_locked
# @| | | +-< lnet_drop_routed_msgs_locked
# @| | | +-< lnet_parse_get
# @| | | +-< lnet_parse
# @| | | +-< lnet_drop_delayed_msg_list
# @| | | +-< LNetPut
# @| | | +-< LNetGet
# @| | | +-< lolnd_recv
# @| | | +-< delayed_msg_process
! @+-< lnet_nid2peer_locked – part of the peer setup code, which will be extensively revised as the peer data structures are changed. I'd be inclined to replace lp_ni with lp_net, and then the call becomes a lookup of an lnet_net, as opposed to an lnet_ni.
# |@+-< lnet_send
# |@| +-< lnet_parse_get
# |@| | +-< lnet_parse_local
# |@| +-< LNetPut
# |@| | +-< srpc_post_active_rdma
# |@| | +-< ptl_send_buf
# |@| | +-< ptlrpc_start_bulk_transfer
# |@| +-< LNetGet
# |@| | +-< lnet_ping
# |@| | +-< lnet_ping_router_locked
# |@| | +-< srpc_post_active_rdma
# |@| | +-< ptlrpc_start_bulk_transfer
# |@| +-< lnet_complete_msg_locked
# |@| | +-< lnet_finalize
# |@+-< lnet_parse
# |@| +-< kgnilnd_check_fma_rx
# |@| | +-< kgnilnd_process_conns
# |@| +-< kiblnd_handle_rx
# |@| | +-< kiblnd_rx_complete
# |@| | +-< kiblnd_handle_early_rxs
# |@| +-< ksocknal_process_receive
# |@| | +-< ksocknal_scheduler
# |@| +-< lolnd_send
# |@+-< lnet_debug_peer
# |@| +-< LNetCtl
# |@| | +-< lnet_ioctl
# |@| | +-< ptlrpc_expire_one_request
# |@| | +-< ptlrpc_uuid_to_peer
! |@+-< lnet_add_route
# | |@+-< LNetCtl
# | |@| +-< lnet_ioctl
# | |@| +-< ptlrpc_expire_one_request
# | |@| +-< ptlrpc_uuid_to_peer
! | |@+-< lnet_parse_route
! | | |@+-< lnet_parse_route_tbs
lnet_net2ni() will be called from the LND. This has to be changed to get the right NI under the new structure.
1 601 lnet/include/lnet/lib-types.h <<GLOBAL>>
struct list_head ln_nis;
2 565 lnet/lnet/api-ni.c <<lnet_prepare>>
INIT_LIST_HEAD(&the_lnet.ln_nis);
3 640 lnet/lnet/api-ni.c <<lnet_unprepare>>
LASSERT(list_empty(&the_lnet.ln_nis));
4 679 lnet/lnet/api-ni.c <<lnet_net2ni_locked>>
list_for_each(tmp, &the_lnet.ln_nis) {
5 792 lnet/lnet/api-ni.c <<lnet_nid2ni_locked>>
list_for_each(tmp, &the_lnet.ln_nis) {
6 829 lnet/lnet/api-ni.c <<lnet_count_acceptor_nis>>
list_for_each(tmp, &the_lnet.ln_nis) {
7 870 lnet/lnet/api-ni.c <<lnet_get_ni_count>>
list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
8 893 lnet/lnet/api-ni.c <<lnet_ping_info_destroy>>
list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
9 1005 lnet/lnet/api-ni.c <<lnet_ping_info_install_locked>>
list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
10 1173 lnet/lnet/api-ni.c <<lnet_shutdown_lndnis>>
while (!list_empty(&the_lnet.ln_nis)) {
11 1174 lnet/lnet/api-ni.c <<lnet_shutdown_lndnis>>
ni = list_entry(the_lnet.ln_nis.next,
12 1247 lnet/lnet/api-ni.c <<lnet_startup_lndni>>
rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
13 1328 lnet/lnet/api-ni.c <<lnet_startup_lndni>>
list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
14 1717 lnet/lnet/api-ni.c <<lnet_get_net_config>>
list_for_each(tmp, &the_lnet.ln_nis) {
15 2142 lnet/lnet/api-ni.c <<LNetGetId>>
list_for_each(tmp, &the_lnet.ln_nis) {
16 2470 lnet/lnet/lib-move.c <<LNetDist>>
list_for_each(e, &the_lnet.ln_nis) {
17 253 lnet/lnet/router.c <<lnet_shuffle_seed>>
list_for_each(tmp, &the_lnet.ln_nis) {
18 852 lnet/lnet/router.c <<lnet_update_ni_status_locked>>
list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
19 667 lnet/lnet/router_proc.c <<proc_lnet_nis>>
n = the_lnet.ln_nis.next;
20 669 lnet/lnet/router_proc.c <<proc_lnet_nis>>
while (n != &the_lnet.ln_nis) {
21 893 lnet/lnet/router_proc.c <<proc_lnet_net_status>>
n = the_lnet.ln_nis.next;
22 895 lnet/lnet/router_proc.c <<proc_lnet_net_status>>
while (n != &the_lnet.ln_nis) {