diff --git a/lustre.peer_credits b/lustre.peer_credits new file mode 100644 index 0000000..b5b1652 --- /dev/null +++ b/lustre.peer_credits @@ -0,0 +1,91 @@ +IB tweaks Mellanox: + +default ko2iblnd.conf: + +[root@pg-mds01 ~]# cat /etc/modprobe.d/ko2iblnd.conf + +alias ko2iblnd-opa ko2iblnd +options ko2iblnd-opa peer_credits=128 peer_credits_hiw=64 credits=1024 concurrent_sends=256 ntx=2048 map_on_demand=32 fmr_pool_size=2048 fmr_flush_trigger=512 fmr_cache=1 conns_per_peer=4 + +install ko2iblnd /usr/sbin/ko2iblnd-probe + +ko2iblnd-opa is the 'wrong' interface; we use Mellanox cards. + +To check, for example, the peer_credits setting: + +[root@pg-mds01 ~]# cat /proc/sys/lnet/peers +nid refs state last max rtr min tx min queue +0@lo 1 NA -1 0 0 0 0 0 0 +172.23.52.179@o2ib 1 NA -1 8 8 8 8 -8 0 +172.23.52.35@o2ib 1 NA -1 8 8 8 8 -4 0 +172.23.52.124@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.69@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.158@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.14@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.103@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.192@o2ib 1 NA -1 8 8 8 8 -10 0 +172.23.52.48@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.52.137@o2ib 1 NA -1 8 8 8 8 -9 0 +172.23.54.2@o2ib 1 NA -1 8 8 8 8 -8 0 +172.23.52.82@o2ib 1 NA -1 8 8 8 8 -11 0 +172.23.55.212@o2ib 1 NA -1 8 8 8 8 8 0 + +You can see that the peer_credits have a max value of 8.
+ + +should be like this: + +[root@dh2-mds01 ~]# cat /etc/modprobe.d/ko2iblnd.conf + +options ko2iblnd peer_credits=128 peer_credits_hiw=64 credits=1024 concurrent_sends=256 ntx=2048 map_on_demand=32 fmr_pool_size=2048 fmr_flush_trigger=512 fmr_cache=1 conns_per_peer=4 + +After reboot/reload of lnd module you can check the peer_credits: + +[root@dh2-mds01 ~]# cat /proc/sys/lnet/peers +nid refs state last max rtr min tx min queue +0@lo 1 NA -1 0 0 0 0 0 0 +172.23.53.156@o2ib2 1 NA -1 128 128 128 128 -19 0 +172.23.57.43@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.4@o2ib2 2 NA -1 128 128 128 127 -49 672 +172.23.53.9@o2ib2 1 NA -1 128 128 128 128 126 0 +172.23.53.1@o2ib2 1 NA -1 128 128 128 128 102 0 +172.23.53.158@o2ib2 1 NA -1 128 128 128 128 -1601 0 +172.23.57.45@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.6@o2ib2 1 NA -1 128 128 128 128 121 0 +172.23.53.155@o2ib2 1 NA -1 128 128 128 128 -92 0 +172.23.57.42@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.57.47@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.8@o2ib2 1 NA -1 128 128 128 128 126 0 +172.23.57.52@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.157@o2ib2 1 NA -1 128 128 128 128 32 0 +172.23.57.44@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.204@o2ib2 1 NA -1 128 128 128 128 127 0 +172.23.53.5@o2ib2 1 NA -1 128 128 128 128 100 0 +172.23.57.49@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.10@o2ib2 1 NA -1 128 128 128 128 126 0 +172.23.57.41@tcp12 1 NA -1 8 8 8 8 6 0 +172.23.53.2@o2ib2 1 NA -1 128 128 128 128 103 0 +172.23.57.46@tcp12 1 NA -1 8 8 8 8 6 0 + +On ib interfaces you can now see the peer_credits are on 128, on tcp (not changed) the peer credits are still on default(8). 
+ +Further explanation of the settings: + +peer_credits=128 - the number of concurrent sends to a single peer +peer_credits_hiw=64 - high-water mark – when to eagerly return credits +credits=1024 - the number of concurrent sends (to all peers) +concurrent_sends=256 - send work-queue sizing +ntx=2048 - the number of message descriptors that are pre-allocated when the ko2iblnd module is loaded in the kernel +map_on_demand=32 - the number of noncontiguous memory regions that will be mapped into a virtual contiguous region +fmr_pool_size=2048 - the size of the Fast Memory registration (FMR) pool (must be >= ntx/4) +fmr_flush_trigger=512 - the dirty FMR pool flush trigger +fmr_cache=1 - enable FMR caching +conns_per_peer=4 - create multiple queue pairs per peer to allow higher throughput from a single client. This is of most benefit to OPA interfaces, when coupled with the krcvqs parameter of the OPA hfi1 kernel driver. The hfi1 driver option krcvqs must also be set. It is recommended to set krcvqs=4. In some cases, setting krcvqs=8 will yield improved IO performance, but this can impact other workloads, especially on clients. If queue-pair memory usage becomes excessive, reduce the ko2iblnd conns_per_peer value to 2 and krcvqs=2. + +The default values used by Lustre if no parameters are given are: + +peer_credits=8 +peer_credits_hiw=8 +concurrent_sends=8 +credits=64 +