-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathngc_internal_lb_rdma_wrapper.sh
executable file
·263 lines (233 loc) · 8.23 KB
/
ngc_internal_lb_rdma_wrapper.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#!/bin/bash
# Internal loopback wrapper for ngc_rdma_test.sh: parses the command line,
# then (further down in this file) runs the RDMA bandwidth tests with the same
# host acting as both server and client.
set -eE

# Variables
scriptdir="$(dirname "$0")"
tests="--tests=ib_write_bw,ib_read_bw,ib_send_bw"
CURRENT_DATE=$(date +'%F %H:%M:%S')
source "${scriptdir}/common.sh"

# Print the usage text to stderr and exit with status 1.
#
# This function is defined BEFORE the option parsing loop on purpose: bash
# resolves a command name when the command runs, so if `-h` were handled
# before this definition, the name `help` would resolve to the bash builtin
# `help` instead of this function.
help() {
  local WHITE RESET
  # tput fails when no usable terminal type is available; fall back to plain
  # text so that `set -e` does not abort before the usage text is printed.
  WHITE=$(tput bold 2> /dev/null) || WHITE=""
  RESET=$(tput sgr0 2> /dev/null) || RESET=""
  cat <<EOF >&2
Internal Loopback wrapper for ngc_rdma_test.sh.
For debugging, you can see results and other information using: journalctl --since "${CURRENT_DATE}" -t ngc_multinode_perf.
RDMA devices are obtained from 'ibdev2netdev' command.
Criteria for Pass/Fail - 90% line rate of the port speed.
* Passwordless SSH access to the participating nodes is required.
* Passwordless sudo root access is required from the SSH'ing user.
* Dependencies which need to be installed: numactl, perftest.
* For Virtual Machines, you can change the NIC<->GPU affinity by
providing the affinity in a file.
The file should consist two lines, one for GPUs and the other for NICs.
Example:
echo "mlx5_0 mlx5_1 mlx5_2 mlx5_3 mlx5_4 mlx5_5 mlx5_6 mlx5_7" > gpuaff.txt
echo "GPU6 GPU3 GPU1 GPU7 GPU4 GPU2 GPU0 GPU5" >> gpuaff.txt
Options:
--vm # Use this flag when running on a VM
--aff # Used with the --vm flag to provide a different NIC<->GPU affinity
--write # Run write tests only
--read # Run read tests only
--with_cuda # Run both RDMA and GPUDirect
--cuda_only # Run only GPUDirect
${WHITE}Usage:
Run RDMA & GPUDirect:
$0 Server --with_cuda
Run RDMA only:
$0 Server
Hosts with different NIC<->GPU affinity:
$0 Server --vm --aff \$FILE --with_cuda${RESET}
EOF
  exit 1
}

# Parse command line options
# Start from an empty array so an inherited scalar named POSITIONAL_ARGS cannot
# end up as the first positional argument.
POSITIONAL_ARGS=()
while [ $# -gt 0 ]; do
  case "${1}" in
    --vm)
      RUN_AS_VM=true
      shift
      ;;
    --aff)
      # --aff takes the affinity file as its value and implies --vm.
      if [ -f "${2}" ]; then
        AFFINITY_FILE="${2}"
        RUN_AS_VM=true
        shift 2
      else
        fatal "--aff parameter requires a file"
      fi
      ;;
    --with_cuda)
      RUN_WITH_CUDA=true
      shift
      ;;
    --cuda_only)
      ONLY_CUDA=true
      RUN_WITH_CUDA=true
      shift
      ;;
    --write)
      tests="--tests=ib_write_bw"
      shift
      ;;
    --read)
      tests="--tests=ib_read_bw"
      shift
      ;;
    -h|--help)
      help
      shift
      ;;
    --*)
      fatal "Unknown option ${1}"
      ;;
    *)
      POSITIONAL_ARGS+=("${1}")
      shift
      ;;
  esac
done

# Re-install the non-option arguments as the positional parameters; the first
# one is the server to test.
set -- "${POSITIONAL_ARGS[@]}"
SERVER_IP=${1}

# Check if --aff is provided without --with_cuda or --cuda_only
if [ -n "${AFFINITY_FILE}" ] && [ -z "${RUN_WITH_CUDA}" ] && [ -z "${ONLY_CUDA}" ]; then
  fatal "If --aff is provided, either --with_cuda or --cuda_only must also be provided."
fi
# Internal loopback function for Hosts
#
# Runs ngc_rdma_test.sh once for every device in SERVER_MLNX, passing the same
# host and the same device as both sides of the test, and prints only the
# RESULT_PASS:/RESULT_FAIL: lines of its output.
#
# Globals:   scriptdir, SERVER_IP, SERVER_MLNX, tests, ONLY_CUDA, NC (read)
# Arguments: $1 - "use_cuda" to add --use_cuda; any other value (or none) runs
#                 without CUDA, and is skipped entirely when ONLY_CUDA is "true"
# Outputs:   filtered result lines on stdout; progress and warnings via log
ngc_rdma_internal_lb() {
  local dev
  local -a cuda_args=()

  # Determine the current test being run (CUDA on/off)
  if [[ "${1}" == "use_cuda" ]]; then
    cuda_args=("--use_cuda")
    log "NGC BW RDMA Test (Internal Loopback) in progress... (CUDA on)"
  else
    if [ "${ONLY_CUDA}" = "true" ]; then
      return
    fi
    log "NGC BW RDMA Test (Internal Loopback) in progress... (CUDA off)"
  fi

  # Loop over the Host devices
  for dev in "${SERVER_MLNX[@]}"; do
    # The pipeline runs in a subshell with pipefail so that a non-zero exit of
    # ngc_rdma_test.sh is seen by the `if`; without it only sed's status would
    # be tested and the warning below could never be reached.
    if ! (
      set -o pipefail
      "${scriptdir}/ngc_rdma_test.sh" "${SERVER_IP}" "${dev}" "${SERVER_IP}" "${dev}" \
        "${tests}" "${cuda_args[@]}" "--unidir" 2> /dev/null |
        sed -n '/RESULT_\(PASS\|FAIL\):/p'
    ); then
      # Report the device actually under test (the loop variable holds the
      # device name, not an array index).
      log "Issue with device ${dev} <-> ${dev}${NC}" WARNING
    fi
  done
}
# Internal loopback function for VMs
#
# Runs ngc_rdma_test.sh once for every entry of SERVER_MLNX, passing the same
# host and device as both sides of the test, and prints only the
# RESULT_PASS:/RESULT_FAIL: lines of its output. In CUDA mode the GPU paired
# with each NIC is taken from the same index of GPU_ARR and passed as both
# --server_cuda and --client_cuda.
#
# Globals:   scriptdir, SERVER_IP, SERVER_MLNX, GPU_ARR, tests, ONLY_CUDA,
#            NC (read)
# Arguments: $1 - "use_cuda" to run the CUDA variant; any other value (or
#                 none) runs without CUDA, and is skipped entirely when
#                 ONLY_CUDA is "true"
# Outputs:   filtered result lines on stdout; progress and warnings via log
ngc_rdma_vm_internal_lb() {
  local idx gpu_index peer with_cuda=""
  local -a cuda_args

  # Determine the current test being run (CUDA on/off)
  if [[ "${1}" == "use_cuda" ]]; then
    with_cuda="yes"
    log "NGC BW RDMA Test (Internal Loopback) in progress... (CUDA on)"
  else
    if [ "${ONLY_CUDA}" = "true" ]; then
      return
    fi
    log "NGC BW RDMA Test (Internal Loopback) in progress... (CUDA off)"
    log "Without CUDA:"
  fi

  # Loop over the Host devices
  for ((idx = 0; idx < ${#SERVER_MLNX[@]}; idx++)); do
    cuda_args=()
    peer="${SERVER_MLNX[idx]}"
    if [[ -n "${with_cuda}" ]]; then
      # Keep only the digits of the GPU name (e.g. "GPU6" -> "6").
      gpu_index="${GPU_ARR[idx]//[!0-9]/}"
      cuda_args=("--use_cuda" "--server_cuda=${gpu_index}" "--client_cuda=${gpu_index}")
      peer="${GPU_ARR[idx]}"
    fi
    # The pipeline runs in a subshell with pipefail so that a non-zero exit of
    # ngc_rdma_test.sh is seen by the `if`; without it only sed's status would
    # be tested and the warning below could never be reached.
    if ! (
      set -o pipefail
      "${scriptdir}/ngc_rdma_test.sh" "${SERVER_IP}" "${SERVER_MLNX[idx]}" \
        "${SERVER_IP}" "${SERVER_MLNX[idx]}" "${tests}" "${cuda_args[@]}" \
        "--unidir" 2> /dev/null |
        sed -n '/RESULT_\(PASS\|FAIL\):/p'
    ); then
      log "Issue with device ${SERVER_MLNX[idx]} <-> ${peer}${NC}" WARNING
    fi
  done
}
# Determine nic <-> gpu affinity
#
# Fills SERVER_MLNX (NICs) and GPU_ARR (GPUs), prints the resulting pairing
# index by index, and asks the user to confirm it.
#
# With AFFINITY_FILE set, the NIC list is the first line of that file that
# contains "mlx" and the GPU list is the first line that contains "gpu" (both
# case-insensitive). Otherwise the lists are read over SSH from
# `nvidia-smi -L` and `ibdev2netdev` on SERVER_IP.
#
# Globals:   AFFINITY_FILE, SERVER_IP (read);
#            SERVER_MLNX, GPU_ARR, NIC_LINE, GPU_LINE (written)
# Outputs:   the pairing and the confirmation prompt
# Exits:     through fatal when the file is unusable, a device list is empty,
#            the user answers n/N, or three invalid answers were given
nic_to_gpu_affinity() {
  local idx tries user_confirm gpu_list nic_list

  # Display NIC & GPU affinity according to file
  if [ -n "${AFFINITY_FILE}" ]; then
    if [ ! -f "${AFFINITY_FILE}" ]; then
      fatal "Error with file ${AFFINITY_FILE}."
    fi
    echo "NIC to GPU affinity according to ${AFFINITY_FILE} file:"
    # Only the first matching line is used, so an extra line mentioning
    # "gpu"/"mlx" cannot turn the line number into a multi-line value.
    GPU_LINE=$(awk 'tolower($0) ~ /gpu/ { print NR; exit }' "${AFFINITY_FILE}")
    NIC_LINE=$(awk 'tolower($0) ~ /mlx/ { print NR; exit }' "${AFFINITY_FILE}")
    if [[ -z "${NIC_LINE}" || -z "${GPU_LINE}" ]]; then
      fatal "Error with file ${AFFINITY_FILE}"
    fi
    # read -a splits on whitespace without applying glob expansion.
    read -r -a SERVER_MLNX < <(awk -v n="${NIC_LINE}" 'NR == n' "${AFFINITY_FILE}")
    read -r -a GPU_ARR < <(awk -v n="${GPU_LINE}" 'NR == n' "${AFFINITY_FILE}")
  else
    # Display default NIC & GPU affinity
    # Find CUDA & RDMA devices
    # An empty result is the failure signal: feeding an empty string to
    # readarray through a here-string would otherwise produce a one-element
    # array holding "" and the failure would go unnoticed.
    gpu_list=$(ssh "${SERVER_IP}" nvidia-smi -L | awk '{print $1 $2}' | tr -d ':')
    if [[ -z "${gpu_list}" ]]; then
      fatal "Couldn't get CUDA devices"
    fi
    nic_list=$(ssh "${SERVER_IP}" ibdev2netdev | awk '{print $1}')
    if [[ -z "${nic_list}" ]]; then
      fatal "Couldn't get NICs from ibdev2netdev"
    fi
    readarray -t GPU_ARR <<< "${gpu_list}"
    readarray -t SERVER_MLNX <<< "${nic_list}"
    echo "NIC to GPU affinity:"
  fi

  for ((idx = 0; idx < ${#SERVER_MLNX[@]}; idx++)); do
    echo "${SERVER_MLNX[idx]} <-> ${GPU_ARR[idx]}"
  done

  # Ask the user to confirm
  tries=0
  while true; do
    # A failed read (end of input) is treated like any other invalid answer
    # instead of letting `set -e` end the script without a message.
    read -r -p "Is the affinity correct? [yY]/[nN]: " user_confirm || true
    case "${user_confirm}" in
      [yY])
        break
        ;;
      [nN])
        fatal "Please provide affinity file (see README)"
        ;;
      *)
        tries=$(( tries + 1 ))
        if (( tries == 3 )); then
          fatal "Reached maximum attempts. Exiting.."
        fi
        ;;
    esac
  done
}
# Check for SSH connectivity
#
# Opens a quiet SSH session that immediately runs `exit`; if that fails, the
# script is ended through fatal.
#
# Globals:   SERVER_IP (read, only as the default host)
# Arguments: $1 - host to probe (optional; defaults to SERVER_IP)
check_ssh() {
  # Honour the host passed by the caller instead of silently ignoring it.
  local host="${1:-${SERVER_IP}}"
  if ! ssh -q "${host}" exit; then
    fatal "SSH connection failed for: ${host}."
  fi
}
# Fill SERVER_MLNX with the first column of `ibdev2netdev` run on SERVER_IP.
# An empty result ends the script through fatal; feeding an empty string to
# readarray through a here-string would otherwise yield a one-element array
# holding "" and the loopback functions would run once with no device name.
discover_server_nics() {
  local nic_list
  nic_list=$(ssh "${SERVER_IP}" ibdev2netdev | awk '{print $1}')
  if [[ -z "${nic_list}" ]]; then
    fatal "Couldn't get NICs from ibdev2netdev"
  fi
  readarray -t SERVER_MLNX <<< "${nic_list}"
}

# Main flow: exactly one positional argument (the server) is expected;
# anything else prints the usage text.
if (( $# == 1 )); then
  check_ssh "${SERVER_IP}"
  log "For debugging, please use: journalctl --since \"${CURRENT_DATE}\" -t ngc_multinode_perf"
  # Determine if running as VM
  if [ "${RUN_AS_VM}" = "true" ]; then
    # Check GPU affinity if cuda is required
    if [ "${RUN_WITH_CUDA}" = "true" ]; then
      nic_to_gpu_affinity
    else
      discover_server_nics
    fi
    # VM Loopback without CUDA
    ngc_rdma_vm_internal_lb
    # VM Loopback with CUDA
    if [ "${RUN_WITH_CUDA}" = "true" ]; then
      ngc_rdma_vm_internal_lb "use_cuda"
    fi
  else
    # Get MLNX devices
    discover_server_nics
    # Without CUDA
    ngc_rdma_internal_lb
    # With CUDA
    if [ "${RUN_WITH_CUDA}" = "true" ]; then
      ngc_rdma_internal_lb "use_cuda"
    fi
  fi
else
  help
fi