| #!/bin/bash |
| # SPDX-License-Identifier: GPL-2.0+ |
| # Copyright (c) 2017-2018 Western Digital Corporation or its affiliates. |
| # |
| # Functions and global variables used by both the srp and nvmeof-mp tests. |
| |
| . common/shellcheck |
| |
# Global settings shared by the functions in this file.
debug=
filesystem_type=ext4
fio_aux_path=/tmp/fio-state-files
# MemTotal value from /proc/meminfo, in kB.
memtotal=$(sed -n 's/^MemTotal:[[:blank:]]*\([0-9]*\)[[:blank:]]*kB$/\1/p' /proc/meminfo)
# Upper bound for the RAM disk size, in bytes (32 MiB).
max_ramdisk_size=$((1<<25))
# RAM disk size in bytes: 1/16th of MemTotal, capped at $max_ramdisk_size.
ramdisk_size=$((memtotal*(1024/16)))
if ((ramdisk_size > max_ramdisk_size)); then
	ramdisk_size=$max_ramdisk_size
fi
| |
# Succeed if _have_kernel_option reports the DM_MQ_DEFAULT kernel option.
# Otherwise set SKIP_REASON and return 1.
_have_legacy_dm() {
	_have_kernel_option DM_MQ_DEFAULT && return 0
	SKIP_REASON="legacy device mapper support is missing"
	return 1
}
| |
# Compare two dotted version numbers component by component.
# Arguments: $1, $2 - version numbers with the same number of components.
# Returns: 0 if $1 <= $2, 1 if $1 > $2 or if the number of components
# differs (in that case an error message is written to stderr).
version_le() {
	local idx lhs rhs

	IFS='.' read -ra lhs <<<"$1"
	IFS='.' read -ra rhs <<<"$2"
	if [ "${#lhs[@]}" != "${#rhs[@]}" ]; then
		echo "Error: version number mismatch $1 <> $2" >&2
		return 1
	fi
	for idx in "${!lhs[@]}"; do
		# The first component that differs decides the outcome.
		if [ "${lhs[idx]}" -lt "${rhs[idx]}" ]; then
			return 0
		elif [ "${lhs[idx]}" -gt "${rhs[idx]}" ]; then
			return 1
		fi
	done
	return 0
}
| |
# Succeed if the version reported by "multipath -k" is at least $1, where $1
# is a version number with three components separated by dots. Otherwise set
# SKIP_REASON and return 1.
_multipathd_version_ge() {
	local wanted=$1 found

	# Extract x.y.z from the "multipath-tools vx.y.z" banner line.
	found=$(multipath -k 2>&1 |
		sed -n 's/^multipath-tools v\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/p')
	if ! version_le "$wanted" "$found"; then
		SKIP_REASON="Need multipathd version $wanted; found multipathd version $found."
		return 1
	fi
	return 0
}
| |
# Print the IPv4 address(es), without prefix length, that "ip" reports for
# network interface $1.
get_ipv4_addr() {
	local extract='s/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'

	ip -4 -o addr show dev "$1" | sed -n "$extract"
}
| |
# Read IPv6 addresses from stdin, one per line, and print each of them in
# fully expanded form: eight groups of four hexadecimal digits. For example,
# ::1 becomes 0000:0000:0000:0000:0000:0000:0000:0001.
expand_ipv6_addr() {
	awk -F : '{
		# Start from a clean state for every input line.
		left = 1; pre = ""; suf = ""
		for (i = 1; i <= NF; i++) {
			# An empty field marks the "::" abbreviation.
			if ($i == "") { left = 0; continue }
			# Left-pad the group with zeroes to four digits.
			a = substr("0000", 1 + length($i)) $i
			if (left) pre = pre ":" a; else suf = suf ":" a
		}
		# pre and suf hold ":xxxx" per group; fill up to eight groups.
		mid = substr(":0000:0000:0000:0000:0000:0000:0000:0000",
			     1 + length(pre) + length(suf))
		# Drop the leading colon of the assembled address.
		print substr(pre mid suf, 2)
	}'
}
| |
# Print the IPv6 address(es), without prefix length, that "ip" reports for
# network interface $1.
get_ipv6_addr() {
	local extract='s/.*[[:blank:]]inet6[[:blank:]]*\([^[:blank:]/]*\).*/\1/p'

	ip -6 -o addr show dev "$1" | sed -n "$extract"
}
| |
# Succeed if and only if $1 is a number.
is_number() {
	# Prefixing a zero makes the comparison fail for the empty string and
	# for signed values; error messages from "test" are discarded.
	test "$1" -eq "0$1" 2>/dev/null
}
| |
# Check whether sysfs device directory $1 is the device of one of the entries
# in /sys/class/infiniband. An example argument:
# /sys/devices/pci0000:00/0000:00:03.0/0000:04:00.0
# Returns 0 if a match has been found and 1 otherwise, including when $1
# does not exist.
is_rdma_device() {
	local ibdev parent

	[ -e "$1" ] || return 1
	for ibdev in /sys/class/infiniband/*; do
		# Skip the unexpanded pattern if there are no RDMA devices.
		[ -e "$ibdev" ] || continue
		# Two levels above the link target of the infiniband class
		# entry.
		parent=/sys/class/infiniband/"$(readlink "$ibdev")"
		parent=$(dirname "$(dirname "$parent")")
		# -ef is true only if both paths exist and refer to the same
		# device and inode.
		if [ "$1" -ef "$parent" ]; then
			return 0
		fi
	done
	return 1
}
| |
# Print the names of the network interfaces in /sys/class/net for which
# is_rdma_device succeeds, one per line, e.g. ib0 ib1.
rdma_network_interfaces() {
	(
		cd /sys/class/net || exit
		for netif in *; do
			[ -e "$netif" ] || continue
			# Skip IPoIB (ARPHRD_INFINIBAND) network interfaces and
			# interfaces without a device link.
			if [ "$(<"$netif/type")" = 32 ] || [ ! -L "$netif/device" ]; then
				continue
			fi
			target=$(readlink "$netif/device" 2>/dev/null)
			if [ -n "$target" ] && is_rdma_device "$netif/$target"; then
				echo "$netif"
			fi
		done
	)
}
| |
# Resolve $1 if it is a symbolic link, strip the /dev/ prefix and print
# /dev/<name> for every /sys/class/block/<name> whose holders directory has an
# entry with the resulting name.
# NOTE(review): the function name suggests the opposite direction (printing
# the holders of $1) - confirm against the callers.
held_by() {
	local bdev=$1 bdev_dir holder_link

	until [ ! -L "$bdev" ]; do
		bdev=$(realpath "$bdev")
	done
	bdev=${bdev#/dev/}
	for holder_link in /sys/class/block/*/holders/*; do
		[ -e "$holder_link" ] || continue
		[ "${holder_link##*/}" = "$bdev" ] || continue
		# /sys/class/block/<name>/holders/<holder> -> <name>
		bdev_dir=${holder_link%/holders/*}
		echo "/dev/${bdev_dir##*/}"
	done
}
| |
# Sleep for $1 seconds unless that would go past deadline $2. The deadline is
# compared against the output of _uptime_s. If the deadline would be passed,
# sleep only until the deadline (if it has not been reached yet) and return 1.
sleep_until() {
	local secs=$1 limit=$2 now

	now=$(_uptime_s)
	if ! [ $((now + secs)) -le "$limit" ]; then
		[ "$limit" -gt "$now" ] && sleep $((limit - now))
		return 1
	fi
	sleep "$secs"
}
| |
# Send SIGKILL to every process that lsof reports for block device $1 and
# append the process IDs to "$FULL". Returns 1 if $1 is empty.
stop_bdev_users() {
	local field pid

	if [ -z "$1" ]; then
		return 1
	fi
	lsof -F p "$1" 2>/dev/null |
		while read -r field; do
			case "$field" in
			p*)
				# Only lines starting with "p" carry a PID.
				pid=${field#p}
				echo -n " (pid $pid)" >>"$FULL"
				kill -9 "$pid"
				;;
			esac
		done
}
| |
# RHEL 6 dmsetup accepts mpath<n> but not /dev/dm-<n> as its first argument.
# This function prints the mpath<n> name for device node $1.
# - /dev/mapper/<name> is translated into <name> without further checks.
# - Any other existing path is matched by device number (stat %t:%T) against
#   the /dev/mapper/mpath* nodes.
# Returns 0 if a name has been printed and 1 otherwise.
dev_to_mpath() {
	local cand resolved devno

	case "$1" in
	/dev/mapper/*)
		echo "${1#/dev/mapper/}"
		return 0
		;;
	esac

	[ -e "$1" ] || return $?

	resolved=$1
	if [ -L "$1" ]; then
		resolved=$(readlink -f "$1")
	fi
	if ! devno=$(stat -c %t:%T "$resolved"); then
		echo "stat $1 -> $resolved failed"
		return 1
	fi

	for cand in /dev/mapper/mpath*; do
		# Skip the unexpanded pattern and dangling entries.
		[ -L "$cand" ] || [ -e "$cand" ] || continue
		resolved=$cand
		if [ -L "$cand" ]; then
			resolved=$(readlink -f "$cand")
		fi
		if [ "$(stat -c %t:%T "$resolved")" = "$devno" ]; then
			basename "$cand"
			return 0
		fi
	done
	return 1
}
| |
# Walk through the output of "dmsetup table" and remove every multipath
# device for which is_qinp_def or mpath_has_stale_dev succeeds.
remove_stale_mpath_devs() {
	echo "Examining all multipaths"
	dmsetup table |
		while read -r name _ _ target params; do
			[ "$target" = multipath ] || continue
			# $params is split on purpose: every word is checked
			# separately by mpath_has_stale_dev.
			# shellcheck disable=SC2086
			if is_qinp_def "$params" ||
			   mpath_has_stale_dev $params; then
				# Strip the colon that follows the device name.
				echo "${name%:}"
			fi
		done |
		sort -u |
		while read -r name; do
			name="/dev/mapper/$name"
			echo -n "removing $name: "
			if ! remove_mpath_dev "$name"; then
				echo "failed"
				# In debug mode, stop after the first failure.
				[ -z "$debug" ] || return 1
			fi
		done
	echo "Finished examining multipaths"
}
| |
# Switch mpath device $1 to fail_if_no_path mode, unmount the filesystem on
# top of it and remove the mpath device. Up to ten attempts are made; between
# attempts the users of the device are killed. All output is appended to
# "$FULL". Returns 1 if all ten attempts have been used up.
remove_mpath_dev() {
	local attempt dm output t1 t2

	{
		for ((attempt = 10; attempt > 0; attempt--)); do
			# The commands are run directly instead of through
			# eval such that $1 is never interpreted as shell code.
			if ! dm=$(dev_to_mpath "$1"); then
				echo "dm=\$(dev_to_mpath \"$1\"): failed"
			else
				t1=$(dmsetup table "$dm")
				if ! dmsetup message "$dm" 0 fail_if_no_path; then
					echo "dmsetup message $dm 0 fail_if_no_path: failed"
				else
					t2=$(dmsetup table "$dm")
					# Log the tables if queue_if_no_path is
					# still present.
					if grep -qw queue_if_no_path <<<"$t2"; then
						echo "$dm: $t1 -> $t2"
					fi
					echo "Attempting to unmount /dev/mapper/$dm"
					umount "/dev/mapper/$dm"
					if output=$(dmsetup remove "$dm" 2>&1); then
						echo "done"
						break
					fi
					echo "dmsetup remove $dm: $output; retrying"
				fi
			fi
			# Nothing left to do once the device node is gone.
			[ -e "$1" ] || break
			ls -l "$1"
			stop_bdev_users "$(readlink -f "$1")"
			sleep .5
		done
		if [ "$attempt" = 0 ]; then
			echo "failed"
			return 1
		fi
	} &>>"$FULL"
}
| |
# Check the arguments that contain a colon against /sys/class/block/*/dev.
# Returns 0 as soon as one such argument does not occur in any of these files
# and 1 if no such argument has been found.
mpath_has_stale_dev() {
	local arg

	for arg; do
		case "$arg" in
		*:*)
			if ! grep -qw "$arg" /sys/class/block/*/dev 2>/dev/null; then
				return 0
			fi
			;;
		esac
	done

	return 1
}
| |
# Succeed if multipath definition $1 contains one of the two feature strings
# that enable queue_if_no_path.
is_qinp_def() {
	[[ $1 == *" 3 queue_if_no_path queue_mode mq "* ||
	   $1 == *" 1 queue_if_no_path "* ]]
}
| |
# Load the configfs kernel module unless /sys/module/configfs exists and
# mount configfs on /sys/kernel/config unless "mount" already lists it.
# Returns the exit status of the first command that failed.
mount_configfs() {
	[ -e /sys/module/configfs ] || modprobe configfs || return $?
	mount | grep -qw configfs ||
		mount -t configfs none /sys/kernel/config || return $?
}
| |
# Set the I/O scheduler of block device $1 (a name below /sys/class/block) to
# $2. The scheduler name is translated depending on whether /sys/block/$1/mq
# exists: noop <-> none, deadline <-> mq-deadline and bfq-mq -> bfq.
# Prints a message and returns 1 if writing the scheduler attribute fails.
set_scheduler() {
	local bdev=$1 sched=$2 attr

	attr=/sys/class/block/$bdev/queue/scheduler
	if [ ! -e "/sys/block/$bdev/mq" ]; then
		case "$sched" in
		none)		sched=noop;;
		mq-deadline)	sched=deadline;;
		bfq-mq)		sched=bfq;;
		esac
	else
		case "$sched" in
		noop)		sched=none;;
		deadline)	sched=mq-deadline;;
		esac
	fi
	if echo "$sched" > "$attr"; then
		return 0
	fi
	echo "Changing scheduler of $bdev from $(<"$attr") into $sched failed"
	return 1
}
| |
# Print a /dev/... path that points at dm device number $1. The I/O scheduler
# of that device and of the devices that list it as holder is set to $2. The
# timeout of the latter devices is set to $3 if they have a device/timeout
# attribute. The shell script that includes this file must define a function
# get_bdev_path() that translates device number $1 into a /dev/disk/... path.
# Returns 1 after having printed a message if the device cannot be found, is
# not a soft link or if its block size cannot be queried.
get_bdev_n() {
	local idx=$1 sched=$2 tmo=$3
	local dev holder holder_link name target try

	is_number "$idx" || return $?
	dev=""
	# Wait until the device node shows up; ask multipathd to rescan between
	# attempts.
	for ((try = 0; try < 50; try++)); do
		if dev=$(get_bdev_path "$idx") && [ -n "$dev" ] && [ -e "$dev" ]; then
			break
		fi
		echo reconfigure | multipathd -k >&/dev/null
		sleep .1
	done
	if [ -z "$dev" ] || [ ! -e "$dev" ]; then
		echo "Could not find device $idx -> $dev"
		return 1
	fi
	if [ ! -L "$dev" ]; then
		echo "$dev: not a soft link"
		return 1
	fi
	target=$(readlink "$dev" 2>/dev/null || echo "?")
	echo "Using $dev -> ${target}" >>"$FULL"
	# Wait until the block size of the device can be queried.
	for ((try = 0; try < 50; try++)); do
		if blockdev --getbsz "$dev" >&/dev/null; then
			break
		fi
		echo reconfigure | multipathd -k >& /dev/null
		sleep .1
	done
	if ! blockdev --getbsz "$dev" >&/dev/null; then
		echo "$dev: querying block size failed"
		return 1
	fi
	name=$(basename "$target")
	set_scheduler "$name" "$sched"
	for holder_link in /sys/class/block/*"/holders/$name"; do
		[ -e "$holder_link" ] || continue
		# /sys/class/block/<holder>/holders/<name> -> <holder>
		holder=${holder_link%/holders/*}
		holder=${holder##*/}
		set_scheduler "$holder" "${sched}"
		if [ -e "/sys/class/block/$holder/device/timeout" ]; then
			echo "$tmo" > "/sys/class/block/$holder/device/timeout"
		fi
	done
	echo "get_bdev_n() returned $dev" >>"$FULL"
	echo "$dev"
}
| |
# Print the full path of mountpoint number $1, namely "$TMPDIR/mnt$1". fio
# will be run on top of the filesystem mounted at the returned mountpoint.
# Exits with status 1 if $TMPDIR or $1 is empty.
mountpoint() {
	local problem=""

	if [ -z "$TMPDIR" ]; then
		problem="\$TMPDIR has not been set."
	elif [ -z "$1" ]; then
		problem="missing argument"
	fi
	if [ -n "$problem" ]; then
		echo "Error: $problem" 1>&2
		exit 1
	fi
	echo "$TMPDIR/mnt$1"
}
| |
# Print GID 0 of every port of every device found in the "infiniband"
# directories below /sys/devices, except the GIDs that end in
# :0000:0000:0000:0000.
all_primary_gids() {
	local ibdir

	find /sys/devices -name infiniband |
		while read -r ibdir; do
			cat "$ibdir"/*/ports/*/gids/0
		done |
		grep -v ':0000:0000:0000:0000$'
}
| |
# Succeed if the "parent" attribute of one of the devices in
# /sys/class/infiniband equals network interface name $1, that is, if an
# rdma_rxe instance has been associated with that network interface.
has_rdma_rxe() {
	local parent_attr

	for parent_attr in /sys/class/infiniband/*/parent; do
		[ -e "$parent_attr" ] || continue
		if [ "$(<"$parent_attr")" = "$1" ]; then
			return 0
		fi
	done

	return 1
}
| |
# Load the rdma_rxe kernel module and associate it with every network
# interface in /sys/class/net for which has_rdma_rxe fails. Output is appended
# to "$FULL". Returns the exit status of modprobe if loading the module fails.
start_rdma_rxe() {
	{
		modprobe rdma_rxe || return
		(
			cd /sys/class/net || exit
			for netif in *; do
				[ -e "$netif" ] || continue
				has_rdma_rxe "$netif" && continue
				echo "$netif" > /sys/module/rdma_rxe/parameters/add
			done
		)
	} >>"$FULL"
}
| |
# Dissociate the rdma_rxe kernel module from every network interface for which
# has_rdma_rxe succeeds and unload the rdma_rxe kernel module. Prints a
# message and returns 1 if unloading fails.
stop_rdma_rxe() {
	(
		cd /sys/class/net || exit
		for netif in *; do
			[ -e "$netif" ] || continue
			has_rdma_rxe "$netif" || continue
			{ echo "$netif" > /sys/module/rdma_rxe/parameters/remove; } \
				2>/dev/null
		done
	)
	unload_module rdma_rxe 10 && return 0
	echo "Unloading rdma_rxe failed"
	return 1
}
| |
# Print the /dev/... device node of the last block device found in sysfs
# below the scsi_debug driver. Returns 1 if there is no such block device.
scsi_debug_dev_path() {
	local name="" sysfs_path

	for sysfs_path in /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*/block/*; do
		if [ -e "$sysfs_path" ]; then
			name=${sysfs_path##*/}
		fi
	done
	if [ -z "$name" ]; then
		return 1
	fi
	echo "/dev/$name"
}
| |
# Print the first column of the second line of the output of "df $1", that
# is, the block device below the filesystem for directory $1.
block_dev_of_dir() {
	df "$1" | {
		# Skip the header line.
		read -r _
		read -r source _
		echo "$source"
	}
}
| |
# Create a filesystem of type "$filesystem_type" (ext4 or xfs) on block device
# $1. Returns 1 for any other filesystem type.
create_filesystem() {
	local bdev=$1

	if [ "$filesystem_type" = ext4 ]; then
		mkfs.ext4 -F -O ^has_journal -q "$bdev"
	elif [ "$filesystem_type" = xfs ]; then
		mkfs.xfs -f -q "$bdev"
	else
		return 1
	fi
}
| |
# Succeed if "$1" is a directory and block_dev_of_dir reports a different
# block device for it than for its parent directory.
is_mountpoint() {
	local own parent

	if [ -z "$1" ] || [ ! -d "$1" ]; then
		return 1
	fi
	own=$(block_dev_of_dir "$1")
	parent=$(block_dev_of_dir "$(dirname "$1")")
	[ "$own" != "$parent" ]
}
| |
# Execute mount "$@" and verify afterwards that the last argument is a
# mountpoint. Prints an error message and returns 1 if it is not.
mount_and_check() {
	local arg target=""

	# The last argument is the directory to mount on.
	for arg; do
		target=$arg
	done
	mount "$@"
	is_mountpoint "$target" && return 0
	echo "Error: mount $* failed"
	return 1
}
| |
# Unmount the filesystem mounted at mountpoint $1. In contrast with the umount
# command, this function does not accept a block device as argument. Before
# unmounting, the underlying mpath device (if any) is switched to
# fail_if_no_path mode and the users of the block device are killed. Prints an
# error message and returns 1 if $1 is still a mountpoint afterwards.
unmount_and_check() {
	local mnt=$1 bdev mpath

	if is_mountpoint "$mnt"; then
		bdev=$(block_dev_of_dir "$mnt")
		mpath=$(dev_to_mpath "$bdev") 2>/dev/null
		[ -z "$mpath" ] || dmsetup message "$mpath" 0 fail_if_no_path
		stop_bdev_users "$bdev"
		echo "Unmounting $mnt from $bdev" >> "$FULL"
		# Fall back to a lazy unmount if the regular one fails.
		umount "$mnt" || umount --lazy "$mnt"
	fi
	if ! is_mountpoint "$mnt"; then
		return 0
	fi
	echo "Error: unmounting $mnt failed"
	return 1
}
| |
# Succeed if fio supports all command-line options "$@". An option is looked
# up without its value, first in the output of "fio --help" and next in the
# output of "fio --cmdhelp=all". Returns 1 at the first unsupported option.
test_fio_opt() {
	local arg name

	for arg; do
		# Strip "=<value>".
		name=${arg%%=*}
		if fio --help |& grep -q -- "${name}="; then
			continue
		fi
		name=${name#--}
		if fio --cmdhelp=all |& grep -q "^${name}[[:blank:]]"; then
			continue
		fi
		return 1
	done
}
| |
# Run fio with arguments "$@" plus a few derived options.
# - --size=... is added based on the space available in --directory=<dir> or
#   on the size of --filename=<bdev>: 70% of the available space divided by
#   --numjobs (default 1), rounded down to a multiple of 4096.
# - --exitall_on_error=1, --gtod_reduce=1 and --aux-path=$fio_aux_path are
#   added if test_fio_opt reports that fio supports these.
# The fio command line and the exit code are appended to "$FULL". Returns the
# exit code of fio, or 1 if fio succeeded but the --output=<file> file does
# not show that I/O has been performed.
run_fio() {
	# All variables that are read later on are initialized such that this
	# function also works with "set -u", which setup_test enables.
	local args avail_kb="" bd="" d="" j=1 opt output="" rc sectors

	args=("$@")
	for opt in "$@"; do
		case "$opt" in
			--directory=*) d="${opt#--directory=}";;
			--filename=*) bd="${opt#--filename=}";;
			--numjobs=*) j="${opt#--numjobs=}";;
			--output=*) output="${opt#--output=}";;
		esac
	done
	if [ -n "$d" ]; then
		# Fourth column of the first df line that starts with a slash.
		avail_kb=$(df "$d" | grep "^/" |
			{
				if read -r _ _ _ avail _; then
					echo "$avail"
				fi
			}
		)
	fi
	if [ -n "$bd" ]; then
		# blockdev --getsz reports the size in 512-byte sectors. Skip
		# the size calculation if it does not report anything.
		if sectors=$(blockdev --getsz "$bd") && [ -n "$sectors" ]; then
			avail_kb=$((sectors / 2))
		fi
	fi
	if [ -n "$avail_kb" ]; then
		args+=("--size=$(((avail_kb * 1024 * 7 / 10) / j & ~4095))")
	fi
	for opt in --exitall_on_error=1 --gtod_reduce=1 "--aux-path=${fio_aux_path}"
	do
		if test_fio_opt "$opt"; then
			args+=("$opt")
		fi
	done
	mkdir -p "${fio_aux_path}"
	echo "fio ${args[*]}" >>"$FULL"
	fio "${args[@]}" 2>&1
	rc=$?
	if [ $rc = 0 ] && [ -n "$output" ]; then
		# Return exit code 1 if no I/O has been performed.
		grep -q ', io=[0-9].*, run=[0-9]' "$output"
		rc=$?
	fi
	echo "run_fio exit code: $rc" >>"$FULL"
	return $rc
}
| |
# Configure two null_blk instances, nullb0 and nullb1, through configfs:
# memory backed, 512 byte block size, zero completion time and a size of
# $ramdisk_size bytes expressed in MiB. Finally list /dev/nullb* into "$FULL";
# the exit status of this function is the exit status of that listing.
configure_null_blk() {
	local dev

	(
		cd /sys/kernel/config/nullb || exit
		for dev in nullb0 nullb1; do
			# One subshell per device such that a failure only
			# ends the configuration of that device.
			(
				mkdir -p "$dev" || exit
				cd "$dev" || exit
				echo 0 > completion_nsec || exit
				echo 512 > blocksize || exit
				echo $((ramdisk_size>>20)) > size || exit
				echo 1 > memory_backed || exit
				echo 1 > power || exit
			)
		done
	)
	ls -l /dev/nullb* &>>"$FULL"
}
| |
# Remove all directories below /sys/kernel/config/nullb and unload the
# null_blk kernel module. Returns the exit status of unload_module.
unload_null_blk() {
	local cfg

	for cfg in /sys/kernel/config/nullb/*; do
		if [ -d "$cfg" ]; then
			rmdir "$cfg"
		fi
	done
	unload_module null_blk
}
| |
# Call start_rdma_rxe and append a list of all RDMA devices, their ports and
# GID 0 of each port to "$FULL".
setup_rdma() {
	start_rdma_rxe
	(
		echo "RDMA interfaces:"
		cd /sys/class/infiniband || exit
		for ibdev in *; do
			[ -e "$ibdev" ] || continue
			for port in "$ibdev/ports/"*; do
				echo "$ibdev, port ${port##*/}: $(<"$port/gids/0")"
			done
		done
	) &>>"$FULL"
}
| |
# Undo setup_test(): shut down the client, kill multipathd, remove
# /etc/multipath.conf, stop the target, stop rdma_rxe and unload null_blk.
# The exit status is that of unload_null_blk.
teardown_uncond() {
	shutdown_client
	# multipathd has to be gone before its configuration file is removed.
	killall -9 multipathd >&/dev/null
	rm -f /etc/multipath.conf
	stop_target
	stop_rdma_rxe
	unload_null_blk
}
| |
# Call teardown_uncond unless $debug has been set. Returns 1 without doing
# anything if $debug has been set.
teardown() {
	[ -n "$debug" ] && return 1
	teardown_uncond
}
| |
# Set up the test configuration with "$1" as multipath configuration file:
# tear down any previous configuration, load the required kernel modules,
# configure null_blk, start multipathd, load the I/O scheduler kernel modules
# and set up RDMA and the target. Enables "set -u" as a side effect.
# Returns non-zero as soon as one of the essential steps fails.
setup_test() {
	local mod required_modules

	set -u

	shutdown_client || {
		echo "failed to shutdown client"
		return 1
	}

	teardown_uncond || {
		echo "teardown() failed"
		return 1
	}

	required_modules=(
		configfs
		dm-multipath
		dm_mod
		scsi_dh_alua
		scsi_dh_emc
		scsi_dh_rdac
		scsi_mod
	)
	for mod in "${required_modules[@]}"; do
		[ -e "/sys/module/$mod" ] || modprobe "$mod" || return $?
	done

	modprobe null_blk nr_devices=0 || return $?

	configure_null_blk || return $?

	# Install the multipath configuration file as a soft link.
	[ -e /etc/multipath.conf ] || (cd /etc && ln -s "$1" .)
	multipathd

	# Load the I/O scheduler kernel modules
	(
		cd "/lib/modules/$(uname -r)/kernel/block" || exit
		for mod in *.ko; do
			[ -e "$mod" ] && modprobe "${mod%.ko}"
		done
	)

	# Add module names to the list below to enable dynamic debugging for
	# these modules.
	if [ -d /sys/kernel/debug/dynamic_debug ]; then
		for mod in ; do
			echo "module $mod +pmf" >/sys/kernel/debug/dynamic_debug/control
		done
	fi

	setup_rdma || return $?
	start_target || return $?
	echo "Test setup finished" >>"$FULL"
}
| |
| # Run these unit tests as follows: |
| # bash -c '. ./common/multipath-over-rdma && unit_tests' |
| unit_tests() { |
| local t tests pass=1 |
| |
| tests=("version_le 0.1.2 1.2.3" |
| "version_le 0.1.2 0.1.3" |
| "version_le 0.1.2 0.1.2" |
| "! version_le 3 2" |
| "! version_le 3.1 2.3") |
| for t in "${tests[@]}"; do |
| if ! eval "$t"; then |
| echo "FAILED: $t" |
| pass=0 |
| fi |
| done |
| [ $pass = 1 ] && echo PASS |
| } |