| #!/bin/sh |
| # |
| # Copyright (c) 2000-2001 Silicon Graphics, Inc. All Rights Reserved. |
| # |
| # xfscrash - control the XFS crash tests |
| # |
| |
| ####################### |
| ### configuration stuff ######################################################## |
| ####################### |
| |
# Test mode: remount, repair or corrupt.
MODE="remount"
# Directory holding the xfscrash control files (counter/start/stop/active).
XFSCRASH="/xfscrash"
# Directory that receives the *.out / *.log files.
LOG="${XFSCRASH}"
# Whitespace-separated list of files/devices that logged output is appended to.
OUTPUT="${LOG}/xfscrash.log /dev/tty1 /dev/console"
# awk implementation used by the df helpers.
AWK_PROG="gawk"
# The test FS is emptied when it is at least this percent full at the start
# of a run (the original note suggests 100 is a good number).  Only the
# corrupt test uses it so far.
FULL_LIMIT="80"
| |
# Per-host settings, selected by the short hostname.  Each known host sets:
#   TEST_MNT       where the test partition is mounted
#   TEST_DEV       device the test filesystem is built on
#   BACKUP_DEV     device the test partition is backed up to (or empty)
#   BACKUP_BS      block size handed to dd for the backup
#   STRESS_TIME    base stress time
#   STRESS_RANDOM  stress random time
# Unknown hosts abort with exit status 1.
case "$(hostname -s)" in
leesa)
    TEST_MNT=/mnt/arch0
    TEST_DEV=/dev/hda6
    BACKUP_DEV=/dev/hda8
    BACKUP_BS=1024k
    STRESS_TIME=60
    STRESS_RANDOM=60
    ;;
lumpy)
    TEST_MNT=/mnt/scratch_0
    TEST_DEV=/dev/sdc5
    # backup is disabled on this host; /dev/sdc6 is kept as a commented-out value
    BACKUP_DEV=""   #/dev/sdc6
    BACKUP_BS=10240k
    STRESS_TIME=360
    STRESS_RANDOM=360
    ;;
*)
    echo "!!! no configuration data for host $(hostname -s)"
    exit 1
    ;;
esac
| |
# Extra options appended to every fsstress command line ("avoid stress";
# presumably switches off the resvsp/unresvsp operations - confirm against
# fsstress usage).
AVOID="-f resvsp=0 -f unresvsp=0"

# Stress command for the remount/repair tests (DIY: swap in the randholes
# variant below if wanted).
STRESS="/usr/local/bin/fsstress -d ${TEST_MNT}/stress -n 10000000 -p 1 ${AVOID}"
#STRESS="/usr/local/bin/randholes -l 10000000 -c 100000 -b 512 $TEST_MNT/stress/holes"

# Stress command for the corrupt test.
CORRUPT_STRESS="/usr/local/bin/fsstress -d ${TEST_MNT}/stress -n 10000 -p 1 ${AVOID}"

###########################################################################

# PID of the background "reboot later" subshell; -1 means none was started,
# in which case _cleanup has nothing to kill.
reboot="-1"
| |
# _log : append everything read from stdin to each destination listed in
# $OUTPUT.  Nothing is written to stdout.
_log()
{
    # $OUTPUT is split on whitespace on purpose: it is a list of files.
    set -- $OUTPUT
    tee -a "$@" >/dev/null
}
| |
# _echo : log one line made of all arguments joined by single spaces.
#
# printf is used instead of echo so that a message such as "-n" or one
# containing backslashes is logged verbatim regardless of which shell
# /bin/sh happens to be.
_echo()
{
    printf '%s\n' "$*" | _log
}
| |
# _mount : mount $TEST_DEV on $TEST_MNT as XFS.
# Logs the attempt; on failure logs an error and exits the script with 1.
_mount()
{
    _echo " *** Mounting $TEST_DEV on $TEST_MNT"
    # quoted so a device or mount point containing whitespace stays one word
    if ! mount -t xfs "$TEST_DEV" "$TEST_MNT"
    then
        _echo " !!! unable to mount"
        exit 1
    fi
}
| |
# _unmount : unmount $TEST_DEV, discarding umount's own output.
# Logs the attempt; on failure logs an error and exits the script with 1.
_unmount()
{
    _echo " *** Unmounting $TEST_DEV"
    # "> file 2>&1" rather than the bash-only "&>": this script is #!/bin/sh
    if ! umount "$TEST_DEV" > /dev/null 2>&1
    then
        _echo " !!! unable to unmount"
        exit 1
    fi
}
| |
# _check expect : check the filesystem on $TEST_DEV.
#
#   expect = 0   the FS is expected to be clean: _xfs_check must succeed
#                AND produce no output, then "xfs_repair -n" must succeed.
#                Any failure is logged and the script exits with 1.
#   expect != 0  the FS is expected to be dirty: _xfs_check is run and its
#                result only affects which message is logged.
#
# Tool output is saved in $LOG/check_clean.out, $LOG/check_dirty.out and
# $LOG/repair_clean.out.  Sets the globals "expect" and "fail".
#
# NOTE(review): _xfs_check is not defined in the visible part of this file;
# confirm it is provided elsewhere (otherwise every check reports failure).
_check()
{
    expect=$1
    fail=0

    if [ "$expect" -eq 0 ]
    then
        _echo " *** Checking FS (expecting clean fs)"
        _echo " *** xfs_check ($LOG/check_clean.out)"
        _xfs_check "$TEST_DEV" > "$LOG/check_clean.out" 2>&1 || fail=1
        # Any output at all counts as a failure.  This must look at the
        # file written just above (it used to test a /tmp path that nothing
        # ever wrote, so noisy checks were silently accepted).
        [ -s "$LOG/check_clean.out" ] && fail=1
    else
        _echo " *** Checking FS (expecting dirty fs)"
        _echo " *** xfs_check ($LOG/check_dirty.out)"
        _xfs_check "$TEST_DEV" > "$LOG/check_dirty.out" 2>&1 || fail=1
    fi

    # Second opinion from a read-only xfs_repair, clean case only.
    if [ "$fail" -eq 0 ] && [ "$expect" -eq 0 ]
    then
        _echo " *** xfs_repair -n ($LOG/repair_clean.out)"
        xfs_repair -n "$TEST_DEV" > "$LOG/repair_clean.out" 2>&1 || fail=1
    fi

    if [ "$fail" -eq 0 ]
    then
        _echo " *** FS checks ok"
    elif [ "$expect" -eq 0 ]
    then
        _echo " !!! FS check failed - inconsistent FS"
        _echo " !!! (see $LOG/*.out for details)"
        exit 1
    else
        _echo " *** inconsistent fs (as expected)"
    fi
}
| |
# _check_core : abort the script (exit 1) if a file named "core" exists in
# the current directory; otherwise do nothing.
_check_core()
{
    [ -e core ] || return 0

    _echo " !!! core file found!"
    exit 1
}
| |
# _repair : run the three-pass repair test on $TEST_DEV.
#
#   pass 1  xfs_repair -n  (read-only)  - errors are expected; either result
#                                         is only logged
#   pass 2  xfs_repair     (read-write) - must succeed, else exit 1
#   pass 3  xfs_repair -n  (read-only)  - must now succeed, else exit 1
#
# Output goes to $LOG/repair_1.out .. repair_3.out.  After each pass
# _check_core aborts the script if a core file appeared.
_repair()
{
    rm -f core
    _echo " *** repair"
    _echo " *** repair pass 1 (RO)"
    # a real if/else: with "a && b || c" the error branch would also run
    # whenever logging the success message failed
    if xfs_repair -n "$TEST_DEV" > "$LOG/repair_1.out" 2>&1
    then
        _echo " !!! no errors found (eh?)"
    else
        _echo " *** errors found (expected)"
    fi

    _check_core

    _echo " *** repair pass 2 (RW)"

    if xfs_repair "$TEST_DEV" > "$LOG/repair_2.out" 2>&1
    then
        _echo " *** FS checks ok (now)"
    else
        _echo " !!! xfs_repair returned error code"
        _echo " !!! (see $LOG/repair_*.out for details)"
        exit 1
    fi

    _check_core

    _echo " *** repair pass 3 (RO)"
    if xfs_repair -n "$TEST_DEV" > "$LOG/repair_3.out" 2>&1
    then
        _echo " *** FS checks ok"
    else
        _echo " !!! errors found after repair (unexpected)"
        _echo " !!! (see $LOG/repair_*.out for details)"
        exit 1
    fi

    _check_core
}
| |
# _cleanup : remove the counter/start/stop/active control files from
# $XFSCRASH and, if a delayed-reboot subshell was started ($reboot is its
# PID, -1 when there is none), kill it.
_cleanup()
{
    rm -f \
        $XFSCRASH/counter \
        $XFSCRASH/start \
        $XFSCRASH/stop \
        $XFSCRASH/active

    if [ $reboot != -1 ]; then
        kill $reboot
    fi
}
| |
# _random limit : print a random integer in the range 0 .. limit-1.
#
# Four bytes are read as one unsigned integer and reduced modulo limit.
#   - /dev/urandom is used so the call cannot stall waiting for entropy
#   - a missing or non-positive limit prints 0 instead of dividing by zero
#   - the awk binary comes from $AWK_PROG like the other helpers in this
#     file (falling back to gawk, the previously hard-coded value)
_random()
{
    od -An -tu4 -N4 /dev/urandom \
        | ${AWK_PROG:-gawk} -v v="${1:-0}" \
            'NR == 1 { print (v > 0 ? $1 % v : 0) }'
}
| |
# _backup : copy $TEST_DEV to $BACKUP_DEV with dd (block size $BACKUP_BS).
#
# The copy is skipped when $count is 1 or when no backup device is
# configured.  dd output goes to $LOG/dd.out; a dd failure is logged and
# exits the script with 1.
_backup()
{
    # An unset count is treated like 1, i.e. the backup is skipped - the
    # same branch the unquoted test used to fall into.
    if [ "${count:-1}" -ne 1 ] && [ -n "$BACKUP_DEV" ]
    then
        _echo " *** Backing up $TEST_DEV to $BACKUP_DEV"
        if ! dd if="$TEST_DEV" of="$BACKUP_DEV" bs="$BACKUP_BS" > "$LOG/dd.out" 2>&1
        then
            _echo " !!! unable to backup fs"
            _echo " !!! (see $LOG/dd.out)"
            exit 1
        fi
    else
        _echo " *** skipping back up step"
    fi
}
| |
# _logprint_dump outfile [xfs_logprint options...]
#
# Run xfs_logprint with the given options on $TEST_DEV, saving stdout and
# stderr in outfile.  If a file named "core" exists afterwards, log that and
# append a "*** CORE DUMPED ***" marker to outfile.
_logprint_dump()
{
    _lp_out=$1
    shift

    # start from a known state so a core file can be blamed on this run
    rm -f core
    xfs_logprint "$@" "$TEST_DEV" > "$_lp_out" 2>&1
    if [ -e core ]
    then
        _echo " !!! xfs_logprint dumped core"
        {
            echo ""
            echo "*** CORE DUMPED ***"
            echo ""
        } >> "$_lp_out"
    fi
}

# _logprint : dump the log of $TEST_DEV three ways:
#   default options -> $LOG/logprint.out
#   -t -i           -> $LOG/logprint_inode.out
#   -t -b           -> $LOG/logprint_buf.out
# A core dump is recorded in the log and in the affected output file but
# does not stop the script.
_logprint()
{
    _echo " *** dumping log to $LOG/logprint.out"
    _logprint_dump "$LOG/logprint.out"

    _echo " *** dumping log (-t -i) to $LOG/logprint_inode.out"
    _logprint_dump "$LOG/logprint_inode.out" -t -i

    _echo " *** dumping log (-t -b) to $LOG/logprint_buf.out"
    _logprint_dump "$LOG/logprint_buf.out" -t -b
}
#
# _df_device : get an IRIX style df line for a given device
#
#       - prints nothing if the device is not listed by df (not mounted)
#       - fs type is in field two (ala IRIX), courtesy of "df -T"
#       - joins the line together if split by fancy df formatting
#       - strips header etc
#
# The device is compared for exact equality with df's first column.  It is
# no longer used as a regular expression, so /dev/hda6 cannot pick up the
# line for /dev/hda60 and regex metacharacters in a name are harmless.
#
# Exits the script with 1 (usage on stderr) unless given exactly one argument.
#

_df_device()
{
    if [ $# -ne 1 ]
    then
        echo "Usage: _df_device device" >&2
        exit 1
    fi

    df -T 2> /dev/null | $AWK_PROG -v what="$1" '
        $1 == what && NF == 1 {
            # long device name: df put the remaining columns on the next line
            v = $1
            getline
            print v, $0
            exit
        }
        $1 == what {
            print
            exit
        }
    '
}
| |
#
# _df_dir : print an IRIX style df line for the device a directory lives on
#
#       - fs type is in field two (ala IRIX), courtesy of "df -T"
#       - a line that df split in two is glued back together
#       - the header line is dropped; only the first entry is printed
#
# Exits the script with 1 (usage on stderr) unless given exactly one argument.
#

_df_dir()
{
    if [ $# -ne 1 ]; then
        echo "Usage: _df_dir device" >&2
        exit 1
    fi

    df -T $1 2> /dev/null | $AWK_PROG -v what=$1 '
        # only the second line of df output (the first entry) is of interest
        NR != 2 { next }
        NF == 1 {
            first = $1
            getline
            print first, $0
            exit 0
        }
        {
            print
            exit 0
        }
    '
    # anything else: no output
}
| |
# _used device : print the used-space percentage (without the "%" sign) of a
# mounted device, taken from the sixth column of its _df_device line.
# Exits the script with 1 (usage on stderr) unless given exactly one argument.

_used()
{
    if [ $# -ne 1 ]; then
        echo "Usage: _used device" >&2
        exit 1
    fi

    # drop the first "%" on the line, then print the Use% column
    _df_device $1 | $AWK_PROG '{ sub(/%/, ""); print $6 }'
}
| |
# _check_free : delete $TEST_MNT/stress when $TEST_DEV is at least
# $FULL_LIMIT percent full.  Sets the global "used".
_check_free()
{
    used=$(_used "$TEST_DEV")

    # _used prints nothing when the device is not in the df output; without
    # a number there is nothing to compare, so leave the files alone instead
    # of tripping a "[" syntax error.
    case "$used" in
    '' | *[!0-9]*)
        return 0
        ;;
    esac

    if [ "$used" -ge "$FULL_LIMIT" ]
    then
        _echo " *** $used % used on $TEST_DEV - deleting files"
        # :? aborts rather than running "rm -rf /stress" if TEST_MNT is empty
        rm -rf "${TEST_MNT:?}/stress"
    fi
}
| |
# _corrupt : the "corrupt" test - loop forever stressing, unmounting and
# checking the filesystem.  No reboot is expected in this mode.
#
# Each iteration: check the FS (expecting it clean), mount it, empty it if
# it is too full, run $CORRUPT_STRESS with its output logged, and unmount.
# The loop ends with exit 0 once $XFSCRASH/stop exists at the top of an
# iteration.  Sets the global "count".
_corrupt()
{
    count=0

    # don't want to restart if we reboot...
    _cleanup

    while true
    do
        if [ -e "$XFSCRASH/stop" ]
        then
            _echo "### XFS Crash stopped "
            exit 0
        fi

        _echo "*** run $count"
        # POSIX arithmetic; "let" is a bashism and this script is #!/bin/sh
        count=$((count + 1))

        _check 0
        _mount

        _check_free

        # left unquoted on purpose: the variable holds a whole command line
        $CORRUPT_STRESS | _log

        _unmount
    done
}
| |
###########################################################################

_echo ""
_echo ""
echo "XFSCRASH [output to $OUTPUT]"
_echo ""

# A "start" or "stop" argument only drops the matching flag file; the flags
# are acted upon below.
if [ "$1" = "start" ]
then
    touch "$XFSCRASH/start"
fi

if [ "$1" = "stop" ]
then
    touch "$XFSCRASH/stop"
fi


# Clean up on exit and on HUP/INT/QUIT/TERM.
# NOTE(review): $status is never assigned in the visible part of this file,
# so this normally expands to a bare "exit" - confirm that is intended.
trap "_cleanup; exit \$status" 0 1 2 3 15


if [ -e "$XFSCRASH/stop" ]
then
    _echo "### XFS Crash stopped "
    exit 0
fi

# A start request wipes the old state and logs, marks the test active and
# builds a fresh filesystem on the test device.
if [ -e "$XFSCRASH/start" ]
then
    _echo "### XFS Crash started "
    _cleanup
    rm -f "$LOG"/*.out "$LOG"/*.log core

    touch "$XFSCRASH/active"

    _echo " *** Building fresh XFS FS"
    # best effort: the device may well not be mounted
    # ("> file 2>&1" instead of the bash-only "&>", this script is #!/bin/sh)
    umount "$TEST_DEV" > /dev/null 2>&1
    if ! mkfs -t xfs -f "$TEST_DEV" > "$LOG/mkfs.out" 2>&1
    then
        _echo " !!! unable to mkfs"
        _echo " !!! (see $LOG/mkfs.out)"
        exit 1
    fi
fi

# Without the active flag there is nothing to do.
if [ ! -e "$XFSCRASH/active" ]
then
    _echo "### XFS Crash inactive "
    exit 0
fi
| |
| |
# Run counter: read the number of this run from $XFSCRASH/counter (0 when
# the file is missing or unreadable), log it, then store the next value.
if [ -r "$XFSCRASH/counter" ]
then
    count=$(cat "$XFSCRASH/counter")
else
    count=0
fi
_echo "### Crash test run $count (mode=$MODE, log=$LOG/{*.out,*.log})"

# POSIX arithmetic; "let" is a bashism and this script is #!/bin/sh
count=$((count + 1))
echo "$count" > "$XFSCRASH/counter"
| |
# real test starts here

# The test refuses to run unless "mount" reports the root filesystem as
# mounted with exactly the options "(ro)".
_echo " *** Checking for R/O root"
if ! mount | grep "on / type" | grep -q "(ro)"
then
    _echo " !!! root not mounted readonly"
    exit 1
fi

_echo " *** Loading XFS modules"
if ! modprobe xfs
then
    _echo " !!! unable to modprobe xfs"
    exit 1
fi

_echo " *** Unmounting $TEST_DEV"
# best effort, result ignored
# ("> file 2>&1" instead of the bash-only "&>", this script is #!/bin/sh)
umount "$TEST_DEV" > /dev/null 2>&1

_logprint
# the corrupt test does not take a backup
if [ "$MODE" != "corrupt" ]
then
    _backup
fi
| |
# Mode-specific handling of the filesystem as it was left by the last run.
case "$MODE" in
remount)
    # check (problems are expected), then mount and unmount the filesystem
    _check 1    # expect errors
    _mount
    _unmount
    ;;
repair)
    _repair
    ;;
corrupt)
    # loops until told to stop; never reaches the stress/reboot stage below
    _corrupt
    exit 0
    ;;
*)
    # message now names all three modes this case statement accepts
    _echo "xfscrash: MODE must be remount, repair or corrupt"
    exit 1
    ;;
esac
| |
# After remount/repair handling the filesystem must be clean.
_check 0    # don't expect errors
_mount

_echo " *** Cleaning XFS FS"
# "> file 2>&1" instead of the bash-only "&>", this script is #!/bin/sh
if ! rm -rf "$TEST_MNT/stress" "$TEST_MNT/lost+found" > "$LOG/clean.out" 2>&1
then
    _echo " !!! unable to clean XFS FS"
    _echo " !!! (see $LOG/clean.out)"
    exit 1
fi

_echo " *** Making stress directory"
if ! mkdir "$TEST_MNT/stress"
then
    _echo " !!! unable to mkdir stress"
    exit 1
fi

# Delay before the reboot: the base stress time plus a random extra.
# Falls back to no extra if _random printed nothing, instead of handing the
# shell a broken arithmetic expression.
extra=$(_random "$STRESS_RANDOM")
bang=$((STRESS_TIME + ${extra:-0}))

_echo " *** Preparing random reboot (in $bang seconds)"
(
    sleep "$bang"
    _echo " *** BANG ****"
    reboot -fn
) &
# remember the subshell's PID so _cleanup can cancel the pending reboot
reboot=$!

_echo " *** Causing stress & waiting for the inevitable"
# left unquoted on purpose: the variable holds a whole command line
$STRESS | _log

exit 0