| #! /bin/bash |
| # SPDX-License-Identifier: GPL-2.0 |
| # Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved. |
| # |
| # FS QA Test No. 453 |
| # |
| # Create a directory with multiple filenames that all appear the same |
| # (in unicode, anyway) but point to different inodes. In theory all |
| # Linux filesystems should allow this (filenames are a sequence of |
| # arbitrary bytes) even if the user implications are horrifying. |
| # |
. ./common/preamble
_begin_fstest auto quick dir

# Prerequisite checks.
_require_scratch
_require_names_are_bytes

echo "Format and mount"
# The first redirect truncates the log; every later one appends to it.
_scratch_mkfs > "${seqres}.full" 2>&1
_scratch_mount >> "${seqres}.full" 2>&1

# Every fixture is created below this per-test directory on the scratch mount.
# Quoted so that a mount point containing whitespace or glob characters is
# still passed to mkdir as a single path.
testdir="${SCRATCH_MNT}/test-${seq}"
mkdir "${testdir}"
| |
# Print the bytes of $1 as space-separated, two-digit lowercase hex values on
# a single line (for example "abc" -> "61 62 63").
#   $1 - string to dump
# Prints nothing for an empty string.
hexbytes() {
	# printf '%s' emits the argument verbatim; "echo -n" would swallow an
	# argument that looks like one of its own options (e.g. "-n" or "-e").
	# od -An suppresses the offset column, so no offset is printed for empty
	# input and only the padding around the hex bytes needs trimming.
	printf '%s' "$1" | od -An -v -tx1 -w99999 | head -n1 |
		sed -e 's/^ *//' -e 's/ *$//'
}
| |
# Create a regular file inside ${testdir} and log what was stored.
#   $1 - file name; backslash escapes such as \xHH are expanded to raw bytes
#   $2 - contents written to the file (followed by a newline)
# Reads the globals testdir and seqres; appends one line to ${seqres}.full.
setf() {
	# Keep the working variables out of the caller's scope.
	local key value

	# %b expands the \xHH escapes and, unlike "echo -e", cannot mistake a
	# name that starts with "-" for an option.
	key="$(printf '%b' "$1")"
	value="$2"

	printf '%s\n' "${value}" > "${testdir}/${key}"
	echo "Storing ${key} ($(hexbytes "${key}")) -> ${value}" >> "${seqres}.full"
}
| |
# Create a directory inside ${testdir}, store a value in a file named "value"
# beneath it, and log what was stored.
#   $1 - directory name; backslash escapes such as \xHH are expanded to raw
#        bytes
#   $2 - contents written to <directory>/value (followed by a newline)
# Reads the globals testdir and seqres; appends one line to ${seqres}.full.
setd() {
	# Keep the working variables out of the caller's scope.
	local key value

	# %b expands the \xHH escapes and, unlike "echo -e", cannot mistake a
	# name that starts with "-" for an option.
	key="$(printf '%b' "$1")"
	value="$2"

	mkdir -p "${testdir}/${key}"
	printf '%s\n' "${value}" > "${testdir}/${key}/value"
	echo "Storing ${key} ($(hexbytes "${key}")) -> ${value}" >> "${seqres}.full"
}
| |
# Check that a regular file inside ${testdir} exists and holds the expected
# contents; complain on stdout otherwise.
#   $1 - file name; backslash escapes such as \xHH are expanded to raw bytes
#   $2 - contents the file is expected to hold
# Reads the globals testdir and seqres; appends one line to ${seqres}.full.
# Prints nothing on stdout when the file matches.
testf() {
	# Keep the working variables out of the caller's scope.
	local key value fname actual_value

	# %b expands the \xHH escapes and, unlike "echo -e", cannot mistake a
	# name that starts with "-" for an option.
	key="$(printf '%b' "$1")"
	value="$2"
	fname="${testdir}/${key}"

	echo "Testing ${key} ($(hexbytes "${key}")) -> ${value}" >> "${seqres}.full"

	if [ ! -e "${fname}" ]; then
		echo "Key ${key} does not exist for ${value} test??"
		return
	fi

	actual_value="$(cat "${fname}")"
	if [ "${actual_value}" != "${value}" ]; then
		# Report what was found first, then what the caller wanted.
		echo "Key ${key} has value ${actual_value}, expected ${value}."
	fi
}
| |
# Check that a directory inside ${testdir} contains a "value" file holding the
# expected contents; complain on stdout otherwise.
#   $1 - directory name; backslash escapes such as \xHH are expanded to raw
#        bytes
#   $2 - contents <directory>/value is expected to hold
# Reads the globals testdir and seqres; appends one line to ${seqres}.full.
# Prints nothing on stdout when the file matches.
testd() {
	# Keep the working variables out of the caller's scope.
	local key value fname actual_value

	# %b expands the \xHH escapes and, unlike "echo -e", cannot mistake a
	# name that starts with "-" for an option.
	key="$(printf '%b' "$1")"
	value="$2"
	fname="${testdir}/${key}/value"

	echo "Testing ${key} ($(hexbytes "${key}")) -> ${value}" >> "${seqres}.full"

	if [ ! -e "${fname}" ]; then
		echo "Key ${key} does not exist for ${value} test??"
		return
	fi

	actual_value="$(cat "${fname}")"
	if [ "${actual_value}" != "${value}" ]; then
		# Report what was found first, then what the caller wanted.
		echo "Key ${key} has value ${actual_value}, expected ${value}."
	fi
}
| |
# Filter scrub output read from stdin: keep only the lines that mention
# "Unicode", and on each of those drop everything that precedes "Duplicate".
# Lines without "Duplicate" are passed through unchanged.
filter_scrub() {
	grep -e 'Unicode' |
		sed 's/^.*Duplicate/Duplicate/'
}
| |
echo "Create files"

# Regular-file fixtures as alternating (escaped name, contents) entries.
# The same table drives both the create pass and the verify pass below.
file_fixtures=(
	# These two render the same
	"french_caf\xc3\xa9.txt" "NFC"
	"french_cafe\xcc\x81.txt" "NFD"

	# These two may have different widths
	"chinese_\xef\xbd\xb6.txt" "NFKC1"
	"chinese_\xe3\x82\xab.txt" "NFKC2"

	# Same point, different byte representations in NFC/NFD/NFKC/NFKD
	"greek_\xcf\x93.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFC"
	"greek_\xcf\x92\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFD"
	"greek_\xce\x8e.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKC"
	"greek_\xce\xa5\xcc\x81.txt" "GREEK UPSILON WITH ACUTE AND HOOK SYMBOL, NFKD"

	# Arabic code point can expand into a muuuch longer series
	"arabic_\xef\xb7\xba.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFC"
	"arabic_\xd8\xb5\xd9\x84\xd9\x89\x20\xd8\xa7\xd9\x84\xd9\x84\xd9\x87\x20\xd8\xb9\xd9\x84\xd9\x8a\xd9\x87\x20\xd9\x88\xd8\xb3\xd9\x84\xd9\x85.txt" "ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM, NFKC"

	# Fake slash?
	"urk\xc0\xafmoo" "FAKESLASH"

	# Emoji: octopus butterfly owl giraffe
	"emoji_\xf0\x9f\xa6\x91\xf0\x9f\xa6\x8b\xf0\x9f\xa6\x89\xf0\x9f\xa6\x92.txt" "octopus butterfly owl giraffe emoji"

	# Line draw characters, because why not?
	"\x6c\x69\x6e\x65\x64\x72\x61\x77\x5f\x0a\xe2\x95\x94\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x97\x0a\xe2\x95\x91\x20\x6d\x65\x74\x61\x74\x61\x62\x6c\x65\x20\xe2\x95\x91\x0a\xe2\x95\x9f\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x95\xa2\x0a\xe2\x95\x91\x20\x5f\x5f\x69\x6e\x64\x65\x78\x20\x20\x20\xe2\x95\x91\x0a\xe2\x95\x9a\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x90\xe2\x95\x9d\x0a.txt" "ugly box because we can"

	# unicode rtl widgets too...
	"moo\xe2\x80\xaegnp.txt" "Well say hello,"
	"mootxt.png" "Harvey"

	# mixed-script confusables
	"mixed_t\xce\xbfp.txt" "greek omicron instead of o"
	"mixed_top.txt" "greek omicron instead of o"

	# single-script spoofing
	"hyphens_a\xe2\x80\x90b.txt" "hyphens"
	"hyphens_a-b.txt" "hyphens"

	"dz_digraph_dze.txt" "d-z digraph"
	"dz_digraph_\xca\xa3e.txt" "d-z digraph"

	# inadequate rendering
	"inadequate_al.txt" "is it l or is it 1"
	"inadequate_a1.txt" "is it l or is it 1"

	# symbols
	"prohibition_Rs.txt" "rupee symbol"
	"prohibition_\xe2\x82\xa8.txt" "rupee symbol"

	# zero width joiners
	"zerojoin_moocow.txt" "zero width joiners"
	"zerojoin_moo\xe2\x80\x8dcow.txt" "zero width joiners"

	# combining marks
	"combmark_\xe1\x80\x9c\xe1\x80\xad\xe1\x80\xaf.txt" "combining marks"
	"combmark_\xe1\x80\x9c\xe1\x80\xaf\xe1\x80\xad.txt" "combining marks"
)

# Directory fixtures in the same (escaped name, contents) layout.
dir_fixtures=(
	# fake dotdot entry
	".\xe2\x80\x8d" "zero width joiners in dot entry"
	"..\xe2\x80\x8d" "zero width joiners in dotdot entry"
)

# Create pass: every file first, then the directories.
for ((i = 0; i < ${#file_fixtures[@]}; i += 2)); do
	setf "${file_fixtures[i]}" "${file_fixtures[i + 1]}"
done
for ((i = 0; i < ${#dir_fixtures[@]}; i += 2)); do
	setd "${dir_fixtures[i]}" "${dir_fixtures[i + 1]}"
done

ls -la $testdir >> $seqres.full

echo "Test files"
# Verify pass: walk the same tables in the same order.
for ((i = 0; i < ${#file_fixtures[@]}; i += 2)); do
	testf "${file_fixtures[i]}" "${file_fixtures[i + 1]}"
done
for ((i = 0; i < ${#dir_fixtures[@]}; i += 2)); do
	testd "${dir_fixtures[i]}" "${dir_fixtures[i + 1]}"
done
| |
echo "Uniqueness of inodes?"
# Count how many entries matched by the glob share each inode number and
# print "<count> <inode>" for any inode that appears more than once.
while read -r nr inum; do
	[ "${nr}" -gt 1 ] && echo "${nr} ${inum}"
done < <(stat -c '%i' "${testdir}/"* | sort | uniq -c)
| |
echo "Test XFS online scrub, if applicable"

if _check_xfs_scrub_does_unicode "$SCRATCH_MNT" "$SCRATCH_DEV"; then
	# Capture the filtered scrub output once; every check below greps it.
	output="$(LC_ALL="C.UTF-8" ${XFS_SCRUB_PROG} -v -n "${SCRATCH_MNT}" 2>&1 | filter_scrub)"

	# "<name prefix>:<description>" pairs. A prefix that is missing from
	# the scrub output produces a "No complaints about <description>?" line.
	scrub_expectations=(
		"french_:french e accent"
		"greek_:greek letter mess"
		"arabic_:arabic expanded string"
		"mixed_:mixed script confusables"
		"hyphens_:hyphenation confusables"
		"dz_digraph_:single script confusables"
		"inadequate_:inadequate rendering confusables"
		"prohibition_:prohibited sequence confusables"
		"zerojoin_:zero-width join confusables"
	)
	for expectation in "${scrub_expectations[@]}"; do
		echo "${output}" | grep -q "${expectation%%:*}" || echo "No complaints about ${expectation#*:}?"
	done

	echo "Actual xfs_scrub output:" >> $seqres.full
	echo "${output}" >> $seqres.full
fi

# success, all done
status=0
exit