// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
// for details. All rights reserved. Use of this source code is governed by a | |
// BSD-style license that can be found in the LICENSE file. | |
// Character constants. | |
const int _zero = 0x30; | |
const int _upperCaseA = 0x41; | |
const int _upperCaseZ = 0x5a; | |
const int _lowerCaseA = 0x61; | |
const int _lowerCaseZ = 0x7a; | |
const int _asciiCaseBit = 0x20; | |
/// Checks if strings [a] and [b] differ only on the case of ASCII letters. | |
/// | |
/// Strings are equal if they have the same length, and the characters at | |
/// each index are the same, or they are ASCII letters where one is upper-case | |
/// and the other is the lower-case version of the same letter. | |
/// | |
/// The comparison does not ignore the case of non-ASCII letters, so | |
/// an upper-case ae-ligature (Æ) is different from | |
/// a lower case ae-ligature (æ). | |
/// | |
/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
/// for situations where the strings are known to be ASCII. Examples could | |
/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
/// strings with a known structure. | |
bool equalsIgnoreAsciiCase(String a, String b) { | |
if (a.length != b.length) return false; | |
for (int i = 0; i < a.length; i++) { | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar == bChar) continue; | |
// Quick-check for whether this may be different cases of the same letter. | |
if (aChar ^ bChar != _asciiCaseBit) return false; | |
// If it's possible, then check if either character is actually an ASCII | |
// letter. | |
int aCharUpperCase = aChar | _asciiCaseBit; | |
if (_upperCaseA <= aCharUpperCase && aCharUpperCase <= _upperCaseZ) { | |
continue; | |
} | |
return false; | |
} | |
return true; | |
} | |
/// Hash code for a string which is compatible with [equalsIgnoreAsciiCase]. | |
/// | |
/// The hash code is unaffected by changing the case of ASCII letters, but | |
/// the case of non-ASCII letters do affect the result. | |
int hashIgnoreAsciiCase(String string) { | |
// Jenkins hash code ( http://en.wikipedia.org/wiki/Jenkins_hash_function). | |
// adapted to smi values. | |
// Same hash used by dart2js for strings, modified to ignore ASCII letter | |
// case. | |
int hash = 0; | |
for (int i = 0; i < string.length; i++) { | |
int char = string.codeUnitAt(i); | |
// Convert lower-case ASCII letters to upper case.upper | |
// This ensures that strings that differ only in case will have the | |
// same hash code. | |
if (_lowerCaseA <= char && char <= _lowerCaseZ) char -= _asciiCaseBit; | |
hash = 0x1fffffff & (hash + char); | |
hash = 0x1fffffff & (hash + ((0x0007ffff & hash) << 10)); | |
hash >>= 6; | |
} | |
hash = 0x1fffffff & (hash + ((0x03ffffff & hash) << 3)); | |
hash >>= 11; | |
return 0x1fffffff & (hash + ((0x00003fff & hash) << 15)); | |
} | |
/// Compares [a] and [b] lexically, converting ASCII letters to upper case. | |
/// | |
/// Comparison treats all lower-case ASCII letters as upper-case letters, | |
/// but does no case conversion for non-ASCII letters. | |
/// | |
/// If two strings differ only on the case of ASCII letters, the one with the | |
/// capital letter at the first difference will compare as less than the other | |
/// string. This tie-breaking ensures that the comparison is a total ordering | |
/// on strings and is compatible with equality. | |
/// | |
/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
/// for situations where the strings are known to be ASCII. Examples could | |
/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
/// strings with a known structure. | |
int compareAsciiUpperCase(String a, String b) { | |
int defaultResult = 0; // Returned if no difference found. | |
for (int i = 0; i < a.length; i++) { | |
if (i >= b.length) return 1; | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar == bChar) continue; | |
// Upper-case if letters. | |
int aUpperCase = aChar; | |
int bUpperCase = bChar; | |
if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { | |
aUpperCase -= _asciiCaseBit; | |
} | |
if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | |
bUpperCase -= _asciiCaseBit; | |
} | |
if (aUpperCase != bUpperCase) return (aUpperCase - bUpperCase).sign; | |
if (defaultResult == 0) defaultResult = (aChar - bChar); | |
} | |
if (b.length > a.length) return -1; | |
return defaultResult.sign; | |
} | |
/// Compares [a] and [b] lexically, converting ASCII letters to lower case. | |
/// | |
/// Comparison treats all upper-case ASCII letters as lower-case letters, | |
/// but does no case conversion for non-ASCII letters. | |
/// | |
/// If two strings differ only on the case of ASCII letters, the one with the | |
/// capital letter at the first difference will compare as less than the other | |
/// string. This tie-breaking ensures that the comparison is a total ordering | |
/// on strings. | |
/// | |
/// Ignoring non-ASCII letters is not generally a good idea, but it makes sense | |
/// for situations where the strings are known to be ASCII. Examples could | |
/// be Dart identifiers, base-64 or hex encoded strings, GUIDs or similar | |
/// strings with a known structure. | |
int compareAsciiLowerCase(String a, String b) { | |
int defaultResult = 0; | |
for (int i = 0; i < a.length; i++) { | |
if (i >= b.length) return 1; | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar == bChar) continue; | |
int aLowerCase = aChar; | |
int bLowerCase = bChar; | |
// Upper case if ASCII letters. | |
if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { | |
bLowerCase += _asciiCaseBit; | |
} | |
if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | |
aLowerCase += _asciiCaseBit; | |
} | |
if (aLowerCase != bLowerCase) return (aLowerCase - bLowerCase).sign; | |
if (defaultResult == 0) defaultResult = aChar - bChar; | |
} | |
if (b.length > a.length) return -1; | |
return defaultResult.sign; | |
} | |
/// Compares strings [a] and [b] according to [natural sort ordering]. | |
/// | |
/// A natural sort ordering is a lexical ordering where embedded | |
/// numerals (digit sequences) are treated as a single unit and ordered by | |
/// numerical value. | |
/// This means that `"a10b"` will be ordered after `"a7b"` in natural | |
/// ordering, where lexical ordering would put the `1` before the `7`, ignoring | |
/// that the `1` is part of a larger number. | |
/// | |
/// Example: | |
/// The following strings are in the order they would be sorted by using this | |
/// comparison function: | |
/// | |
/// "a", "a0", "a0b", "a1", "a01", "a9", "a10", "a100", "a100b", "aa" | |
/// | |
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
int compareNatural(String a, String b) { | |
for (int i = 0; i < a.length; i++) { | |
if (i >= b.length) return 1; | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar != bChar) { | |
return _compareNaturally(a, b, i, aChar, bChar); | |
} | |
} | |
if (b.length > a.length) return -1; | |
return 0; | |
} | |
/// Compares strings [a] and [b] according to lower-case | |
/// [natural sort ordering]. | |
/// | |
/// ASCII letters are converted to lower case before being compared, like | |
/// for [compareAsciiLowerCase], then the result is compared like for | |
/// [compareNatural]. | |
/// | |
/// If two strings differ only on the case of ASCII letters, the one with the | |
/// capital letter at the first difference will compare as less than the other | |
/// string. This tie-breaking ensures that the comparison is a total ordering | |
/// on strings. | |
/// | |
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
int compareAsciiLowerCaseNatural(String a, String b) { | |
int defaultResult = 0; // Returned if no difference found. | |
for (int i = 0; i < a.length; i++) { | |
if (i >= b.length) return 1; | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar == bChar) continue; | |
int aLowerCase = aChar; | |
int bLowerCase = bChar; | |
if (_upperCaseA <= aChar && aChar <= _upperCaseZ) { | |
aLowerCase += _asciiCaseBit; | |
} | |
if (_upperCaseA <= bChar && bChar <= _upperCaseZ) { | |
bLowerCase += _asciiCaseBit; | |
} | |
if (aLowerCase != bLowerCase) { | |
return _compareNaturally(a, b, i, aLowerCase, bLowerCase); | |
} | |
if (defaultResult == 0) defaultResult = aChar - bChar; | |
} | |
if (b.length > a.length) return -1; | |
return defaultResult.sign; | |
} | |
/// Compares strings [a] and [b] according to upper-case | |
/// [natural sort ordering]. | |
/// | |
/// ASCII letters are converted to upper case before being compared, like | |
/// for [compareAsciiUpperCase], then the result is compared like for | |
/// [compareNatural]. | |
/// | |
/// If two strings differ only on the case of ASCII letters, the one with the | |
/// capital letter at the first difference will compare as less than the other | |
/// string. This tie-breaking ensures that the comparison is a total ordering | |
/// on strings | |
/// | |
/// [natural sort ordering]: https://en.wikipedia.org/wiki/Natural_sort_order | |
int compareAsciiUpperCaseNatural(String a, String b) { | |
int defaultResult = 0; | |
for (int i = 0; i < a.length; i++) { | |
if (i >= b.length) return 1; | |
var aChar = a.codeUnitAt(i); | |
var bChar = b.codeUnitAt(i); | |
if (aChar == bChar) continue; | |
int aUpperCase = aChar; | |
int bUpperCase = bChar; | |
if (_lowerCaseA <= aChar && aChar <= _lowerCaseZ) { | |
aUpperCase -= _asciiCaseBit; | |
} | |
if (_lowerCaseA <= bChar && bChar <= _lowerCaseZ) { | |
bUpperCase -= _asciiCaseBit; | |
} | |
if (aUpperCase != bUpperCase) { | |
return _compareNaturally(a, b, i, aUpperCase, bUpperCase); | |
} | |
if (defaultResult == 0) defaultResult = aChar - bChar; | |
} | |
if (b.length > a.length) return -1; | |
return defaultResult.sign; | |
} | |
/// Check for numbers overlapping the current mismatched characters. | |
/// | |
/// If both [aChar] and [bChar] are digits, use numerical comparison. | |
/// Check if the previous characters is a non-zero number, and if not, | |
/// skip - but count - leading zeros before comparing numbers. | |
/// | |
/// If one is a digit and the other isn't, check if the previous character | |
/// is a digit, and if so, the the one with the digit is the greater number. | |
/// | |
/// Otherwise just returns the difference between [aChar] and [bChar]. | |
int _compareNaturally( | |
String a, String b, int index, int aChar, int bChar) { | |
assert(aChar != bChar); | |
var aIsDigit = _isDigit(aChar); | |
var bIsDigit = _isDigit(bChar); | |
if (aIsDigit) { | |
if (bIsDigit) { | |
return _compareNumerically(a, b, aChar, bChar, index); | |
} else if (index > 0 && _isDigit(a.codeUnitAt(index - 1))) { | |
// aChar is the continuation of a longer number. | |
return 1; | |
} | |
} else if (bIsDigit && index > 0 && _isDigit(b.codeUnitAt(index - 1))) { | |
// bChar is the continuation of a longer number. | |
return -1; | |
} | |
// Characters are both non-digits, or not continuation of earlier number. | |
return (aChar - bChar).sign; | |
} | |
/// Compare numbers overlapping [aChar] and [bChar] numerically. | |
/// | |
/// If the numbers have the same numerical value, but one has more leading | |
/// zeros, the longer number is considered greater than the shorter one. | |
/// | |
/// This ensures a total ordering on strings compatible with equality. | |
int _compareNumerically(String a, String b, int aChar, int bChar, int index) { | |
// Both are digits. Find the first significant different digit, then find | |
// the length of the numbers. | |
if (_isNonZeroNumberSuffix(a, index)) { | |
// Part of a longer number, differs at this index, just count the length. | |
int result = _compareDigitCount(a, b, index, index); | |
if (result != 0) return result; | |
// If same length, the current character is the most significant differing | |
// digit. | |
return (aChar - bChar).sign; | |
} | |
// Not part of larger (non-zero) number, so skip leading zeros before | |
// comparing numbers. | |
int aIndex = index; | |
int bIndex = index; | |
if (aChar == _zero) { | |
do { | |
aIndex++; | |
if (aIndex == a.length) return -1; // number in a is zero, b is not. | |
aChar = a.codeUnitAt(aIndex); | |
} while (aChar == _zero); | |
if (!_isDigit(aChar)) return -1; | |
} else if (bChar == _zero) { | |
do { | |
bIndex++; | |
if (bIndex == b.length) return 1; // number in b is zero, a is not. | |
bChar = b.codeUnitAt(bIndex); | |
} while (bChar == _zero); | |
if (!_isDigit(bChar)) return 1; | |
} | |
if (aChar != bChar) { | |
int result = _compareDigitCount(a, b, aIndex, bIndex); | |
if (result != 0) return result; | |
return (aChar - bChar).sign; | |
} | |
// Same leading digit, one had more leading zeros. | |
// Compare digits until reaching a difference. | |
while (true) { | |
var aIsDigit = false; | |
var bIsDigit = false; | |
aChar = 0; | |
bChar = 0; | |
if (++aIndex < a.length) { | |
aChar = a.codeUnitAt(aIndex); | |
aIsDigit = _isDigit(aChar); | |
} | |
if (++bIndex < b.length) { | |
bChar = b.codeUnitAt(bIndex); | |
bIsDigit = _isDigit(bChar); | |
} | |
if (aIsDigit) { | |
if (bIsDigit) { | |
if (aChar == bChar) continue; | |
// First different digit found. | |
break; | |
} | |
// bChar is non-digit, so a has longer number. | |
return 1; | |
} else if (bIsDigit) { | |
return -1; // b has longer number. | |
} else { | |
// Neither is digit, so numbers had same numerical value. | |
// Fall back on number of leading zeros | |
// (reflected by difference in indices). | |
return (aIndex - bIndex).sign; | |
} | |
} | |
// At first differing digits. | |
int result = _compareDigitCount(a, b, aIndex, bIndex); | |
if (result != 0) return result; | |
return (aChar - bChar).sign; | |
} | |
/// Checks which of [a] and [b] has the longest sequence of digits. | |
/// | |
/// Starts counting from `i + 1` and `j + 1` (assumes that `a[i]` and `b[j]` are | |
/// both already known to be digits). | |
int _compareDigitCount(String a, String b, int i, int j) { | |
while (++i < a.length) { | |
bool aIsDigit = _isDigit(a.codeUnitAt(i)); | |
if (++j == b.length) return aIsDigit ? 1 : 0; | |
bool bIsDigit = _isDigit(b.codeUnitAt(j)); | |
if (aIsDigit) { | |
if (bIsDigit) continue; | |
return 1; | |
} else if (bIsDigit) { | |
return -1; | |
} else { | |
return 0; | |
} | |
} | |
if (++j < b.length && _isDigit(b.codeUnitAt(j))) { | |
return -1; | |
} | |
return 0; | |
} | |
bool _isDigit(int charCode) => (charCode ^ _zero) <= 9; | |
/// Check if the digit at [index] is continuing a non-zero number. | |
/// | |
/// If there is no non-zero digits before, then leading zeros at [index] | |
/// are also ignored when comparing numerically. If there is a non-zero digit | |
/// before, then zeros at [index] are significant. | |
bool _isNonZeroNumberSuffix(String string, int index) { | |
while (--index >= 0) { | |
int char = string.codeUnitAt(index); | |
if (char != _zero) return _isDigit(char); | |
} | |
return false; | |
} |