src/string-search.h - v8/v8.git - Git at Google

 // Copyright 2010 the V8 project authors. All rights reserved.
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 //       notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 //       copyright notice, this list of conditions and the following
 //       disclaimer in the documentation and/or other materials provided
 //       with the distribution.
 //     * Neither the name of Google Inc. nor the names of its
 //       contributors may be used to endorse or promote products derived
 //       from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #ifndef V8_STRING_SEARCH_H_
 #define V8_STRING_SEARCH_H_

 namespace v8 {
 namespace internal {


 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
 // limit, we can fix the size of tables. For a needle longer than this limit,
 // search will not be optimal, since we only build tables for a smaller suffix
 // of the string, which is a safe approximation.
 static const int kBMMaxShift = 250;
 // Reduce alphabet to this size.
 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
 // proportional to the input alphabet. We reduce the alphabet size by
 // equating input characters modulo a smaller alphabet size. This gives
 // a potentially less efficient searching, but is a safe approximation.
 // For needles using only characters in the same Unicode 256-code point page,
 // there is no search speed degradation.
 static const int kBMAlphabetSize = 256;
 // For patterns below this length, the skip length of Boyer-Moore is too short
 // to compensate for the algorithmic overhead compared to simple brute force.
 static const int kBMMinPatternLength = 7;

 // Holds the two buffers used by Boyer-Moore string search's Good Suffix
 // shift. Only allows the last kBMMaxShift characters of the needle
 // to be indexed.
 class BMGoodSuffixBuffers {
  public:
   BMGoodSuffixBuffers() {}
   inline void Initialize(int needle_length) {
     ASSERT(needle_length > 1);
     int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift;
     int len = needle_length - start;
     biased_suffixes_ = suffixes_ - start;
     biased_good_suffix_shift_ = good_suffix_shift_ - start;
     for (int i = 0; i <= len; i++) {
       good_suffix_shift_[i] = len;
     }
   }
   inline int& suffix(int index) {
     ASSERT(biased_suffixes_ + index >= suffixes_);
     return biased_suffixes_[index];
   }
   inline int& shift(int index) {
     ASSERT(biased_good_suffix_shift_ + index >= good_suffix_shift_);
     return biased_good_suffix_shift_[index];
   }
  private:
   int suffixes_[kBMMaxShift + 1];
   int good_suffix_shift_[kBMMaxShift + 1];
   int* biased_suffixes_;
   int* biased_good_suffix_shift_;
   DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);
 };

 // buffers reused by BoyerMoore
 struct BMBuffers {
  public:
   static int bad_char_occurrence[kBMAlphabetSize];
   static BMGoodSuffixBuffers bmgs_buffers;
 };

 // State of the string match tables.
 // SIMPLE: No usable content in the buffers.
 // BOYER_MOORE_HORSPOOL: The bad_char_occurence table has been populated.
 // BOYER_MOORE: The bmgs_buffers tables have also been populated.
 // Whenever starting with a new needle, one should call InitializeStringSearch
 // to determine which search strategy to use, and in the case of a long-needle
 // strategy, the call also initializes the algorithm to SIMPLE.
 enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE };
 static StringSearchAlgorithm algorithm;


 // Compute the bad-char table for Boyer-Moore in the static buffer.
 template <typename PatternChar>
 static void BoyerMoorePopulateBadCharTable(Vector<const PatternChar> pattern) {
   // Only preprocess at most kBMMaxShift last characters of pattern.
   int start = Max(pattern.length() - kBMMaxShift, 0);
   // Run forwards to populate bad_char_table, so that *last* instance
   // of character equivalence class is the one registered.
   // Notice: Doesn't include the last character.
   int table_size = (sizeof(PatternChar) == 1) ? String::kMaxAsciiCharCode + 1
                                         : kBMAlphabetSize;
   if (start == 0) {  // All patterns less than kBMMaxShift in length.
     memset(BMBuffers::bad_char_occurrence,
            -1,
            table_size * sizeof(*BMBuffers::bad_char_occurrence));
   } else {
     for (int i = 0; i < table_size; i++) {
       BMBuffers::bad_char_occurrence[i] = start - 1;
     }
   }
   for (int i = start; i < pattern.length() - 1; i++) {
     PatternChar c = pattern[i];
     int bucket = (sizeof(PatternChar) ==1) ? c : c % kBMAlphabetSize;
     BMBuffers::bad_char_occurrence[bucket] = i;
   }
 }


 template <typename PatternChar>
 static void BoyerMoorePopulateGoodSuffixTable(
     Vector<const PatternChar> pattern) {
   int m = pattern.length();
   int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
   int len = m - start;
   // Compute Good Suffix tables.
   BMBuffers::bmgs_buffers.Initialize(m);

   BMBuffers::bmgs_buffers.shift(m-1) = 1;
   BMBuffers::bmgs_buffers.suffix(m) = m + 1;
   PatternChar last_char = pattern[m - 1];
   int suffix = m + 1;
   {
     int i = m;
     while (i > start) {
       PatternChar c = pattern[i - 1];
       while (suffix <= m && c != pattern[suffix - 1]) {
         if (BMBuffers::bmgs_buffers.shift(suffix) == len) {
           BMBuffers::bmgs_buffers.shift(suffix) = suffix - i;
         }
         suffix = BMBuffers::bmgs_buffers.suffix(suffix);
       }
       BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
       if (suffix == m) {
         // No suffix to extend, so we check against last_char only.
         while ((i > start) && (pattern[i - 1] != last_char)) {
           if (BMBuffers::bmgs_buffers.shift(m) == len) {
             BMBuffers::bmgs_buffers.shift(m) = m - i;
           }
           BMBuffers::bmgs_buffers.suffix(--i) = m;
         }
         if (i > start) {
           BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
         }
       }
     }
   }
   if (suffix < m) {
     for (int i = start; i <= m; i++) {
       if (BMBuffers::bmgs_buffers.shift(i) == len) {
         BMBuffers::bmgs_buffers.shift(i) = suffix - start;
       }
       if (i == suffix) {
         suffix = BMBuffers::bmgs_buffers.suffix(suffix);
       }
     }
   }
 }


 template <typename SubjectChar, typename PatternChar>
 static inline int CharOccurrence(int char_code) {
   if (sizeof(SubjectChar) == 1) {
     return BMBuffers::bad_char_occurrence[char_code];
   }
   if (sizeof(PatternChar) == 1) {
     if (char_code > String::kMaxAsciiCharCode) {
       return -1;
     }
     return BMBuffers::bad_char_occurrence[char_code];
   }
   return BMBuffers::bad_char_occurrence[char_code % kBMAlphabetSize];
 }


 // Restricted simplified Boyer-Moore string matching.
 // Uses only the bad-shift table of Boyer-Moore and only uses it
 // for the character compared to the last character of the needle.
 template <typename SubjectChar, typename PatternChar>
 static int BoyerMooreHorspool(Vector<const SubjectChar> subject,
                               Vector<const PatternChar> pattern,
                               int start_index,
                               bool* complete) {
   ASSERT(algorithm <= BOYER_MOORE_HORSPOOL);
   int n = subject.length();
   int m = pattern.length();

   int badness = -m;

   // How bad we are doing without a good-suffix table.
   int idx;  // No matches found prior to this index.
   PatternChar last_char = pattern[m - 1];
   int last_char_shift =
       m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
   // Perform search
   for (idx = start_index; idx <= n - m;) {
     int j = m - 1;
     int c;
     while (last_char != (c = subject[idx + j])) {
       int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
       int shift = j - bc_occ;
       idx += shift;
       badness += 1 - shift;  // at most zero, so badness cannot increase.
       if (idx > n - m) {
         *complete = true;
         return -1;
       }
     }
     j--;
     while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
     if (j < 0) {
       *complete = true;
       return idx;
     } else {
       idx += last_char_shift;
       // Badness increases by the number of characters we have
       // checked, and decreases by the number of characters we
       // can skip by shifting. It's a measure of how we are doing
       // compared to reading each character exactly once.
       badness += (m - j) - last_char_shift;
       if (badness > 0) {
         *complete = false;
         return idx;
       }
     }
   }
   *complete = true;
   return -1;
 }


 template <typename SubjectChar, typename PatternChar>
 static int BoyerMooreIndexOf(Vector<const SubjectChar> subject,
                              Vector<const PatternChar> pattern,
                              int idx) {
   ASSERT(algorithm <= BOYER_MOORE);
   int n = subject.length();
   int m = pattern.length();
   // Only preprocess at most kBMMaxShift last characters of pattern.
   int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

   PatternChar last_char = pattern[m - 1];
   // Continue search from i.
   while (idx <= n - m) {
     int j = m - 1;
     SubjectChar c;
     while (last_char != (c = subject[idx + j])) {
       int shift = j - CharOccurrence<SubjectChar, PatternChar>(c);
       idx += shift;
       if (idx > n - m) {
         return -1;
       }
     }
     while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
     if (j < 0) {
       return idx;
     } else if (j < start) {
       // we have matched more than our tables allow us to be smart about.
       // Fall back on BMH shift.
       idx += m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
     } else {
       int gs_shift = BMBuffers::bmgs_buffers.shift(j + 1);
       int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
       int shift = j - bc_occ;
       if (gs_shift > shift) {
         shift = gs_shift;
       }
       idx += shift;
     }
   }

   return -1;
 }


 // Trivial string search for shorter strings.
 // On return, if "complete" is set to true, the return value is the
 // final result of searching for the patter in the subject.
 // If "complete" is set to false, the return value is the index where
 // further checking should start, i.e., it's guaranteed that the pattern
 // does not occur at a position prior to the returned index.
 template <typename PatternChar, typename SubjectChar>
 static int SimpleIndexOf(Vector<const SubjectChar> subject,
                          Vector<const PatternChar> pattern,
                          int idx,
                          bool* complete) {
   ASSERT(pattern.length() > 1);
   int pattern_length = pattern.length();
   // Badness is a count of how much work we have done.  When we have
   // done enough work we decide it's probably worth switching to a better
   // algorithm.
   int badness = -10 - (pattern_length << 2);

   // We know our pattern is at least 2 characters, we cache the first so
   // the common case of the first character not matching is faster.
   PatternChar pattern_first_char = pattern[0];
   for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
     badness++;
     if (badness > 0) {
       *complete = false;
       return i;
     }
     if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
       const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
           memchr(subject.start() + i,
                  pattern_first_char,
                  n - i + 1));
       if (pos == NULL) {
         *complete = true;
         return -1;
       }
       i = static_cast<int>(pos - subject.start());
     } else {
       if (subject[i] != pattern_first_char) continue;
     }
     int j = 1;
     do {
       if (pattern[j] != subject[i+j]) {
         break;
       }
       j++;
     } while (j < pattern_length);
     if (j == pattern_length) {
       *complete = true;
       return i;
     }
     badness += j;
   }
   *complete = true;
   return -1;
 }

 // Simple indexOf that never bails out. For short patterns only.
 template <typename PatternChar, typename SubjectChar>
 static int SimpleIndexOf(Vector<const SubjectChar> subject,
                          Vector<const PatternChar> pattern,
                          int idx) {
   int pattern_length = pattern.length();
   PatternChar pattern_first_char = pattern[0];
   for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
     if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
       const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
           memchr(subject.start() + i,
                  pattern_first_char,
                  n - i + 1));
       if (pos == NULL) return -1;
       i = static_cast<int>(pos - subject.start());
     } else {
       if (subject[i] != pattern_first_char) continue;
     }
     int j = 1;
     while (j < pattern_length) {
       if (pattern[j] != subject[i+j]) {
         break;
       }
       j++;
     }
     if (j == pattern_length) {
       return i;
     }
   }
   return -1;
 }


 // Strategy for searching for a string in another string.
 enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG };


 template <typename PatternChar>
 static inline StringSearchStrategy InitializeStringSearch(
     Vector<const PatternChar> pat, bool ascii_subject) {
   // We have an ASCII haystack and a non-ASCII needle. Check if there
   // really is a non-ASCII character in the needle and bail out if there
   // is.
   if (ascii_subject && sizeof(PatternChar) > 1) {
     for (int i = 0; i < pat.length(); i++) {
       uc16 c = pat[i];
       if (c > String::kMaxAsciiCharCode) {
         return SEARCH_FAIL;
       }
     }
   }
   if (pat.length() < kBMMinPatternLength) {
     return SEARCH_SHORT;
   }
   algorithm = SIMPLE_SEARCH;
   return SEARCH_LONG;
 }


 // Dispatch long needle searches to different algorithms.
 template <typename SubjectChar, typename PatternChar>
 static int ComplexIndexOf(Vector<const SubjectChar> sub,
                           Vector<const PatternChar> pat,
                           int start_index) {
   ASSERT(pat.length() >= kBMMinPatternLength);
   // Try algorithms in order of increasing setup cost and expected performance.
   bool complete;
   int idx = start_index;
   switch (algorithm) {
     case SIMPLE_SEARCH:
       idx = SimpleIndexOf(sub, pat, idx, &complete);
       if (complete) return idx;
       BoyerMoorePopulateBadCharTable(pat);
       algorithm = BOYER_MOORE_HORSPOOL;
       // FALLTHROUGH.
     case BOYER_MOORE_HORSPOOL:
       idx = BoyerMooreHorspool(sub, pat, idx, &complete);
       if (complete) return idx;
       // Build the Good Suffix table and continue searching.
       BoyerMoorePopulateGoodSuffixTable(pat);
       algorithm = BOYER_MOORE;
       // FALLTHROUGH.
     case BOYER_MOORE:
       return BoyerMooreIndexOf(sub, pat, idx);
   }
   UNREACHABLE();
   return -1;
 }


 // Dispatch to different search strategies for a single search.
 // If searching multiple times on the same needle, the search
 // strategy should only be computed once and then dispatch to different
 // loops.
 template <typename SubjectChar, typename PatternChar>
 static int StringSearch(Vector<const SubjectChar> sub,
                         Vector<const PatternChar> pat,
                         int start_index) {
   bool ascii_subject = (sizeof(SubjectChar) == 1);
   StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject);
   switch (strategy) {
     case SEARCH_FAIL: return -1;
     case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index);
     case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index);
   }
   UNREACHABLE();
   return -1;
 }

 }}  // namespace v8::internal

 #endif  // V8_STRING_SEARCH_H_
	// Copyright 2010 the V8 project authors. All rights reserved.
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following
	// disclaimer in the documentation and/or other materials provided
	// with the distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived
	// from this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#ifndef V8_STRING_SEARCH_H_
	#define V8_STRING_SEARCH_H_

	namespace v8 {
	namespace internal {


	// Cap on the maximal shift in the Boyer-Moore implementation. By setting a
	// limit, we can fix the size of tables. For a needle longer than this limit,
	// search will not be optimal, since we only build tables for a smaller suffix
	// of the string, which is a safe approximation.
	static const int kBMMaxShift = 250;
	// Reduce alphabet to this size.
	// One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
	// proportional to the input alphabet. We reduce the alphabet size by
	// equating input characters modulo a smaller alphabet size. This gives
	// a potentially less efficient searching, but is a safe approximation.
	// For needles using only characters in the same Unicode 256-code point page,
	// there is no search speed degradation.
	static const int kBMAlphabetSize = 256;
	// For patterns below this length, the skip length of Boyer-Moore is too short
	// to compensate for the algorithmic overhead compared to simple brute force.
	static const int kBMMinPatternLength = 7;

	// Holds the two buffers used by Boyer-Moore string search's Good Suffix
	// shift. Only allows the last kBMMaxShift characters of the needle
	// to be indexed.
	class BMGoodSuffixBuffers {
	public:
	BMGoodSuffixBuffers() {}
	inline void Initialize(int needle_length) {
	ASSERT(needle_length > 1);
	int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift;
	int len = needle_length - start;
	biased_suffixes_ = suffixes_ - start;
	biased_good_suffix_shift_ = good_suffix_shift_ - start;
	for (int i = 0; i <= len; i++) {
	good_suffix_shift_[i] = len;
	}
	}
	inline int& suffix(int index) {
	ASSERT(biased_suffixes_ + index >= suffixes_);
	return biased_suffixes_[index];
	}
	inline int& shift(int index) {
	ASSERT(biased_good_suffix_shift_ + index >= good_suffix_shift_);
	return biased_good_suffix_shift_[index];
	}
	private:
	int suffixes_[kBMMaxShift + 1];
	int good_suffix_shift_[kBMMaxShift + 1];
	int* biased_suffixes_;
	int* biased_good_suffix_shift_;
	DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);
	};

	// buffers reused by BoyerMoore
	struct BMBuffers {
	public:
	static int bad_char_occurrence[kBMAlphabetSize];
	static BMGoodSuffixBuffers bmgs_buffers;
	};

	// State of the string match tables.
	// SIMPLE: No usable content in the buffers.
	// BOYER_MOORE_HORSPOOL: The bad_char_occurence table has been populated.
	// BOYER_MOORE: The bmgs_buffers tables have also been populated.
	// Whenever starting with a new needle, one should call InitializeStringSearch
	// to determine which search strategy to use, and in the case of a long-needle
	// strategy, the call also initializes the algorithm to SIMPLE.
	enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE };
	static StringSearchAlgorithm algorithm;


	// Compute the bad-char table for Boyer-Moore in the static buffer.
	template <typename PatternChar>
	static void BoyerMoorePopulateBadCharTable(Vector<const PatternChar> pattern) {
	// Only preprocess at most kBMMaxShift last characters of pattern.
	int start = Max(pattern.length() - kBMMaxShift, 0);
	// Run forwards to populate bad_char_table, so that last instance
	// of character equivalence class is the one registered.
	// Notice: Doesn't include the last character.
	int table_size = (sizeof(PatternChar) == 1) ? String::kMaxAsciiCharCode + 1
	: kBMAlphabetSize;
	if (start == 0) { // All patterns less than kBMMaxShift in length.
	memset(BMBuffers::bad_char_occurrence,
	-1,
	table_size * sizeof(*BMBuffers::bad_char_occurrence));
	} else {
	for (int i = 0; i < table_size; i++) {
	BMBuffers::bad_char_occurrence[i] = start - 1;
	}
	}
	for (int i = start; i < pattern.length() - 1; i++) {
	PatternChar c = pattern[i];
	int bucket = (sizeof(PatternChar) ==1) ? c : c % kBMAlphabetSize;
	BMBuffers::bad_char_occurrence[bucket] = i;
	}
	}


	template <typename PatternChar>
	static void BoyerMoorePopulateGoodSuffixTable(
	Vector<const PatternChar> pattern) {
	int m = pattern.length();
	int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
	int len = m - start;
	// Compute Good Suffix tables.
	BMBuffers::bmgs_buffers.Initialize(m);

	BMBuffers::bmgs_buffers.shift(m-1) = 1;
	BMBuffers::bmgs_buffers.suffix(m) = m + 1;
	PatternChar last_char = pattern[m - 1];
	int suffix = m + 1;
	{
	int i = m;
	while (i > start) {
	PatternChar c = pattern[i - 1];
	while (suffix <= m && c != pattern[suffix - 1]) {
	if (BMBuffers::bmgs_buffers.shift(suffix) == len) {
	BMBuffers::bmgs_buffers.shift(suffix) = suffix - i;
	}
	suffix = BMBuffers::bmgs_buffers.suffix(suffix);
	}
	BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
	if (suffix == m) {
	// No suffix to extend, so we check against last_char only.
	while ((i > start) && (pattern[i - 1] != last_char)) {
	if (BMBuffers::bmgs_buffers.shift(m) == len) {
	BMBuffers::bmgs_buffers.shift(m) = m - i;
	}
	BMBuffers::bmgs_buffers.suffix(--i) = m;
	}
	if (i > start) {
	BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
	}
	}
	}
	}
	if (suffix < m) {
	for (int i = start; i <= m; i++) {
	if (BMBuffers::bmgs_buffers.shift(i) == len) {
	BMBuffers::bmgs_buffers.shift(i) = suffix - start;
	}
	if (i == suffix) {
	suffix = BMBuffers::bmgs_buffers.suffix(suffix);
	}
	}
	}
	}


	template <typename SubjectChar, typename PatternChar>
	static inline int CharOccurrence(int char_code) {
	if (sizeof(SubjectChar) == 1) {
	return BMBuffers::bad_char_occurrence[char_code];
	}
	if (sizeof(PatternChar) == 1) {
	if (char_code > String::kMaxAsciiCharCode) {
	return -1;
	}
	return BMBuffers::bad_char_occurrence[char_code];
	}
	return BMBuffers::bad_char_occurrence[char_code % kBMAlphabetSize];
	}


	// Restricted simplified Boyer-Moore string matching.
	// Uses only the bad-shift table of Boyer-Moore and only uses it
	// for the character compared to the last character of the needle.
	template <typename SubjectChar, typename PatternChar>
	static int BoyerMooreHorspool(Vector<const SubjectChar> subject,
	Vector<const PatternChar> pattern,
	int start_index,
	bool* complete) {
	ASSERT(algorithm <= BOYER_MOORE_HORSPOOL);
	int n = subject.length();
	int m = pattern.length();

	int badness = -m;

	// How bad we are doing without a good-suffix table.
	int idx; // No matches found prior to this index.
	PatternChar last_char = pattern[m - 1];
	int last_char_shift =
	m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
	// Perform search
	for (idx = start_index; idx <= n - m;) {
	int j = m - 1;
	int c;
	while (last_char != (c = subject[idx + j])) {
	int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
	int shift = j - bc_occ;
	idx += shift;
	badness += 1 - shift; // at most zero, so badness cannot increase.
	if (idx > n - m) {
	*complete = true;
	return -1;
	}
	}
	j--;
	while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
	if (j < 0) {
	*complete = true;
	return idx;
	} else {
	idx += last_char_shift;
	// Badness increases by the number of characters we have
	// checked, and decreases by the number of characters we
	// can skip by shifting. It's a measure of how we are doing
	// compared to reading each character exactly once.
	badness += (m - j) - last_char_shift;
	if (badness > 0) {
	*complete = false;
	return idx;
	}
	}
	}
	*complete = true;
	return -1;
	}


	template <typename SubjectChar, typename PatternChar>
	static int BoyerMooreIndexOf(Vector<const SubjectChar> subject,
	Vector<const PatternChar> pattern,
	int idx) {
	ASSERT(algorithm <= BOYER_MOORE);
	int n = subject.length();
	int m = pattern.length();
	// Only preprocess at most kBMMaxShift last characters of pattern.
	int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;

	PatternChar last_char = pattern[m - 1];
	// Continue search from i.
	while (idx <= n - m) {
	int j = m - 1;
	SubjectChar c;
	while (last_char != (c = subject[idx + j])) {
	int shift = j - CharOccurrence<SubjectChar, PatternChar>(c);
	idx += shift;
	if (idx > n - m) {
	return -1;
	}
	}
	while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
	if (j < 0) {
	return idx;
	} else if (j < start) {
	// we have matched more than our tables allow us to be smart about.
	// Fall back on BMH shift.
	idx += m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
	} else {
	int gs_shift = BMBuffers::bmgs_buffers.shift(j + 1);
	int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
	int shift = j - bc_occ;
	if (gs_shift > shift) {
	shift = gs_shift;
	}
	idx += shift;
	}
	}

	return -1;
	}


	// Trivial string search for shorter strings.
	// On return, if "complete" is set to true, the return value is the
	// final result of searching for the patter in the subject.
	// If "complete" is set to false, the return value is the index where
	// further checking should start, i.e., it's guaranteed that the pattern
	// does not occur at a position prior to the returned index.
	template <typename PatternChar, typename SubjectChar>
	static int SimpleIndexOf(Vector<const SubjectChar> subject,
	Vector<const PatternChar> pattern,
	int idx,
	bool* complete) {
	ASSERT(pattern.length() > 1);
	int pattern_length = pattern.length();
	// Badness is a count of how much work we have done. When we have
	// done enough work we decide it's probably worth switching to a better
	// algorithm.
	int badness = -10 - (pattern_length << 2);

	// We know our pattern is at least 2 characters, we cache the first so
	// the common case of the first character not matching is faster.
	PatternChar pattern_first_char = pattern[0];
	for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
	badness++;
	if (badness > 0) {
	*complete = false;
	return i;
	}
	if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
	const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
	memchr(subject.start() + i,
	pattern_first_char,
	n - i + 1));
	if (pos == NULL) {
	*complete = true;
	return -1;
	}
	i = static_cast<int>(pos - subject.start());
	} else {
	if (subject[i] != pattern_first_char) continue;
	}
	int j = 1;
	do {
	if (pattern[j] != subject[i+j]) {
	break;
	}
	j++;
	} while (j < pattern_length);
	if (j == pattern_length) {
	*complete = true;
	return i;
	}
	badness += j;
	}
	*complete = true;
	return -1;
	}

	// Simple indexOf that never bails out. For short patterns only.
	template <typename PatternChar, typename SubjectChar>
	static int SimpleIndexOf(Vector<const SubjectChar> subject,
	Vector<const PatternChar> pattern,
	int idx) {
	int pattern_length = pattern.length();
	PatternChar pattern_first_char = pattern[0];
	for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
	if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
	const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
	memchr(subject.start() + i,
	pattern_first_char,
	n - i + 1));
	if (pos == NULL) return -1;
	i = static_cast<int>(pos - subject.start());
	} else {
	if (subject[i] != pattern_first_char) continue;
	}
	int j = 1;
	while (j < pattern_length) {
	if (pattern[j] != subject[i+j]) {
	break;
	}
	j++;
	}
	if (j == pattern_length) {
	return i;
	}
	}
	return -1;
	}


	// Strategy for searching for a string in another string.
	enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG };


	template <typename PatternChar>
	static inline StringSearchStrategy InitializeStringSearch(
	Vector<const PatternChar> pat, bool ascii_subject) {
	// We have an ASCII haystack and a non-ASCII needle. Check if there
	// really is a non-ASCII character in the needle and bail out if there
	// is.
	if (ascii_subject && sizeof(PatternChar) > 1) {
	for (int i = 0; i < pat.length(); i++) {
	uc16 c = pat[i];
	if (c > String::kMaxAsciiCharCode) {
	return SEARCH_FAIL;
	}
	}
	}
	if (pat.length() < kBMMinPatternLength) {
	return SEARCH_SHORT;
	}
	algorithm = SIMPLE_SEARCH;
	return SEARCH_LONG;
	}


	// Dispatch long needle searches to different algorithms.
	template <typename SubjectChar, typename PatternChar>
	static int ComplexIndexOf(Vector<const SubjectChar> sub,
	Vector<const PatternChar> pat,
	int start_index) {
	ASSERT(pat.length() >= kBMMinPatternLength);
	// Try algorithms in order of increasing setup cost and expected performance.
	bool complete;
	int idx = start_index;
	switch (algorithm) {
	case SIMPLE_SEARCH:
	idx = SimpleIndexOf(sub, pat, idx, &complete);
	if (complete) return idx;
	BoyerMoorePopulateBadCharTable(pat);
	algorithm = BOYER_MOORE_HORSPOOL;
	// FALLTHROUGH.
	case BOYER_MOORE_HORSPOOL:
	idx = BoyerMooreHorspool(sub, pat, idx, &complete);
	if (complete) return idx;
	// Build the Good Suffix table and continue searching.
	BoyerMoorePopulateGoodSuffixTable(pat);
	algorithm = BOYER_MOORE;
	// FALLTHROUGH.
	case BOYER_MOORE:
	return BoyerMooreIndexOf(sub, pat, idx);
	}
	UNREACHABLE();
	return -1;
	}


	// Dispatch to different search strategies for a single search.
	// If searching multiple times on the same needle, the search
	// strategy should only be computed once and then dispatch to different
	// loops.
	template <typename SubjectChar, typename PatternChar>
	static int StringSearch(Vector<const SubjectChar> sub,
	Vector<const PatternChar> pat,
	int start_index) {
	bool ascii_subject = (sizeof(SubjectChar) == 1);
	StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject);
	switch (strategy) {
	case SEARCH_FAIL: return -1;
	case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index);
	case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index);
	}
	UNREACHABLE();
	return -1;
	}

	}} // namespace v8::internal

	#endif // V8_STRING_SEARCH_H_