chrome/chrome_cleaner/strings/string_util.cc - chromium/src - Git at Google

 // Copyright 2018 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "chrome/chrome_cleaner/strings/string_util.h"

 #include <algorithm>
 #include <map>
 #include <utility>
 #include <vector>

 #include "base/i18n/streaming_utf8_validator.h"
 #include "base/logging.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"

 namespace chrome_cleaner {

 namespace {

 // A map from (text offset, pattern offest) to bool.
 typedef std::map<std::pair<size_t, size_t>, bool> WildcardMatchCache;

 bool String16WildcardMatchRecursive(const base::string16& text,
                                     size_t text_offset,
                                     const base::string16& pattern,
                                     size_t pattern_offset,
                                     const wchar_t escape_char,
                                     WildcardMatchCache* cache);

 bool String16WildcardMatchRecursiveCached(const base::string16& text,
                                           size_t text_offset,
                                           const base::string16& pattern,
                                           size_t pattern_offset,
                                           const wchar_t escape_char,
                                           WildcardMatchCache* cache) {
   DCHECK(cache);

   // Look for a pre-computed value.
   WildcardMatchCache::key_type key =
       std::make_pair(text_offset, pattern_offset);
   WildcardMatchCache::iterator entry = cache->find(key);
   if (entry != cache->end())
     return entry->second;

   // Compute the recursive match and cache it.
   bool result = String16WildcardMatchRecursive(
       text, text_offset, pattern, pattern_offset, escape_char, cache);
   (*cache)[key] = result;
   return result;
 }

 bool String16WildcardMatchRecursive(const base::string16& text,
                                     size_t text_offset,
                                     const base::string16& pattern,
                                     size_t pattern_offset,
                                     const wchar_t escape_char,
                                     WildcardMatchCache* cache) {
   while (true) {
     if (text_offset >= text.length()) {
       // The text is empty, the matching decision is based on the remaining
       // pattern.
       if (pattern_offset >= pattern.length()) {
         // The pattern is empty. The match is successful.
         return true;
       } else if (pattern[pattern_offset] == L'*') {
         // Multi-characters wild-card can match empty string, skip it.
         pattern_offset++;
         continue;
       } else {
         // There is no way to match the remaining pattern.
         return false;
       }
     } else if (pattern_offset >= pattern.length()) {
       // The text isn't empty but the pattern is, failed to match.
       return false;
     }

     // Retrieve first characters.
     wchar_t first_char = text[text_offset];
     wchar_t first_pattern = pattern[pattern_offset];

     if (first_pattern == L'?') {
       // Eat this character and move to the next one.
       ++text_offset;
       ++pattern_offset;
     } else if (first_pattern == L'*') {
       // The multi-character wild-card can match any number of characters. For
       // each remaining offset, try to recursively match at that position the
       // rest of the pattern. The maximal offset is one after the last
       // character.
       for (size_t match_offset = text_offset; match_offset <= text.size();
            ++match_offset) {
         if (String16WildcardMatchRecursiveCached(text, match_offset, pattern,
                                                  pattern_offset + 1,
                                                  escape_char, cache)) {
           return true;
         }
       }
       return false;
     } else {
       // Skip escaping character
       if (first_pattern == escape_char) {
         if (pattern_offset + 1 >= pattern.size()) {
           // The pattern has an escape character without a following character.
           return false;
         }
         ++pattern_offset;
         first_pattern = pattern[pattern_offset];
         DCHECK(first_pattern == escape_char || first_pattern == L'?' ||
                first_pattern == L'*');
       }

       // Perform a case insensitive comparison between both characters.
       if (base::ToUpperASCII(first_char) != base::ToUpperASCII(first_pattern))
         return false;

       // Eat this character and move to the next one.
       ++text_offset;
       ++pattern_offset;
     }
   }
 }

 }  // namespace

 bool String16EqualsCaseInsensitive(const base::string16& str1,
                                    const base::string16& str2) {
   return _wcsicmp(str1.c_str(), str2.c_str()) == 0;
 }

 bool String16ContainsCaseInsensitive(const base::string16& value,
                                      const base::string16& substring) {
   return std::search(
              value.begin(), value.end(), substring.begin(), substring.end(),
              base::CaseInsensitiveCompareASCII<base::string16::value_type>()) !=
          value.end();
 }

 bool String16SetMatchEntry(const base::string16& value,
                            const base::string16& delimiters,
                            const base::string16& substring,
                            String16Matcher matcher) {
   // Split the string in tokens.
   std::vector<base::string16> tokens = base::SplitString(
       value, delimiters, base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

   // Search for a matching token.
   for (std::vector<base::string16>::const_iterator it = tokens.begin();
        it != tokens.end(); ++it) {
     if (matcher(*it, substring))
       return true;
   }
   return false;
 }

 bool String16WildcardMatchInsensitive(const base::string16& text,
                                       const base::string16& pattern,
                                       const wchar_t escape_char) {
   // TODO(crbug.com/837637): Check the performance of Chromium's MatchPattern
   // and replace this with it if possible.
   WildcardMatchCache cache;
   return String16WildcardMatchRecursive(text, 0, pattern, 0, escape_char,
                                         &cache);
 }

 std::string RemoveInvalidUTF8Chars(const std::string& input) {
   std::string utf8_output;
   size_t mid_point = 0;
   base::StreamingUtf8Validator utf8_validator;
   for (size_t str_index = 0; str_index < input.size(); ++str_index) {
     switch (utf8_validator.AddBytes(&input[str_index], 1)) {
       case base::StreamingUtf8Validator::VALID_ENDPOINT:
         for (size_t offset = mid_point; offset > 0; --offset)
           utf8_output += input[str_index - offset];
         utf8_output += input[str_index];
         mid_point = 0;
         break;
       case base::StreamingUtf8Validator::VALID_MIDPOINT:
         mid_point++;
         break;
       case base::StreamingUtf8Validator::INVALID:
         utf8_validator.Reset();
         mid_point = 0;
         break;
     }
   }
   return utf8_output;
 }

 }  // namespace chrome_cleaner
	// Copyright 2018 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "chrome/chrome_cleaner/strings/string_util.h"

	#include <algorithm>
	#include <map>
	#include <utility>
	#include <vector>

	#include "base/i18n/streaming_utf8_validator.h"
	#include "base/logging.h"
	#include "base/strings/string_split.h"
	#include "base/strings/string_util.h"

	namespace chrome_cleaner {

	namespace {

	// A map from (text offset, pattern offest) to bool.
	typedef std::map<std::pair<size_t, size_t>, bool> WildcardMatchCache;

	bool String16WildcardMatchRecursive(const base::string16& text,
	size_t text_offset,
	const base::string16& pattern,
	size_t pattern_offset,
	const wchar_t escape_char,
	WildcardMatchCache* cache);

	bool String16WildcardMatchRecursiveCached(const base::string16& text,
	size_t text_offset,
	const base::string16& pattern,
	size_t pattern_offset,
	const wchar_t escape_char,
	WildcardMatchCache* cache) {
	DCHECK(cache);

	// Look for a pre-computed value.
	WildcardMatchCache::key_type key =
	std::make_pair(text_offset, pattern_offset);
	WildcardMatchCache::iterator entry = cache->find(key);
	if (entry != cache->end())
	return entry->second;

	// Compute the recursive match and cache it.
	bool result = String16WildcardMatchRecursive(
	text, text_offset, pattern, pattern_offset, escape_char, cache);
	(*cache)[key] = result;
	return result;
	}

	bool String16WildcardMatchRecursive(const base::string16& text,
	size_t text_offset,
	const base::string16& pattern,
	size_t pattern_offset,
	const wchar_t escape_char,
	WildcardMatchCache* cache) {
	while (true) {
	if (text_offset >= text.length()) {
	// The text is empty, the matching decision is based on the remaining
	// pattern.
	if (pattern_offset >= pattern.length()) {
	// The pattern is empty. The match is successful.
	return true;
	} else if (pattern[pattern_offset] == L'*') {
	// Multi-characters wild-card can match empty string, skip it.
	pattern_offset++;
	continue;
	} else {
	// There is no way to match the remaining pattern.
	return false;
	}
	} else if (pattern_offset >= pattern.length()) {
	// The text isn't empty but the pattern is, failed to match.
	return false;
	}

	// Retrieve first characters.
	wchar_t first_char = text[text_offset];
	wchar_t first_pattern = pattern[pattern_offset];

	if (first_pattern == L'?') {
	// Eat this character and move to the next one.
	++text_offset;
	++pattern_offset;
	} else if (first_pattern == L'*') {
	// The multi-character wild-card can match any number of characters. For
	// each remaining offset, try to recursively match at that position the
	// rest of the pattern. The maximal offset is one after the last
	// character.
	for (size_t match_offset = text_offset; match_offset <= text.size();
	++match_offset) {
	if (String16WildcardMatchRecursiveCached(text, match_offset, pattern,
	pattern_offset + 1,
	escape_char, cache)) {
	return true;
	}
	}
	return false;
	} else {
	// Skip escaping character
	if (first_pattern == escape_char) {
	if (pattern_offset + 1 >= pattern.size()) {
	// The pattern has an escape character without a following character.
	return false;
	}
	++pattern_offset;
	first_pattern = pattern[pattern_offset];
	DCHECK(first_pattern == escape_char \|\| first_pattern == L'?' \|\|
	first_pattern == L'*');
	}

	// Perform a case insensitive comparison between both characters.
	if (base::ToUpperASCII(first_char) != base::ToUpperASCII(first_pattern))
	return false;

	// Eat this character and move to the next one.
	++text_offset;
	++pattern_offset;
	}
	}
	}

	} // namespace

	bool String16EqualsCaseInsensitive(const base::string16& str1,
	const base::string16& str2) {
	return _wcsicmp(str1.c_str(), str2.c_str()) == 0;
	}

	bool String16ContainsCaseInsensitive(const base::string16& value,
	const base::string16& substring) {
	return std::search(
	value.begin(), value.end(), substring.begin(), substring.end(),
	base::CaseInsensitiveCompareASCII<base::string16::value_type>()) !=
	value.end();
	}

	bool String16SetMatchEntry(const base::string16& value,
	const base::string16& delimiters,
	const base::string16& substring,
	String16Matcher matcher) {
	// Split the string in tokens.
	std::vector<base::string16> tokens = base::SplitString(
	value, delimiters, base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

	// Search for a matching token.
	for (std::vector<base::string16>::const_iterator it = tokens.begin();
	it != tokens.end(); ++it) {
	if (matcher(*it, substring))
	return true;
	}
	return false;
	}

	bool String16WildcardMatchInsensitive(const base::string16& text,
	const base::string16& pattern,
	const wchar_t escape_char) {
	// TODO(crbug.com/837637): Check the performance of Chromium's MatchPattern
	// and replace this with it if possible.
	WildcardMatchCache cache;
	return String16WildcardMatchRecursive(text, 0, pattern, 0, escape_char,
	&cache);
	}

	std::string RemoveInvalidUTF8Chars(const std::string& input) {
	std::string utf8_output;
	size_t mid_point = 0;
	base::StreamingUtf8Validator utf8_validator;
	for (size_t str_index = 0; str_index < input.size(); ++str_index) {
	switch (utf8_validator.AddBytes(&input[str_index], 1)) {
	case base::StreamingUtf8Validator::VALID_ENDPOINT:
	for (size_t offset = mid_point; offset > 0; --offset)
	utf8_output += input[str_index - offset];
	utf8_output += input[str_index];
	mid_point = 0;
	break;
	case base::StreamingUtf8Validator::VALID_MIDPOINT:
	mid_point++;
	break;
	case base::StreamingUtf8Validator::INVALID:
	utf8_validator.Reset();
	mid_point = 0;
	break;
	}
	}
	return utf8_output;
	}

	} // namespace chrome_cleaner