|  | // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "content/renderer/android/email_detector.h" | 
|  |  | 
|  | #include "base/logging.h" | 
|  | #include "base/memory/scoped_ptr.h" | 
|  | #include "base/strings/utf_string_conversions.h" | 
|  | #include "content/public/renderer/android_content_detection_prefixes.h" | 
|  | #include "net/base/escape.h" | 
|  | #include "third_party/icu/source/i18n/unicode/regex.h" | 
|  |  | 
namespace {

// Longest email address this detector is prepared to handle; reported to
// callers through EmailDetector::GetMaximumContentLength().
const size_t kMaximumEmailLength = 254;

// Pattern used to locate an email address inside a run of text.
//
// It is deliberately stricter than RFC 2822: uncommon special characters are
// rejected so that ordinary prose is less likely to be mistaken for an
// address. Both ends are anchored on word boundaries (\b), which lets the
// address be surrounded by punctuation, quote marks and similar characters.
//
// The character classes only list upper-case letters; the matcher is created
// with UREGEX_CASE_INSENSITIVE, so lower-case input matches as well.
//
// NOTE(review): the final label is limited to 2-6 letters, so addresses whose
// last domain label is longer will not match -- confirm this is still wanted.
const char kEmailRegex[] =
    "\\b[A-Z0-9._%+-]+"  // Local part, starting at a word boundary.
    "@[A-Z0-9.-]+"       // '@' followed by the leading domain characters.
    "\\.[A-Z]{2,6}\\b";  // Final ".label" of 2-6 letters, then a boundary.

}  // anonymous namespace
|  |  | 
|  | namespace content { | 
|  |  | 
|  | EmailDetector::EmailDetector() { | 
|  | } | 
|  |  | 
|  | size_t EmailDetector::GetMaximumContentLength() { | 
|  | return kMaximumEmailLength; | 
|  | } | 
|  |  | 
|  | GURL EmailDetector::GetIntentURL(const std::string& content_text) { | 
|  | if (content_text.empty()) | 
|  | return GURL(); | 
|  |  | 
|  | return GURL(kEmailPrefix + | 
|  | net::EscapeQueryParamValue(content_text, true)); | 
|  | } | 
|  |  | 
|  | bool EmailDetector::FindContent(const base::string16::const_iterator& begin, | 
|  | const base::string16::const_iterator& end, | 
|  | size_t* start_pos, | 
|  | size_t* end_pos, | 
|  | std::string* content_text) { | 
|  | base::string16 utf16_input = base::string16(begin, end); | 
|  | icu::UnicodeString pattern(kEmailRegex); | 
|  | icu::UnicodeString input(utf16_input.data(), utf16_input.length()); | 
|  | UErrorCode status = U_ZERO_ERROR; | 
|  | scoped_ptr<icu::RegexMatcher> matcher( | 
|  | new icu::RegexMatcher(pattern, | 
|  | input, | 
|  | UREGEX_CASE_INSENSITIVE, | 
|  | status)); | 
|  | if (matcher->find()) { | 
|  | *start_pos = matcher->start(status); | 
|  | DCHECK(U_SUCCESS(status)); | 
|  | *end_pos = matcher->end(status); | 
|  | DCHECK(U_SUCCESS(status)); | 
|  | icu::UnicodeString content_ustr(matcher->group(status)); | 
|  | DCHECK(U_SUCCESS(status)); | 
|  | base::UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), | 
|  | content_text); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  | }  // namespace content |