// blob: 65dd1abe657fe8416f48c0683a1a2b6a1906a232 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/renderer/android/email_detector.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/utf_string_conversions.h"
#include "net/base/escape.h"
#include "unicode/regex.h"
namespace {

// Upper bound on the length of an email address.
const size_t kMaximumEmailLength = 254;

// Scheme prefix prepended to an address to form an email intent URI.
const char kEmailSchemaPrefix[] = "mailto:";

// Pattern used to locate email addresses in text.
// It is deliberately stricter than RFC 2822: uncommon special characters are
// rejected so that fewer false positives are reported. The pattern is anchored
// on word boundaries so that surrounding punctuation, quote marks and the like
// do not prevent a match. Only upper-case ranges are listed; FindContent()
// applies the pattern with UREGEX_CASE_INSENSITIVE.
// The adjacent literals below concatenate into a single pattern string.
const char kEmailRegex[] =
    "\\b[A-Z0-9._%+-]+"    // Local part.
    "@[A-Z0-9.-]+"         // '@' followed by the domain labels.
    "\\.[A-Z]{2,6}\\b";    // Final dot and a 2-6 letter last label.

}  // anonymous namespace
namespace content {
// Default constructor. The body is intentionally empty.
EmailDetector::EmailDetector() {}
// Returns the maximum length of content this detector deals with, which is
// the maximum email address length (kMaximumEmailLength).
size_t EmailDetector::GetMaximumContentLength() {
  const size_t max_length = kMaximumEmailLength;
  return max_length;
}
// Builds the intent URL for the email address in |content_text|.
// The address is escaped with net::EscapeQueryParamValue() and appended to
// the "mailto:" prefix. An empty |content_text| yields a default-constructed
// GURL.
GURL EmailDetector::GetIntentURL(const std::string& content_text) {
  if (!content_text.empty()) {
    std::string spec(kEmailSchemaPrefix);
    spec += net::EscapeQueryParamValue(content_text, true);
    return GURL(spec);
  }
  return GURL();
}
// Searches the UTF-16 text in [begin, end) for the first email address.
//
// On success returns true and fills in the output parameters:
//   |start_pos|    - offset of the first matched character, relative to
//                    |begin|.
//   |end_pos|      - offset just past the last matched character, relative to
//                    |begin|.
//   |content_text| - the matched address converted to UTF-8.
//
// Returns false when no address is found or when ICU reports an error. In
// that case none of the output parameters are modified.
bool EmailDetector::FindContent(const string16::const_iterator& begin,
                                const string16::const_iterator& end,
                                size_t* start_pos,
                                size_t* end_pos,
                                std::string* content_text) {
  // Copy the range so ICU is handed contiguous storage. |input| is declared
  // before |matcher| so that it outlives it; the matcher operates on the
  // string it was given rather than on a private copy.
  const string16 utf16_input(begin, end);
  const icu::UnicodeString pattern(kEmailRegex);
  const icu::UnicodeString input(utf16_input.data(), utf16_input.length());

  UErrorCode status = U_ZERO_ERROR;
  // The matcher only lives for the duration of this call, so it is kept on
  // the stack instead of being heap-allocated.
  icu::RegexMatcher matcher(pattern, input, UREGEX_CASE_INSENSITIVE, status);
  DCHECK(U_SUCCESS(status));
  // A matcher that failed to construct must not be reported as a match.
  if (U_FAILURE(status) || !matcher.find())
    return false;

  // Collect the whole result into locals first. ICU calls are no-ops once
  // |status| holds a failure, so a single check after the three calls is
  // enough, and the output parameters are never filled with error sentinels.
  const int32_t match_start = matcher.start(status);
  const int32_t match_end = matcher.end(status);
  const icu::UnicodeString match_text(matcher.group(status));
  DCHECK(U_SUCCESS(status));
  if (U_FAILURE(status))
    return false;

  *start_pos = static_cast<size_t>(match_start);
  *end_pos = static_cast<size_t>(match_end);
  UTF16ToUTF8(match_text.getBuffer(), match_text.length(), content_text);
  return true;
}
} // namespace content