blob: 855e3c38c9aa9ea67eee7335069ee206cd9a6111 [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <vector>
#include "app/text_elider.h"
#include "base/file_path.h"
#include "base/i18n/break_iterator.h"
#include "base/i18n/char_iterator.h"
#include "base/i18n/rtl.h"
#include "base/string_split.h"
#include "base/string_util.h"
#include "base/sys_string_conversions.h"
#include "base/utf_string_conversions.h"
#include "gfx/font.h"
#include "googleurl/src/gurl.h"
#include "net/base/escape.h"
#include "net/base/net_util.h"
#include "net/base/registry_controlled_domain.h"
namespace {
const wchar_t kEllipsis[] = L"\x2026";
// Cuts |text| to be |length| characters long. If |cut_in_middle| is true, the
// middle of the string is removed to leave equal-length pieces from the
// beginning and end of the string; otherwise, the end of the string is removed
// and only the beginning remains. If |insert_ellipsis| is true, then an
// ellipsis character will by inserted at the cut point.
string16 CutString(const string16& text,
size_t length,
bool cut_in_middle,
bool insert_ellipsis) {
// TODO(tony): This is wrong, it might split the string in the middle of a
// surrogate pair.
const string16 kInsert = WideToUTF16(insert_ellipsis ? kEllipsis : L"");
if (!cut_in_middle)
return text.substr(0, length) + kInsert;
// We put the extra character, if any, before the cut.
const size_t half_length = length / 2;
return text.substr(0, length - half_length) + kInsert +
text.substr(text.length() - half_length, half_length);
}
} // namespace
namespace gfx {
// This function takes a GURL object and elides it. It returns a string
// which is composed of parts from subdomain, domain, path, filename and query.
// An ellipsis is added automatically if the formatted string is wider than the
// available pixel width. For |available_pixel_width| <= 0, a formatted, but
// un-elided, string is returned.
//
// |url| is the URL to display, |font| is used to measure rendered widths,
// |available_pixel_width| is the width the result should fit in, and
// |languages| is forwarded to net::FormatUrl() when formatting the URL.
//
// The function tries progressively more aggressive strategies and returns as
// soon as one fits: the whole URL, the URL without its scheme, the URL without
// a "www." subdomain, the URL with a truncated query, the URL with leading
// path elements replaced by an ellipsis, and finally the same with the
// subdomain replaced by an ellipsis.
//
// TODO(pkasting): http://b/119635 This whole function gets
// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
// a rendered string is always the sum of the widths of its substrings. Also I
// suspect it could be made simpler.
string16 ElideUrl(const GURL& url,
                  const gfx::Font& font,
                  int available_pixel_width,
                  const std::wstring& languages) {
  // Get a formatted string and corresponding parsing of the url.
  url_parse::Parsed parsed;
  string16 url_string = net::FormatUrl(url, WideToUTF8(languages),
      net::kFormatUrlOmitAll, UnescapeRule::SPACES, &parsed, NULL, NULL);
  if (available_pixel_width <= 0)
    return url_string;

  // If non-standard or not file type, return plain eliding.
  if (!(url.SchemeIsFile() || url.IsStandard()))
    return ElideText(url_string, font, available_pixel_width, false);

  // Now start eliding url_string to fit within available pixel width.
  // First pass - check to see whether entire url_string fits.
  int pixel_width_url_string = font.GetStringWidth(UTF16ToWideHack(url_string));
  if (available_pixel_width >= pixel_width_url_string)
    return url_string;

  // Get the path substring, including query and reference.
  size_t path_start_index = parsed.path.begin;
  size_t path_len = parsed.path.len;
  string16 url_path_query_etc = url_string.substr(path_start_index);
  string16 url_path = url_string.substr(path_start_index, path_len);

  // Return general elided text if url minus the query fits.
  string16 url_minus_query = url_string.substr(0, path_start_index + path_len);
  if (available_pixel_width >=
      font.GetStringWidth(UTF16ToWideHack(url_minus_query)))
    return ElideText(url_string, font, available_pixel_width, false);

  // Get Host.
  string16 url_host = UTF8ToUTF16(url.host());

  // Get domain and registry information from the URL.
  string16 url_domain = UTF8ToUTF16(
      net::RegistryControlledDomainService::GetDomainAndRegistry(url));
  if (url_domain.empty())
    url_domain = url_host;

  // Add port if required.
  if (!url.port().empty()) {
    url_host += UTF8ToUTF16(":" + url.port());
    url_domain += UTF8ToUTF16(":" + url.port());
  }

  // Get sub domain. Only take a prefix of the host when the domain was
  // actually found in it: find() returns npos on failure, and using npos as a
  // length would copy the whole host into the subdomain.
  string16 url_subdomain;
  size_t domain_start_index = url_host.find(url_domain);
  if (domain_start_index != string16::npos)
    url_subdomain = url_host.substr(0, domain_start_index);
  static const string16 kWwwPrefix = UTF8ToUTF16("www.");
  if ((url_subdomain == kWwwPrefix || url_subdomain.empty() ||
      url.SchemeIsFile())) {
    url_subdomain.clear();
  }

  // If this is a file type, the path is now defined as everything after ":".
  // For example, for "file:///C:/aa/bb/cc" the path is "/aa/bb/cc".
  // Interestingly, the domain is now "C:" - this is a nice hack for eliding to
  // work pleasantly.
  if (url.SchemeIsFile()) {
    // Split the path string using ":"
    std::vector<string16> file_path_split;
    base::SplitString(url_path, ':', &file_path_split);
    if (file_path_split.size() > 1) {  // File is of type "file:///C:/.."
      url_host.clear();
      url_domain.clear();
      url_subdomain.clear();

      static const string16 kColon = UTF8ToUTF16(":");
      // substr(1) drops the leading character of the first piece.
      url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
      url_path_query_etc = url_path = file_path_split.at(1);
    }
  }

  // Second Pass - remove scheme - the rest fits.
  int pixel_width_url_host = font.GetStringWidth(UTF16ToWideHack(url_host));
  int pixel_width_url_path = font.GetStringWidth(UTF16ToWideHack(
      url_path_query_etc));
  if (available_pixel_width >=
      pixel_width_url_host + pixel_width_url_path)
    return url_host + url_path_query_etc;

  // Third Pass: Subdomain, domain and entire path fits.
  int pixel_width_url_domain = font.GetStringWidth(UTF16ToWideHack(url_domain));
  int pixel_width_url_subdomain = font.GetStringWidth(UTF16ToWideHack(
      url_subdomain));
  if (available_pixel_width >=
      pixel_width_url_subdomain + pixel_width_url_domain +
      pixel_width_url_path)
    return url_subdomain + url_domain + url_path_query_etc;

  // Query element.
  string16 url_query;
  const int kPixelWidthDotsTrailer = font.GetStringWidth(kEllipsis);
  if (parsed.query.is_nonempty()) {
    url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
    // If everything except the query fits, let ElideText() trim the query.
    if (available_pixel_width >= (pixel_width_url_subdomain +
        pixel_width_url_domain + pixel_width_url_path -
        font.GetStringWidth(UTF16ToWideHack(url_query)))) {
      return ElideText(url_subdomain + url_domain + url_path_query_etc,
                       font, available_pixel_width, false);
    }
  }

  // Parse url_path using '/'.
  static const string16 kForwardSlash = UTF8ToUTF16("/");
  std::vector<string16> url_path_elements;
  base::SplitString(url_path, kForwardSlash[0], &url_path_elements);

  // Get filename - note that for a path ending with /
  // such as www.google.com/intl/ads/, the file name is ads/.
  size_t url_path_number_of_elements = url_path_elements.size();
  DCHECK(url_path_number_of_elements != 0);
  string16 url_filename;
  if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
    url_filename = *(url_path_elements.end() - 1);
  } else if (url_path_number_of_elements > 1) {  // Path ends with a '/'.
    url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
        kForwardSlash;
    url_path_number_of_elements--;
  }
  DCHECK(url_path_number_of_elements != 0);

  const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
  if (url_path_number_of_elements <= 1 ||
      url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
    // No path to elide, or too long of a path (could overflow in loop below)
    // Just elide this as a text string.
    return ElideText(url_subdomain + url_domain + url_path_query_etc, font,
                     available_pixel_width, false);
  }

  // Start eliding the path and replacing elements by an ellipsis and a slash.
  static const string16 kEllipsisAndSlash = WideToUTF16(kEllipsis) +
                                            kForwardSlash;
  int pixel_width_url_filename = font.GetStringWidth(UTF16ToWideHack(
      url_filename));
  int pixel_width_dot_dot_slash = font.GetStringWidth(UTF16ToWideHack(
      kEllipsisAndSlash));
  int pixel_width_slash = font.GetStringWidth(L"/");
  // Sized by the maximum checked above, so indices below stay in bounds.
  int pixel_width_url_path_elements[kMaxNumberOfUrlPathElementsAllowed];
  for (size_t i = 0; i < url_path_number_of_elements; ++i) {
    pixel_width_url_path_elements[i] =
        font.GetStringWidth(UTF16ToWideHack(url_path_elements.at(i)));
  }

  // Check with both subdomain and domain.
  string16 elided_path;
  int pixel_width_elided_path = 0;
  for (size_t i = url_path_number_of_elements - 1; i >= 1; --i) {
    // Add the initial elements of the path.
    elided_path.clear();
    pixel_width_elided_path = 0;
    for (size_t j = 0; j < i; ++j) {
      elided_path += url_path_elements.at(j) + kForwardSlash;
      pixel_width_elided_path += pixel_width_url_path_elements[j] +
                                 pixel_width_slash;
    }

    // Add url_file_name.
    if (i == (url_path_number_of_elements - 1)) {
      elided_path += url_filename;
      pixel_width_elided_path += pixel_width_url_filename;
    } else {
      elided_path += kEllipsisAndSlash + url_filename;
      pixel_width_elided_path += pixel_width_dot_dot_slash +
                                 pixel_width_url_filename;
    }

    if (available_pixel_width >=
        pixel_width_url_subdomain + pixel_width_url_domain +
        pixel_width_elided_path) {
      return ElideText(url_subdomain + url_domain + elided_path + url_query,
                       font, available_pixel_width, false);
    }
  }

  // Check with only domain.
  // If a subdomain is present, add an ellipsis before domain.
  // This is added only if the subdomain pixel width is larger than
  // the pixel width of kEllipsis. Otherwise, subdomain remains,
  // which means that this case has been resolved earlier.
  string16 url_elided_domain = url_subdomain + url_domain;
  int pixel_width_url_elided_domain = pixel_width_url_domain;
  if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
    if (!url_subdomain.empty()) {
      // kEllipsisAndSlash[0] is the ellipsis character alone.
      url_elided_domain = kEllipsisAndSlash[0] + url_domain;
      pixel_width_url_elided_domain += kPixelWidthDotsTrailer;
    } else {
      url_elided_domain = url_domain;
    }

    for (size_t i = url_path_number_of_elements - 1; i >= 1; --i) {
      // Add the initial elements of the path.
      elided_path.clear();
      pixel_width_elided_path = 0;
      for (size_t j = 0; j < i; ++j) {
        elided_path += url_path_elements.at(j) + kForwardSlash;
        pixel_width_elided_path += pixel_width_url_path_elements[j] +
                                   pixel_width_slash;
      }

      // Add url_file_name.
      if (i == (url_path_number_of_elements - 1)) {
        elided_path += url_filename;
        pixel_width_elided_path += pixel_width_url_filename;
      } else {
        elided_path += kEllipsisAndSlash + url_filename;
        pixel_width_elided_path += pixel_width_dot_dot_slash +
                                   pixel_width_url_filename;
      }

      if (available_pixel_width >=
          pixel_width_url_elided_domain + pixel_width_elided_path) {
        return ElideText(url_elided_domain + elided_path + url_query, font,
                         available_pixel_width, false);
      }
    }
  }

  // Return elided domain/../filename anyway. |elided_path| still holds the
  // most aggressively elided path built by the last loop iteration above.
  string16 final_elided_url_string(url_elided_domain);
  int url_elided_domain_width = font.GetStringWidth(UTF16ToWideHack(
      url_elided_domain));
  if ((available_pixel_width - url_elided_domain_width) >
      pixel_width_dot_dot_slash + kPixelWidthDotsTrailer +
      font.GetStringWidth(L"UV"))  // A hack to prevent trailing "../...".
    final_elided_url_string += elided_path;
  else
    final_elided_url_string += url_path;

  return ElideText(final_elided_url_string, font, available_pixel_width, false);
}
// Elides |filename| so that it fits in |available_pixel_width| when rendered
// with |font|, keeping the extension intact where possible. The name is tried
// in this order: the full value as given, the base name plus extension, and
// finally an elided base name followed by the extension. Names lacking either
// a root or an extension are elided as plain text. Every result is passed
// through base::i18n::GetDisplayStringInLTRDirectionality().
string16 ElideFilename(const FilePath& filename,
                       const gfx::Font& font,
                       int available_pixel_width) {
#if defined(OS_WIN)
  string16 full_name = filename.value();
  string16 ext = filename.Extension();
  string16 stem = filename.BaseName().RemoveExtension().value();
#elif defined(OS_POSIX)
  string16 full_name = WideToUTF16(base::SysNativeMBToWide(
      filename.value()));
  string16 ext = WideToUTF16(base::SysNativeMBToWide(
      filename.Extension()));
  string16 stem = WideToUTF16(base::SysNativeMBToWide(
      filename.BaseName().RemoveExtension().value()));
#endif

  string16 display_name;
  if (font.GetStringWidth(UTF16ToWideHack(full_name)) <=
      available_pixel_width) {
    // Everything fits as-is.
    display_name = full_name;
  } else if (stem.empty() || ext.empty()) {
    // Nothing to preserve separately; elide the whole thing at the end.
    display_name = ElideText(full_name, font, available_pixel_width, false);
  } else {
    const int ext_width = font.GetStringWidth(UTF16ToWideHack(ext));
    const int stem_width = font.GetStringWidth(UTF16ToWideHack(stem));
    if (stem_width + ext_width <= available_pixel_width) {
      // Dropping the directory part was enough.
      display_name = stem + ext;
    } else {
      // Elide the root into whatever width the extension leaves over.
      display_name =
          ElideText(stem, font, available_pixel_width - ext_width, false);
      display_name += ext;
    }
  }
  return base::i18n::GetDisplayStringInLTRDirectionality(display_name);
}
// This function elides |text| with an ellipsis if it does not fit in
// |available_pixel_width| when rendered with |font|. The ellipsis goes in the
// middle of the text if |elide_in_middle| is true, and at the end otherwise.
// Returns |text| unchanged if it is empty or already fits, and an empty string
// if not even the ellipsis fits.
string16 ElideText(const string16& text,
                   const gfx::Font& font,
                   int available_pixel_width,
                   bool elide_in_middle) {
  if (text.empty())
    return text;

  int current_text_pixel_width = font.GetStringWidth(UTF16ToWideHack(text));

  // Pango will return 0 width for absurdly long strings. Cut the string in
  // half and try again.
  // This is caused by an int overflow in Pango (specifically, in
  // pango_glyph_string_extents_range). It's actually more subtle than just
  // returning 0, since on super absurdly long strings, the int can wrap and
  // return positive numbers again. Detecting that is probably not worth it
  // (eliding way too much from a ridiculous string is probably still
  // ridiculous), but we should check other widths for bogus values as well.
  //
  // The retry keeps the caller's |elide_in_middle| choice, matching the retry
  // inside the search loop below.
  if (current_text_pixel_width <= 0) {
    return ElideText(CutString(text, text.length() / 2, elide_in_middle, false),
                     font, available_pixel_width, elide_in_middle);
  }

  if (current_text_pixel_width <= available_pixel_width)
    return text;

  if (font.GetStringWidth(kEllipsis) > available_pixel_width)
    return string16();

  // Use binary search to compute the elided text. |lo| is always a cut length
  // that fits (or 0) and |hi| one that does not.
  size_t lo = 0;
  size_t hi = text.length() - 1;
  for (size_t guess = (lo + hi) / 2; guess != lo; guess = (lo + hi) / 2) {
    // We check the length of the whole desired string at once to ensure we
    // handle kerning/ligatures/etc. correctly.
    int guess_length = font.GetStringWidth(UTF16ToWideHack(
        CutString(text, guess, elide_in_middle, true)));
    // Check again that we didn't hit a Pango width overflow. If so, cut the
    // current string in half and start over.
    if (guess_length <= 0) {
      return ElideText(CutString(text, guess / 2, elide_in_middle, false),
                       font, available_pixel_width, elide_in_middle);
    }
    if (guess_length > available_pixel_width)
      hi = guess;
    else
      lo = guess;
  }

  return CutString(text, lo, elide_in_middle, true);
}
// Builds the display string and sort key for |url|. |languages| is passed on
// to the net:: formatting helpers. After construction, |sort_host_| holds the
// formatted host with any prefix removed by net::StripWWW() dropped, and
// |prefix_end_| has been advanced past that removed prefix.
//
// TODO(viettrungluu): convert |languages| to an |std::string|.
SortedDisplayURL::SortedDisplayURL(const GURL& url,
                                   const std::wstring& languages) {
  std::wstring formatted_host;
  net::AppendFormattedHost(url, languages, &formatted_host, NULL, NULL);
  sort_host_ = WideToUTF16Hack(formatted_host);
  string16 stripped_host = net::StripWWW(sort_host_);

  // |unused_parsed| is only needed to satisfy FormatUrl()'s signature.
  url_parse::Parsed unused_parsed;
  display_url_ = net::FormatUrl(url, WideToUTF8(languages),
      net::kFormatUrlOmitAll, UnescapeRule::SPACES, &unused_parsed,
      &prefix_end_, NULL);

  if (stripped_host.length() < sort_host_.length()) {
    // Something was stripped: skip over it and sort on the remainder.
    prefix_end_ += sort_host_.length() - stripped_host.length();
    sort_host_.swap(stripped_host);
  }
}
// Creates an empty SortedDisplayURL. |prefix_end_| is zeroed explicitly so
// that AfterHost(), which passes it to string16::find() as the start offset,
// never reads an indeterminate value on a default-constructed object.
SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) {
}

// Nothing to release explicitly; members clean up after themselves.
SortedDisplayURL::~SortedDisplayURL() {
}
int SortedDisplayURL::Compare(const SortedDisplayURL& other,
icu::Collator* collator) const {
// Compare on hosts first. The host won't contain 'www.'.
UErrorCode compare_status = U_ZERO_ERROR;
UCollationResult host_compare_result = collator->compare(
static_cast<const UChar*>(sort_host_.c_str()),
static_cast<int>(sort_host_.length()),
static_cast<const UChar*>(other.sort_host_.c_str()),
static_cast<int>(other.sort_host_.length()),
compare_status);
DCHECK(U_SUCCESS(compare_status));
if (host_compare_result != 0)
return host_compare_result;
// Hosts match, compare on the portion of the url after the host.
string16 path = this->AfterHost();
string16 o_path = other.AfterHost();
compare_status = U_ZERO_ERROR;
UCollationResult path_compare_result = collator->compare(
static_cast<const UChar*>(path.c_str()),
static_cast<int>(path.length()),
static_cast<const UChar*>(o_path.c_str()),
static_cast<int>(o_path.length()),
compare_status);
DCHECK(U_SUCCESS(compare_status));
if (path_compare_result != 0)
return path_compare_result;
// Hosts and paths match, compare on the complete url. This'll push the www.
// ones to the end.
compare_status = U_ZERO_ERROR;
UCollationResult display_url_compare_result = collator->compare(
static_cast<const UChar*>(display_url_.c_str()),
static_cast<int>(display_url_.length()),
static_cast<const UChar*>(other.display_url_.c_str()),
static_cast<int>(other.display_url_.length()),
compare_status);
DCHECK(U_SUCCESS(compare_status));
return display_url_compare_result;
}
// Returns the part of |display_url_| that follows the first occurrence of
// |sort_host_| at or after |prefix_end_|. The host is expected to always be
// found; if it is not, NOTREACHED() fires and an empty string is returned.
string16 SortedDisplayURL::AfterHost() const {
  const size_t host_pos = display_url_.find(sort_host_, prefix_end_);
  if (host_pos != string16::npos)
    return display_url_.substr(host_pos + sort_host_.length());

  NOTREACHED();
  return string16();
}
// Shortens |input| to at most |max_len| characters, writing the result to
// |output|. Returns false (and copies |input| unchanged) if it already fits,
// true if it had to be shortened.
//
// Shortening rules by |max_len|:
//   0     -> empty string
//   1, 2  -> the first |max_len| characters
//   3, 4  -> first character, one or two dots, last character
//   >= 5  -> leading piece, "...", trailing piece; the leading piece gets the
//            extra character when the remaining budget is odd.
bool ElideString(const std::wstring& input, int max_len, std::wstring* output) {
  DCHECK_GE(max_len, 0);

  const int input_len = static_cast<int>(input.length());
  if (input_len <= max_len) {
    output->assign(input);
    return false;
  }

  if (max_len == 0) {
    output->clear();
  } else if (max_len == 1 || max_len == 2) {
    output->assign(input.substr(0, max_len));
  } else if (max_len == 3 || max_len == 4) {
    const wchar_t* dots = (max_len == 3) ? L"." : L"..";
    output->assign(input.substr(0, 1) + dots +
                   input.substr(input.length() - 1));
  } else {
    // Characters left over once the three dots are accounted for.
    const int budget = max_len - 3;
    const int tail_len = budget / 2;
    const int head_len = tail_len + (budget % 2);
    output->assign(input.substr(0, head_len) + L"..." +
                   input.substr(input.length() - tail_len));
  }
  return true;
}
} // namespace gfx
namespace {
// Helper that carries the state of one rectangular elide operation: the
// dimensions of the target rectangle, the current write position, and the
// string being built. It exists so that ElideRectangleString() can be split
// into small methods that share this state.
class RectangleString {
 public:
  // |output| is the string the formatted text is accumulated into; it is
  // cleared by Init().
  RectangleString(size_t max_rows, size_t max_cols, string16* output)
      : max_rows_(max_rows),
        max_cols_(max_cols),
        current_row_(0),
        current_col_(0),
        suppressed_(false),
        output_(output) {}

  // Performs deferred initialization following creation. Must be called
  // before any input is added via AddString().
  void Init() { output_->clear(); }

  // Adds |input|, reformatting it to fit the desired dimensions. May be
  // called multiple times to concatenate several strings into the region
  // (the current caller doesn't do this, however).
  void AddString(const string16& input);

  // Performs any deferred output processing. Must be called after the last
  // AddString() call has occurred.
  bool Finalize();

 private:
  // Adds a line to the rectangular region at the current position, either
  // by itself or by breaking it into words.
  void AddLine(const string16& line);

  // Adds a word to the rectangular region at the current position, either
  // by itself or by breaking it into characters.
  void AddWord(const string16& word);

  // Adds text to the output string if the rectangular boundaries have not
  // been exceeded, advancing the current position.
  void Append(const string16& string);

  // Adds a newline to the output string if the rectangular boundaries have
  // not been exceeded, resetting the current position to the beginning of
  // the next line.
  void NewLine();

  // Maximum number of rows allowed in the output string.
  size_t max_rows_;

  // Column limit that lines and words are measured against when deciding
  // whether to wrap.
  size_t max_cols_;

  // Current row position. It is always incremented and may exceed
  // |max_rows_| when the input cannot fit in the region; once that happens
  // nothing more is appended to the output string. In the future, we may
  // want to expose this value so the caller can determine how many rows
  // would actually be required to hold the formatted string.
  size_t current_row_;

  // Current character position within the row; should never exceed
  // |max_cols_|.
  size_t current_col_;

  // True when some of the input has been truncated.
  bool suppressed_;

  // String onto which the output is accumulated.
  string16* output_;
};
void RectangleString::AddString(const string16& input) {
base::BreakIterator lines(&input, base::BreakIterator::BREAK_NEWLINE);
if (lines.Init()) {
while (lines.Advance())
AddLine(lines.GetString());
} else {
NOTREACHED() << "BreakIterator (lines) init failed";
}
}
// Appends "..." to the output if any input was dropped. Returns true when
// truncation happened, false otherwise.
bool RectangleString::Finalize() {
  if (!suppressed_)
    return false;

  output_->append(ASCIIToUTF16("..."));
  return true;
}
void RectangleString::AddLine(const string16& line) {
if (line.length() < max_cols_) {
Append(line);
} else {
base::BreakIterator words(&line, base::BreakIterator::BREAK_SPACE);
if (words.Init()) {
while (words.Advance())
AddWord(words.GetString());
} else {
NOTREACHED() << "BreakIterator (words) init failed";
}
}
// Account for naturally-occuring newlines.
++current_row_;
current_col_ = 0;
}
void RectangleString::AddWord(const string16& word) {
if (word.length() < max_cols_) {
// Word can be made to fit, no need to fragment it.
if (current_col_ + word.length() >= max_cols_)
NewLine();
Append(word);
} else {
// Word is so big that it must be fragmented.
int array_start = 0;
int char_start = 0;
base::UTF16CharIterator chars(&word);
while (!chars.end()) {
// When boundary is hit, add as much as will fit on this line.
if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) {
Append(word.substr(array_start, chars.array_pos() - array_start));
NewLine();
array_start = chars.array_pos();
char_start = chars.char_pos();
}
chars.Advance();
}
// add last remaining fragment, if any.
if (array_start != chars.array_pos())
Append(word.substr(array_start, chars.array_pos() - array_start));
}
}
// Writes |string| to the output while the current row is still inside the
// rectangle; otherwise records that input was dropped. The column position
// advances by the string's length in both cases.
void RectangleString::Append(const string16& string) {
  if (current_row_ >= max_rows_)
    suppressed_ = true;
  else
    output_->append(string);

  current_col_ += string.length();
}
// Writes a line break to the output while the current row is still inside the
// rectangle; otherwise records that input was dropped. The position moves to
// the start of the next row in both cases.
void RectangleString::NewLine() {
  if (current_row_ >= max_rows_)
    suppressed_ = true;
  else
    output_->append(ASCIIToUTF16("\n"));

  current_col_ = 0;
  ++current_row_;
}
} // namespace
namespace gfx {
bool ElideRectangleString(const string16& input, size_t max_rows,
size_t max_cols, string16* output) {
RectangleString rect(max_rows, max_cols, output);
rect.Init();
rect.AddString(input);
return rect.Finalize();
}
} // namespace gfx