blob: fa9f5458590ed1b17d004a97e980fb156d45b9ed [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/url_formatter/elide_url.h"
#include <stddef.h>
#include "base/check_op.h"
#include "base/i18n/rtl.h"
#include "base/strings/escape.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/url_formatter/url_formatter.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "ui/gfx/text_constants.h"
#include "ui/gfx/text_elider.h"
#include "ui/gfx/text_utils.h"
#include "url/gurl.h"
#include "url/origin.h"
#include "url/url_constants.h"
namespace {
#if !BUILDFLAG(IS_ANDROID)
const char16_t kDot = '.';
// Build a path from the first |num_components| elements in |path_elements|.
// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
std::u16string BuildPathFromComponents(
const std::u16string& path_prefix,
const std::vector<std::u16string>& path_elements,
const std::u16string& filename,
size_t num_components) {
DCHECK_LE(num_components, path_elements.size());
// Add the initial elements of the path.
std::u16string path = path_prefix;
// Build path from first |num_components| elements.
for (size_t j = 0; j < num_components; ++j)
path += path_elements[j] + gfx::kForwardSlash;
// Add |filename|, ellipsis if necessary.
if (num_components != (path_elements.size() - 1))
path += std::u16string(gfx::kEllipsisUTF16) + gfx::kForwardSlash;
path += filename;
return path;
}
// Takes a prefix (Domain, or Domain+subdomain) and a collection of path
// components and elides if possible. Returns a string containing the longest
// possible elided path, or an empty string if elision is not possible.
std::u16string ElideComponentizedPath(
const std::u16string& url_path_prefix,
const std::vector<std::u16string>& url_path_elements,
const std::u16string& url_filename,
const std::u16string& url_query,
const gfx::FontList& font_list,
float available_pixel_width) {
CHECK(!url_path_elements.empty());
// Find the longest set of leading path components that fits in
// |available_pixel_width|. Since BuildPathFromComponents() is O(n), using a
// binary search here makes the overall complexity O(n lg n), which is
// meaningful since there may be thousands of components in extreme cases.
std::u16string elided_path_at_min_index;
size_t min_index = 0;
for (size_t max_index = url_path_elements.size();
min_index != max_index - 1;) {
const size_t cutting_index = (min_index + max_index) / 2;
const std::u16string elided_path = BuildPathFromComponents(
url_path_prefix, url_path_elements, url_filename, cutting_index);
if (gfx::GetStringWidthF(elided_path, font_list) <= available_pixel_width) {
min_index = cutting_index;
elided_path_at_min_index = elided_path;
} else {
max_index = cutting_index;
}
}
// If the cutting point is at the beginning and nothing gets elided, return
// failure even if the whole text could fit. TODO(https://crbug.com/1074034).
if (min_index == 0)
return std::u16string();
// Elide starting at |min_index|.
return gfx::ElideText(elided_path_at_min_index + url_query, font_list,
available_pixel_width, gfx::ELIDE_TAIL);
}
#endif // !BUILDFLAG(IS_ANDROID)
bool ShouldShowScheme(base::StringPiece scheme,
const url_formatter::SchemeDisplay scheme_display) {
switch (scheme_display) {
case url_formatter::SchemeDisplay::SHOW:
return true;
case url_formatter::SchemeDisplay::OMIT_HTTP_AND_HTTPS:
return scheme != url::kHttpsScheme && scheme != url::kHttpScheme;
case url_formatter::SchemeDisplay::OMIT_CRYPTOGRAPHIC:
return scheme != url::kHttpsScheme && scheme != url::kWssScheme;
}
return true;
}
// TODO(jshin): Come up with a way to show Bidi URLs 'safely' (e.g. wrap up
// the entire url with {LSI, PDI} and individual domain labels with {FSI, PDI}).
// See http://crbug.com/650760 . For now, fall back to punycode if there's a
// strong RTL character.
std::u16string HostForDisplay(base::StringPiece host_in_puny) {
std::u16string host = url_formatter::IDNToUnicode(host_in_puny);
return base::i18n::StringContainsStrongRTLChars(host) ?
base::ASCIIToUTF16(host_in_puny) : host;
}
} // namespace
namespace url_formatter {
#if !BUILDFLAG(IS_ANDROID)
// TODO(pkasting): http://crbug.com/77883 This whole function gets
// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
// a rendered string is always the sum of the widths of its substrings. Also I
// suspect it could be made simpler.
std::u16string ElideUrl(const GURL& url,
const gfx::FontList& font_list,
float available_pixel_width) {
// Get a formatted string and corresponding parsing of the url.
url::Parsed parsed;
const std::u16string url_string = url_formatter::FormatUrl(
url, url_formatter::kFormatUrlOmitDefaults, base::UnescapeRule::SPACES,
&parsed, nullptr, nullptr);
if (available_pixel_width <= 0)
return url_string;
if (!url.IsStandard()) {
return gfx::ElideText(url_string, font_list, available_pixel_width,
gfx::ELIDE_TAIL);
}
// Now start eliding url_string to fit within available pixel width.
// Fist pass - check to see whether entire url_string fits.
const float pixel_width_url_string =
gfx::GetStringWidthF(url_string, font_list);
if (available_pixel_width >= pixel_width_url_string)
return url_string;
// Get the path substring, including query and reference.
const size_t path_start_index = parsed.path.begin;
const size_t path_len = parsed.path.len;
std::u16string url_path_query_etc;
std::u16string url_path;
if (parsed.path.is_valid()) {
url_path_query_etc = url_string.substr(path_start_index);
url_path = url_string.substr(path_start_index, path_len);
}
// Return general elided text if url minus the query fits.
const std::u16string url_minus_query =
url_string.substr(0, path_start_index + path_len);
if (available_pixel_width >=
gfx::GetStringWidthF(url_minus_query, font_list)) {
return gfx::ElideText(url_string, font_list, available_pixel_width,
gfx::ELIDE_TAIL);
}
std::u16string url_host;
std::u16string url_domain;
std::u16string url_subdomain;
url_formatter::SplitHost(url, &url_host, &url_domain, &url_subdomain);
// If this is a file type, the path is now defined as everything after ":".
// For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the
// domain is now C: - this is a nice hack for eliding to work pleasantly.
if (url.SchemeIsFile()) {
// Split the path string using ":"
constexpr base::StringPiece16 kColon(u":", 1);
std::vector<std::u16string> file_path_split = base::SplitString(
url_path, kColon, base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
if (file_path_split.size() > 1) { // File is of type "file:///C:/.."
url_host.clear();
url_domain.clear();
url_subdomain.clear();
url_host = url_domain =
base::StrCat({file_path_split.at(0).substr(1), kColon});
url_path_query_etc = url_path = file_path_split.at(1);
}
}
// Second Pass - remove scheme - the rest fits.
const float pixel_width_url_host = gfx::GetStringWidthF(url_host, font_list);
const float pixel_width_url_path =
gfx::GetStringWidthF(url_path_query_etc, font_list);
if (available_pixel_width >= pixel_width_url_host + pixel_width_url_path)
return url_host + url_path_query_etc;
// Third Pass: Subdomain, domain and entire path fits.
const float pixel_width_url_domain =
gfx::GetStringWidthF(url_domain, font_list);
const float pixel_width_url_subdomain =
gfx::GetStringWidthF(url_subdomain, font_list);
if (available_pixel_width >=
pixel_width_url_subdomain + pixel_width_url_domain + pixel_width_url_path)
return url_subdomain + url_domain + url_path_query_etc;
// Query element.
std::u16string url_query;
const float kPixelWidthDotsTrailer =
gfx::GetStringWidthF(std::u16string(gfx::kEllipsisUTF16), font_list);
if (parsed.query.is_nonempty()) {
url_query = u"?" + url_string.substr(parsed.query.begin);
if (available_pixel_width >=
(pixel_width_url_subdomain + pixel_width_url_domain +
pixel_width_url_path - gfx::GetStringWidthF(url_query, font_list))) {
return gfx::ElideText(url_subdomain + url_domain + url_path_query_etc,
font_list, available_pixel_width, gfx::ELIDE_TAIL);
}
}
// Parse url_path using '/'.
std::vector<std::u16string> url_path_elements =
base::SplitString(url_path, std::u16string(1, gfx::kForwardSlash),
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
// Get filename - note that for a path ending with /
// such as www.google.com/intl/ads/, the file name is ads/.
std::u16string url_filename(
url_path_elements.empty() ? std::u16string() : url_path_elements.back());
size_t url_path_number_of_elements = url_path_elements.size();
if (url_filename.empty() && (url_path_number_of_elements > 1)) {
// Path ends with a '/'.
--url_path_number_of_elements;
url_filename =
url_path_elements[url_path_number_of_elements - 1] + gfx::kForwardSlash;
}
// Start eliding the path and replacing elements by ".../".
const std::u16string kEllipsisAndSlash =
std::u16string(gfx::kEllipsisUTF16) + gfx::kForwardSlash;
const float pixel_width_ellipsis_slash =
gfx::GetStringWidthF(kEllipsisAndSlash, font_list);
// Check with both subdomain and domain.
if (url_path_number_of_elements > 0) {
std::u16string elided_path = ElideComponentizedPath(
url_subdomain + url_domain, url_path_elements, url_filename, url_query,
font_list, available_pixel_width);
if (!elided_path.empty())
return elided_path;
}
// Check with only domain.
// If a subdomain is present, add an ellipsis before domain.
// This is added only if the subdomain pixel width is larger than
// the pixel width of kEllipsis. Otherwise, subdomain remains,
// which means that this case has been resolved earlier.
std::u16string url_elided_domain = url_subdomain + url_domain;
if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
if (!url_subdomain.empty())
url_elided_domain = kEllipsisAndSlash[0] + url_domain;
else
url_elided_domain = url_domain;
if (url_path_number_of_elements > 0) {
std::u16string elided_path = ElideComponentizedPath(
url_elided_domain, url_path_elements, url_filename, url_query,
font_list, available_pixel_width);
if (!elided_path.empty())
return elided_path;
}
}
// Return elided domain/.../filename anyway.
std::u16string final_elided_url_string(url_elided_domain);
const float url_elided_domain_width =
gfx::GetStringWidthF(url_elided_domain, font_list);
// A hack to prevent trailing ".../...".
if (url_path_number_of_elements > 0 &&
url_elided_domain_width + pixel_width_ellipsis_slash +
kPixelWidthDotsTrailer + gfx::GetStringWidthF(u"UV", font_list) <
available_pixel_width) {
final_elided_url_string += BuildPathFromComponents(
std::u16string(), url_path_elements, url_filename, 1);
} else {
final_elided_url_string += url_path;
}
return gfx::ElideText(final_elided_url_string, font_list,
available_pixel_width, gfx::ELIDE_TAIL);
}
std::u16string ElideHost(const GURL& url,
const gfx::FontList& font_list,
float available_pixel_width) {
std::u16string url_host;
std::u16string url_domain;
std::u16string url_subdomain;
url_formatter::SplitHost(url, &url_host, &url_domain, &url_subdomain);
const float pixel_width_url_host = gfx::GetStringWidthF(url_host, font_list);
if (available_pixel_width >= pixel_width_url_host)
return url_host;
if (url_subdomain.empty())
return url_domain;
const float pixel_width_url_domain =
gfx::GetStringWidthF(url_domain, font_list);
float subdomain_width = available_pixel_width - pixel_width_url_domain;
if (subdomain_width <= 0)
return std::u16string(gfx::kEllipsisUTF16) + kDot + url_domain;
return gfx::ElideText(url_host, font_list, available_pixel_width,
gfx::ELIDE_HEAD);
}
#endif // !BUILDFLAG(IS_ANDROID)
std::u16string FormatUrlForSecurityDisplay(const GURL& url,
const SchemeDisplay scheme_display) {
if (!url.is_valid() || url.is_empty() || !url.IsStandard())
return url_formatter::FormatUrl(url);
constexpr base::StringPiece16 colon(u":");
if (url.SchemeIsFile()) {
return base::StrCat({url::kFileScheme16, url::kStandardSchemeSeparator16,
base::UTF8ToUTF16(url.path())});
}
if (url.SchemeIsFileSystem()) {
const GURL* inner_url = url.inner_url();
if (inner_url->SchemeIsFile()) {
return base::StrCat({url::kFileSystemScheme16, colon,
FormatUrlForSecurityDisplay(*inner_url),
base::UTF8ToUTF16(url.path())});
}
return base::StrCat({url::kFileSystemScheme16, colon,
FormatUrlForSecurityDisplay(*inner_url)});
}
const GURL origin = url.DeprecatedGetOriginAsURL();
base::StringPiece scheme = origin.scheme_piece();
base::StringPiece host = origin.host_piece();
std::u16string result;
if (ShouldShowScheme(scheme, scheme_display)) {
result = base::StrCat(
{base::UTF8ToUTF16(scheme), url::kStandardSchemeSeparator16});
}
result += HostForDisplay(host);
const int port = origin.IntPort();
const int default_port = url::DefaultPortForScheme(
scheme.data(), static_cast<int>(scheme.length()));
if (port != url::PORT_UNSPECIFIED && port != default_port)
result += base::StrCat({colon, base::UTF8ToUTF16(origin.port_piece())});
return result;
}
std::u16string FormatOriginForSecurityDisplay(
const url::Origin& origin,
const SchemeDisplay scheme_display) {
base::StringPiece scheme = origin.scheme();
base::StringPiece host = origin.host();
if (scheme.empty() && host.empty())
return std::u16string();
constexpr base::StringPiece16 colon(u":");
std::u16string result;
if (ShouldShowScheme(scheme, scheme_display)) {
result = base::StrCat(
{base::UTF8ToUTF16(scheme), url::kStandardSchemeSeparator16});
}
result += HostForDisplay(host);
int port = static_cast<int>(origin.port());
const int default_port = url::DefaultPortForScheme(
scheme.data(), static_cast<int>(scheme.length()));
if (port != 0 && port != default_port)
result += base::StrCat({colon, base::NumberToString16(origin.port())});
return result;
}
std::u16string FormatUrlForDisplayOmitSchemePathAndTrivialSubdomains(
const GURL& url) {
return url_formatter::FormatUrl(
url,
url_formatter::kFormatUrlOmitDefaults |
url_formatter::kFormatUrlTrimAfterHost |
url_formatter::kFormatUrlOmitHTTPS |
url_formatter::kFormatUrlOmitTrivialSubdomains,
base::UnescapeRule::SPACES, nullptr, nullptr, nullptr);
}
#if BUILDFLAG(IS_IOS)
std::u16string
FormatUrlForDisplayOmitSchemePathTrivialSubdomainsAndMobilePrefix(
const GURL& url) {
return url_formatter::FormatUrl(
url,
url_formatter::kFormatUrlOmitDefaults |
url_formatter::kFormatUrlTrimAfterHost |
url_formatter::kFormatUrlOmitHTTPS |
url_formatter::kFormatUrlOmitTrivialSubdomains |
url_formatter::kFormatUrlOmitMobilePrefix,
base::UnescapeRule::SPACES, nullptr, nullptr, nullptr);
}
#endif
void SplitHost(const GURL& url,
std::u16string* url_host,
std::u16string* url_domain,
std::u16string* url_subdomain) {
// GURL stores IDN hostnames in punycode. Convert back to Unicode for
// display to the user. (IDNToUnicode() will only perform this conversion
// if it's safe to display this host/domain in Unicode.)
*url_host = url_formatter::IDNToUnicode(url.host());
// Get domain and registry information from the URL.
std::string domain_puny =
net::registry_controlled_domains::GetDomainAndRegistry(
url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
*url_domain = domain_puny.empty() ? *url_host
: url_formatter::IDNToUnicode(domain_puny);
// Add port if required.
if (!url.port().empty()) {
*url_host += base::UTF8ToUTF16(":" + url.port());
*url_domain += base::UTF8ToUTF16(":" + url.port());
}
// Get sub domain if requested.
if (url_subdomain) {
const size_t domain_start_index = url_host->find(*url_domain);
constexpr base::StringPiece16 kWwwPrefix = u"www.";
if (domain_start_index != std::u16string::npos)
*url_subdomain = url_host->substr(0, domain_start_index);
if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() ||
url.SchemeIsFile())) {
url_subdomain->clear();
}
}
}
} // namespace url_formatter