blob: d5be2d01c9214415657b93040f10189e7255ad15 [file] [log] [blame]
// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This binary takes a list of domain names in ASCII or unicode, passes them
// through the IDN decoding algorithm and prints out the result. The list can be
// passed as a text file or via stdin. In both cases, the output is printed as
// (input_domain, output_domain, spoof_check_result) tuples on separate lines.
// spoof_check_result is the string representation of IDNSpoofChecker::Result
// enum with an additional kTopDomainLookalike value.
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/i18n/icu_util.h"
#include "base/logging.h"
#include "base/notreached.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/url_formatter/spoof_checks/idn_spoof_checker.h"
#include "components/url_formatter/url_formatter.h"
#include "url/gurl.h"
using url_formatter::IDNConversionResult;
using url_formatter::IDNSpoofChecker;
// Prints command-line usage for this tool to stdout. |process_name| is the
// name to show for the binary in the usage line (callers pass argv[0]).
void PrintUsage(const char* process_name) {
  std::cout << "Usage:" << std::endl;
  // The file argument is optional: without it, hostnames are read from stdin.
  std::cout << process_name << " [<file>]" << std::endl;
  std::cout << std::endl;
  std::cout << "<file> is a text file with one hostname per line. If <file> "
               "is omitted, hostnames are read from stdin."
            << std::endl;
  std::cout << "Hostnames can be ASCII or unicode. Internationalized domain "
               "names (IDN) can be encoded in unicode or punycode."
            << std::endl;
  std::cout << "Each hostname is converted to unicode, if safe. Otherwise, "
            << "ASCII hostnames are printed unchanged and unicode hostnames "
            << "are printed in punycode." << std::endl;
}
// Returns the enumerator name of |result| as a string (e.g. "kSafe"). Any
// value without an explicit case below hits NOTREACHED().
std::string SpoofCheckResultToString(IDNSpoofChecker::Result result) {
  using Result = IDNSpoofChecker::Result;
  switch (result) {
    case Result::kNone: return "kNone";
    case Result::kSafe: return "kSafe";
    case Result::kICUSpoofChecks: return "kICUSpoofChecks";
    case Result::kDeviationCharacters: return "kDeviationCharacters";
    case Result::kTLDSpecificCharacters: return "kTLDSpecificCharacters";
    case Result::kUnsafeMiddleDot: return "kUnsafeMiddleDot";
    case Result::kWholeScriptConfusable: return "kWholeScriptConfusable";
    case Result::kDigitLookalikes: return "kDigitLookalikes";
    case Result::kNonAsciiLatinCharMixedWithNonLatin:
      return "kNonAsciiLatinCharMixedWithNonLatin";
    case Result::kDangerousPattern: return "kDangerousPattern";
    default:
      NOTREACHED();
  }
}
// Returns the spoof check result as a string. |ascii_domain| must contain
// ASCII characters only. |unicode_domain| is the IDN conversion result
// according to url_formatter. It can be either punycode or unicode.
std::string GetSpoofCheckResult(const std::string& ascii_domain,
const std::u16string& unicode_domain) {
IDNConversionResult result =
url_formatter::UnsafeIDNToUnicodeWithDetails(ascii_domain);
std::string spoof_check_result =
SpoofCheckResultToString(result.spoof_check_result);
if (result.spoof_check_result == IDNSpoofChecker::Result::kNone) {
// Input was not punycode.
return spoof_check_result;
}
if (result.spoof_check_result != IDNSpoofChecker::Result::kSafe) {
return spoof_check_result;
}
// If the domain passed all spoof checks but |unicode_domain| is still in
// punycode, the domain must be a lookalike of a top domain.
if (base::ASCIIToUTF16(ascii_domain) == unicode_domain) {
return "kTopDomainLookalike";
}
return spoof_check_result;
}
// Reads hostnames from |input|, one per line, and prints one
// "ascii_hostname, converted_hostname, spoof_check_result" line to stdout for
// each. A line starting with "http:" or "https:" (case-insensitive) fails a
// CHECK, since only bare hostnames are accepted.
void Convert(std::istream& input) {
  // IDN conversion depends on ICU; fail loudly instead of continuing with an
  // uninitialized ICU if initialization reports failure.
  CHECK(base::i18n::InitializeICU()) << "Failed to initialize ICU";
  for (std::string line; std::getline(input, line);) {
    CHECK(
        !base::StartsWith(line,
                          "http:", base::CompareCase::INSENSITIVE_ASCII) &&
        !base::StartsWith(line, "https:", base::CompareCase::INSENSITIVE_ASCII))
        << "This binary only accepts hostnames: " << line;
    // Non-ASCII input is canonicalized through GURL to obtain an ASCII host;
    // ASCII input is used as-is.
    const std::string ascii_hostname =
        base::IsStringASCII(line) ? line : GURL("https://" + line).GetHost();
    // Convert twice, first with spoof checks on, then with spoof checks
    // ignored inside GetSpoofCheckResult(). This is because only the call to
    // UnsafeIDNToUnicodeWithDetails returns information about spoof check
    // results (a quirk of the url_formatter interface).
    const std::u16string converted_hostname =
        url_formatter::IDNToUnicode(ascii_hostname);
    const std::string spoof_check_result =
        GetSpoofCheckResult(ascii_hostname, converted_hostname);
    std::cout << ascii_hostname << ", " << converted_hostname << ", "
              << spoof_check_result << std::endl;
  }
}
// Entry point. With --help, prints usage and exits. Otherwise converts the
// hostnames listed in the file named by the first argument, or read from stdin
// when no argument is given. Returns EXIT_SUCCESS on success and EXIT_FAILURE
// if the input file cannot be opened.
int main(int argc, char* argv[]) {
  base::CommandLine::Init(argc, argv);
  base::CommandLine* cmd = base::CommandLine::ForCurrentProcess();
  if (cmd->HasSwitch("help")) {
    PrintUsage(argv[0]);
    return EXIT_SUCCESS;
  }
  if (argc > 1) {
    const std::string filename = UNSAFE_TODO(argv[1]);
    std::ifstream input(filename);
    if (!input.good()) {
      LOG(ERROR) << "Could not open file " << filename;
      // Use the portable failure status rather than -1, whose meaning as a
      // process exit status is implementation-defined.
      return EXIT_FAILURE;
    }
    Convert(input);
  } else {
    Convert(std::cin);
  }
  return EXIT_SUCCESS;
}