| // Copyright 2012 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // This command-line program converts an effective-TLD data file in UTF-8 from |
| // the format provided by Mozilla to the format expected by Chrome. This |
| // program generates an intermediate file which is then used by gperf to |
| // generate a perfect hash map. The benefit of this approach is that no time is |
| // spent on program initialization to generate the map of this data. |
| // |
| // Running this program finds "effective_tld_names.dat" in the expected location |
| // in the source checkout and generates "effective_tld_names.gperf" next to it. |
| // |
| // Any errors or warnings from this program are recorded in tld_cleanup.log. |
| // |
| // In particular, it |
| // * Strips blank lines and comments, as well as notes for individual rules. |
| // * Strips a single leading and/or trailing dot from each rule, if present. |
| // * Logs a warning if a rule contains '!' or '*.' other than at the beginning |
| // of the rule. (This also catches multiple ! or *. at the start of a rule.) |
| // * Logs a warning if GURL reports a rule as invalid, but keeps the rule. |
| // * Canonicalizes each rule's domain by converting it to a GURL and back. |
| // * Adds explicit rules for true TLDs found in any rule. |
| // * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS===" |
| // and "// ===END PRIVATE DOMAINS===" as private. |
| |
| #include <iostream> |
| |
| #include "base/at_exit.h" |
| #include "base/command_line.h" |
| #include "base/files/file_path.h" |
| #include "base/i18n/icu_util.h" |
| #include "base/logging.h" |
| #include "base/logging/logging_settings.h" |
| #include "base/path_service.h" |
| #include "base/process/memory.h" |
| #include "net/tools/tld_cleanup/tld_cleanup_util.h" |
| |
| int main(int argc, const char* argv[]) { |
| base::EnableTerminationOnHeapCorruption(); |
| if (argc != 1) { |
| std::cerr << "Normalizes and verifies UTF-8 TLD data files\n"; |
| std::cerr << "Usage: " << argv[0] << "\n"; |
| return 1; |
| } |
| |
| // Manages the destruction of singletons. |
| base::AtExitManager exit_manager; |
| |
| // Only use OutputDebugString in debug mode. |
| #ifdef NDEBUG |
| logging::LoggingDestination destination = logging::LOG_TO_FILE; |
| #else |
| logging::LoggingDestination destination = |
| logging::LOG_TO_ALL; |
| #endif |
| |
| base::CommandLine::Init(argc, argv); |
| |
| logging::LoggingSettings settings; |
| settings.logging_dest = destination; |
| settings.log_file_path = base::PathService::CheckedGet(base::DIR_EXE) |
| .AppendASCII("tld_cleanup.log") |
| .value(); |
| settings.delete_old = logging::DELETE_OLD_LOG_FILE; |
| logging::InitLogging(settings); |
| |
| base::i18n::InitializeICU(); |
| |
| base::FilePath src_root = |
| base::PathService::CheckedGet(base::DIR_SRC_TEST_DATA_ROOT); |
| base::FilePath input_file = src_root.AppendASCII("net") |
| .AppendASCII("base") |
| .AppendASCII("registry_controlled_domains") |
| .AppendASCII("effective_tld_names.dat"); |
| base::FilePath output_file = src_root.AppendASCII("net") |
| .AppendASCII("base") |
| .AppendASCII("registry_controlled_domains") |
| .AppendASCII("effective_tld_names.gperf"); |
| net::tld_cleanup::NormalizeResult result = |
| net::tld_cleanup::NormalizeFile(input_file, output_file); |
| if (result != net::tld_cleanup::NormalizeResult::kSuccess) { |
| std::cerr << "Errors or warnings processing file. See log in " |
| << settings.log_file_path << "." << std::endl; |
| } |
| |
| if (result == net::tld_cleanup::NormalizeResult::kError) |
| return 1; |
| return 0; |
| } |