blob: a8159ad04ccbe53bc30055ea57a39420866f1ed2 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Clang tool to perform simple rewrites of C++ code using clang's AST matchers.
// For more general documentation, as well as building & running instructions,
// see
// https://chromium.googlesource.com/chromium/src/+/HEAD/docs/clang_tool_refactoring.md
//
// As implemented, this tool looks for instances of `b ? "true" : "false"` and
// replaces them with calls to `base::ToString`.
//
// If you want to create your own tool based on this one:
// 1. Copy the ast_rewriter directory, and update CMakeLists.txt appropriately
// 2. Follow the building and running procedure described in
// the linked documentation:
// a. Bootstrap the plugin
// b. Build chrome once normally, without precompiled headers
// c. Run using run_tool.py
// 3. Perform any post-processing of the generated directives using dedup.py
// 4. Apply the directives as described in the linked documentation
//
// Note: When running the tool, you may get spurious warnings due to chromium-
// specific changes (e.g. #pragma allow_unsafe_buffers) that aren't recognized
// when the tool runs. If so, it's easiest to disable -Werror in
// build/config/compiler.gni (set treat_warnings_as_errors = false). You may
// also want to disable the warning entirely while running the tool, by adding
// "-Wno-unknown-pragmas" to cflags_cc in an appropriate part of
// build/config/BUILD.gn. Make sure to rebuild the project (repeat step 2b)
// after changing the build config.
#include <string>
#include "OutputHelper.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersMacros.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/TargetSelect.h"
// Debugging aid: prints a clang::SourceLocation or clang::SourceRange to
// llvm::errs(), prefixed with this file's name, the line number of the LOG()
// call, and the stringified argument expression.
// Most AST types also have a dump() function to print to stderr.
//
// NOTE: The expansion reads `result.SourceManager`, so LOG can only be used
// where a variable named `result` with that member is in scope (for example
// inside ASTRewriter::run, whose parameter is named `result`). The expansion
// already ends in a semicolon.
#define LOG(e) \
llvm::errs() << __FILE__ << ":" << __LINE__ << ": " << #e << " " \
<< (e).printToString(*result.SourceManager) << '\n';
namespace {
// Setting up the command-line; you can add additional options here if needed.
// `rewriter_category` is the option category handed to
// CommonOptionsParser::create() in main().
llvm::cl::OptionCategory rewriter_category("ast_rewriter options");

// Appends the standard help text shared by all CommonOptionsParser tools.
llvm::cl::extrahelp common_help(
    clang::tooling::CommonOptionsParser::HelpMessage);

// Tool-specific help text. Adjacent string literals are concatenated with no
// separator, so the space between the two halves must be spelled out.
llvm::cl::extrahelp more_help(
    "This tool replaces instances of `b ? \"true\" : \"false\"` with "
    "`base::ToString(b)`\n");
using namespace clang;
using namespace clang::ast_matchers;
// Specify what code patterns you're looking for here. AST matchers have more
// complete documentation on the clang website: see
// https://clang.llvm.org/docs/LibASTMatchers.html
// and
// https://clang.llvm.org/docs/LibASTMatchersReference.html
//
// This particular matcher looks for ternary operators whose second and third
// operands are "true" and "false", e.g. `b ? "true" : "false"`.
// Unfortunately, the matchers clang supports are incomplete; it can't directly
// check string contents, but it can check string length. Fortunately, we can
// perform additional checks on the AST itself once we have a potential match.
// Therefore, it's usually best to write a general matcher, and narrow down the
// final results later.
//
// The general process for creating a new matcher is to follow the AST matcher
// link above, then manually sift through the gigantic listing to determine
// which matchers (if any) fit your use case. It is strongly recommended to use
// clang-query to test matchers dynamically until you've got them working the
// way you want; see the clang_tool_refactoring.md file for more information.
//
// Arguments to a matcher are sub-matchers that serve to narrow down matches.
// Some arguments (stmt(), expr(), etc) don't narrow at all, but provide a way
// to reference different parts of the match. These arguments can be bound
// by calling .bind() with a string; this allows the part of the match to be
// referenced later by passing that string.
//
// The various kinds of matchers and the way they're expected to be combined is
// complicated; the best way to learn about it is to read the docs and play
// around with clang-query.
// Builds the (deliberately broad) matcher for candidate ternaries: any
// `_ ? _ : _` whose branches are both string literals of length 4 or 5. The
// two lengths are accepted on either branch so that both
// `b ? "true" : "false"` and `b ? "false" : "true"` are found; the literal
// contents themselves are not checked here.
//
// Names bound for use by the callback:
//   "root" - the conditional operator itself
//   "cond" - the condition expression
//   "tru"  - the expression in the true branch
//   "fls"  - the expression in the false branch
StatementMatcher matchTernaryTrueFalse() {
  // Shared by both branches: a string literal as long as "true" (4) or
  // "false" (5).
  const auto true_or_false_sized_literal =
      expr(anyOf(stringLiteral(hasSize(4)), stringLiteral(hasSize(5))));

  return conditionalOperator(
      stmt().bind("root"),
      hasCondition(expr().bind("cond")),
      hasTrueExpression(true_or_false_sized_literal.bind("tru")),
      hasFalseExpression(true_or_false_sized_literal.bind("fls")));
}
// Header paths handed to the OutputHelper that main() constructs. Presumably
// OutputHelper uses this set to request that these headers be added to files
// that receive a rewrite -- confirm against OutputHelper.h.
const char* headers_to_add[] = {"base/strings/to_string.h"};
// Once you know what you're looking for, the next step is to specify what to do
// when you find it. This can be done by creating a class which inherits from
// MatchFinder::MatchCallback and implements the `run` function.
//
// Printer is the smallest useful callback: for every match it looks up the
// node bound to "root" and dumps it. Handy for debugging a callback, although
// clang-query is the better tool for debugging the matcher itself.
class Printer : public MatchFinder::MatchCallback {
 public:
  void run(const MatchFinder::MatchResult& Result) override {
    const auto* matched_stmt = Result.Nodes.getNodeAs<clang::Stmt>("root");
    // Nothing to print unless the matcher bound a Stmt to the name "root".
    if (!matched_stmt) {
      return;
    }
    matched_stmt->dump();
  }
};
// The ASTRewriter class is a more interesting example; in addition to the `run`
// function, it stores a reference to an OutputHelper, which is also passed to
// clang's FrontendActionFactory in main(). The factory is expected to invoke
// the OutputHelper's setup and teardown methods at the beginning/end of each
// run, so the rewriter can call it to emit output.
class ASTRewriter : public MatchFinder::MatchCallback {
 protected:
  OutputHelper& output_helper_;

 public:
  // `output_helper` must be non-null and must outlive this object; only a
  // reference to it is stored.
  explicit ASTRewriter(OutputHelper* output_helper)
      : output_helper_(*output_helper) {}

  // Replaces `b ? "true" : "false"` with `base::ToString(b)`, and
  // `b ? "false" : "true"` with `base::ToString(!(b))`.
  //
  // This function has access to the full power of clang's AST, so you can do
  // as much work as you want. Unfortunately, much like matchers, the best way
  // to figure out what AST methods are available is to sift through the
  // documentation (https://clang.llvm.org/doxygen/) for whatever classes you
  // have at hand.
  //
  // A match is left untouched (no directive is emitted) when:
  //   - it starts inside a macro expansion,
  //   - it lives in third-party code or in base/strings/to_string.h,
  //   - the branches are not exactly the narrow literals "true" and "false",
  //   - the rewrite would change meaning because the condition is not `bool`,
  //   - the source text of the condition cannot be recovered.
  void run(const MatchFinder::MatchResult& result) override {
    ASTContext* Context = result.Context;

    // Extract the entire statement we matched.
    const Stmt* root = result.Nodes.getNodeAs<ConditionalOperator>("root");
    if (!root) {
      return;
    }

    // Don't replace in macros. Things WILL go wrong if you try; just do them
    // by hand.
    if (root->getBeginLoc().isMacroID()) {
      return;
    }

    // Don't replace in third-party code, or in the header that defines the
    // function we're replacing things with.
    const StringRef filename =
        Context->getSourceManager().getFilename(root->getBeginLoc());
    if (filename.contains("third_party/") ||
        filename.contains("base/strings/to_string.h")) {
      return;
    }

    // Extract the various components that we care about.
    const Expr* cond = result.Nodes.getNodeAs<Expr>("cond");
    const StringLiteral* tru = result.Nodes.getNodeAs<StringLiteral>("tru");
    const StringLiteral* fls = result.Nodes.getNodeAs<StringLiteral>("fls");
    if (!cond || !tru || !fls) {
      return;
    }

    // The matcher only constrains the literal length, so wide/UTF-16/UTF-32
    // literals such as L"true" get here too. StringLiteral::getString() is
    // only meaningful for literals with one-byte code units, and a
    // base::ToString() result would not be a drop-in replacement for them
    // anyway.
    if (tru->getCharByteWidth() != 1 || fls->getCharByteWidth() != 1) {
      return;
    }

    // Narrow the match down to the two spellings we actually handle.
    const StringRef tru_text = tru->getString();
    const StringRef fls_text = fls->getString();
    const bool true_is_first = tru_text == "true" && fls_text == "false";
    const bool false_is_first = tru_text == "false" && fls_text == "true";
    if (!true_is_first && !false_is_first) {
      return;
    }

    // `cond ? "true" : "false"` converts `cond` to bool, but
    // `base::ToString(cond)` stringifies the value itself, so the two only
    // agree when `cond` already has type bool. (The negated form is always
    // safe, because `!(cond)` is a bool.) Leave anything else, including
    // type-dependent conditions in templates, for manual review.
    if (true_is_first) {
      const QualType cond_type = cond->getType();
      if (cond_type.isNull() || !cond_type->isBooleanType()) {
        return;
      }
    }

    // An example of something more complicated that we can't easily do with
    // matchers: see if the original expression was parenthesized, and replace
    // the parens as well if so.
    const auto& parents = Context->getParents(*root);
    if (!parents.empty()) {
      if (const Stmt* paren_root = parents[0].get<ParenExpr>()) {
        root = paren_root;
      }
    }

    // We use getTokenRange here because that seems to be the format returned
    // by getSourceRange.
    const CharSourceRange root_range =
        CharSourceRange::getTokenRange(root->getSourceRange());
    const CharSourceRange cond_range =
        CharSourceRange::getTokenRange(cond->getSourceRange());

    // Compute the replacement text. Lexer::getSourceText returns an empty
    // string when it cannot map the range back to source; emitting
    // `base::ToString()` in that case would only break the build.
    const StringRef cond_text = Lexer::getSourceText(
        cond_range, *result.SourceManager, Context->getLangOpts());
    if (cond_text.empty()) {
      return;
    }
    std::string cond_text_str = cond_text.str();
    if (!true_is_first) {
      cond_text_str = "!(" + cond_text_str + ")";
    }
    const std::string replacement_text =
        "base::ToString(" + cond_text_str + ")";

    // Emit a directive to replace root_range with replacement_text.
    output_helper_.Replace(root_range, replacement_text, *result.SourceManager,
                           Context->getLangOpts());
  }
};
} // namespace
// Putting it all together: this function is mostly boilerplate that combines
// the stuff we've already defined. The most interesting part is specifying the
// traversal method to use; TK_IgnoreUnlessSpelledInSource will ignore most
// implicit AST nodes that the user didn't write themselves. This is required
// to use matchers unless you have a deep understanding of clang's AST.
int main(int argc, const char* argv[]) {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmParser();
llvm::Expected<clang::tooling::CommonOptionsParser> options =
clang::tooling::CommonOptionsParser::create(argc, argv,
rewriter_category);
assert(static_cast<bool>(options));
clang::tooling::ClangTool tool(options->getCompilations(),
options->getSourcePathList());
OutputHelper output_helper((llvm::StringSet<>(headers_to_add)));
MatchFinder match_finder;
MatchFinder::MatchCallback* callback =
// new Printer();
new ASTRewriter(&output_helper);
StatementMatcher final_matcher =
traverse(TK_IgnoreUnlessSpelledInSource, matchTernaryTrueFalse());
// More complicated use cases may want to add multiple matchers and callbacks
match_finder.addMatcher(final_matcher, callback);
std::unique_ptr<clang::tooling::FrontendActionFactory> factory =
clang::tooling::newFrontendActionFactory(&match_finder, &output_helper);
return tool.run(factory.get());
}