| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <assert.h> |
| |
| #include <algorithm> |
| #include <map> |
| #include <set> |
| #include <sstream> |
| #include <string> |
| #include <vector> |
| |
| #include "RawPtrHelpers.h" |
| #include "clang/AST/ASTContext.h" |
| #include "clang/ASTMatchers/ASTMatchFinder.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Rewrite/Core/Rewriter.h" |
| #include "clang/Tooling/CommonOptionsParser.h" |
| #include "clang/Tooling/Refactoring.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "llvm/Support/TargetSelect.h" |
| |
| using namespace clang::ast_matchers; |
| |
| namespace { |
| |
// Special keywords:
// Placeholder text that EscapeReplacementText() passes through unescaped.
constexpr char kEmptyKeyword[] = "<empty>";

// Include path used by default for replacements that introduce base::span.
constexpr char kBaseSpanIncludePath[] = "base/containers/span.h";

// Include path that needs to be added to all the files where
// base::raw_span<...> replaces a raw_ptr<...>.
constexpr char kBaseRawSpanIncludePath[] = "base/memory/raw_span.h";
| |
| // This iterates over function parameters and matches the ones that match |
| // parm_var_decl_matcher. |
| AST_MATCHER_P(clang::FunctionDecl, |
| forEachParmVarDecl, |
| clang::ast_matchers::internal::Matcher<clang::ParmVarDecl>, |
| parm_var_decl_matcher) { |
| const clang::FunctionDecl& function_decl = Node; |
| |
| unsigned num_params = function_decl.getNumParams(); |
| bool is_matching = false; |
| clang::ast_matchers::internal::BoundNodesTreeBuilder result; |
| for (unsigned i = 0; i < num_params; i++) { |
| const clang::ParmVarDecl* param = function_decl.getParamDecl(i); |
| clang::ast_matchers::internal::BoundNodesTreeBuilder param_matches; |
| if (parm_var_decl_matcher.matches(*param, Finder, ¶m_matches)) { |
| is_matching = true; |
| result.addMatch(param_matches); |
| } |
| } |
| *Builder = std::move(result); |
| return is_matching; |
| } |
| |
| std::string EscapeReplacementText(std::string text) { |
| static const std::string_view escaped = "\n\r%@,:<>"; |
| static const std::string_view hex = "0123456789ABCDEF"; |
| |
| // <empty> is a special keyword. It is never escaped. |
| if (text == kEmptyKeyword) { |
| return text; |
| } |
| |
| std::string out; |
| for (auto ch : text) { |
| if (escaped.find(ch) != std::string_view::npos) { |
| uint8_t value = static_cast<uint8_t>(ch); |
| out += '%'; |
| out += hex[(value >> 4) & 0x0F]; |
| out += hex[(value >> 0) & 0x0F]; |
| } else { |
| out += ch; |
| } |
| } |
| return out; |
| } |
| |
| struct Node { |
| bool is_buffer = false; |
| |
| // A replacement follows the following format: |
| // `r:::<file path>:::<offset>:::<length>:::<replacement text>` |
| std::string replacement; |
| |
| // An include directive follows the following format: |
| // `include-user-header:::<file path>:::-1:::-1:::<include text>` |
| std::string include_directive; |
| |
| // This is true for nodes representing the following: |
| // - nullptr => size is zero |
| // - calls to new/new[n] => size is 1/n |
| // - constant arrays buf[1024] => size is 1024 |
| // - calls to third_party functions that we can't rewrite (they should |
| // provide a size for the pointer returned) |
| bool size_info_available = false; |
| |
| // This is true for dereference expressions. |
| // Example: *buf, *fct(), *(buf++), ... |
| bool is_deref_expr = false; |
| |
| // This is true for the cases where the lhs node doesn't get rewritten while |
| // the rhs does. in that case, we create a special node that adds a `.data()` |
| // call to the rhs. Example: ptr[index] = something; => ptr is used as a |
| // buffer => gets spanified T* temp = ptr; => temp never used as a buffer => |
| // need to add `.data()` The statement becomes: T* temp = ptr.data(); |
| bool is_data_change = false; |
| |
| bool operator==(const Node& other) const { |
| return replacement == other.replacement; |
| } |
| |
| bool operator<(const Node& other) const { |
| return replacement < other.replacement; |
| } |
| |
| // The resulting string follows the following format: |
| // {is_buffer\,r:::<filepath>:::<offset>:::<length>:::<replacement_text> |
| //\,include-user-header:::<file path>:::-1:::-1:::<include |
| // text>\,size_info_available\,is_deref_expr\,is_data_change} |
| // where the booleans are represented as 0 or 1. |
| std::string ToString() const { |
| return llvm::formatv("{{{0:d}\\,{1}\\,{2}\\,{3:d}\\,{4:d}\\,{5:d}}", |
| is_buffer, replacement, include_directive, |
| size_info_available, is_deref_expr, is_data_change); |
| } |
| }; |
| |
| // Helper class to add edges to the set of node_pairs_; |
| class OutputHelper { |
| public: |
| OutputHelper() = default; |
| |
| void AddEdge(const Node& lhs, const Node& rhs) { |
| node_pairs_.insert( |
| llvm::formatv("{0}@{1}\n", lhs.ToString(), rhs.ToString())); |
| } |
| |
| void AddSingleNode(const Node& lhs) { |
| node_pairs_.insert(llvm::formatv("{0}\n", lhs.ToString())); |
| } |
| |
| void Emit() { |
| for (const auto& p : node_pairs_) { |
| llvm::outs() << p; |
| } |
| } |
| |
| private: |
| // This represents a line for every 2 adjacent nodes. |
| // The format is: {lhs};{rhs}\n where lhs & rhs are generated using |
| // Node::ToString(). |
| // Buffer expressions are added to the graph as a single node |
| // in which case the line is {lhs};\n |
| std::set<std::string> node_pairs_; |
| }; |
| |
| static std::pair<std::string, std::string> GetReplacementAndIncludeDirectives( |
| const clang::SourceRange replacement_range, |
| std::string replacement_text, |
| const clang::SourceManager& source_manager, |
| const char* include_path = nullptr, |
| bool is_system_include_path = false) { |
| clang::tooling::Replacement replacement( |
| source_manager, clang::CharSourceRange::getCharRange(replacement_range), |
| replacement_text); |
| llvm::StringRef file_path = replacement.getFilePath(); |
| if (file_path.empty()) { |
| return {"", ""}; |
| } |
| // If `replacement_text` is a special keyword, e.g. "<empty>", should not |
| // escape `replacement_text`. |
| replacement_text = EscapeReplacementText(replacement_text); |
| std::string replacement_directive = llvm::formatv( |
| "r:::{0}:::{1}:::{2}:::{3}", file_path, replacement.getOffset(), |
| replacement.getLength(), replacement_text); |
| |
| if (!include_path) { |
| include_path = kBaseSpanIncludePath; |
| is_system_include_path = false; |
| } |
| std::string include_directive; |
| if (is_system_include_path) { |
| include_directive = llvm::formatv( |
| "include-system-header:::{0}:::-1:::-1:::{1}", file_path, include_path); |
| } else { |
| include_directive = llvm::formatv( |
| "include-user-header:::{0}:::-1:::-1:::{1}", file_path, include_path); |
| } |
| |
| return {replacement_directive, include_directive}; |
| } |
| |
| // Clang doesn't seem to be providing correct begin/end locations for |
| // clang::MemberExpr and clang::DeclRefExpr. This function handles these cases, |
| // otherwise returns expression's begin_loc and end_loc offset by 1. |
| clang::SourceRange getExprRange(const clang::Expr* expr) { |
| if (const auto* member_expr = clang::dyn_cast<clang::MemberExpr>(expr)) { |
| clang::SourceLocation begin_loc = member_expr->getMemberLoc(); |
| size_t member_name_length = member_expr->getMemberDecl()->getName().size(); |
| clang::SourceLocation end_loc = |
| begin_loc.getLocWithOffset(member_name_length); |
| return {begin_loc, end_loc}; |
| } |
| |
| if (const auto* decl_ref = clang::dyn_cast<clang::DeclRefExpr>(expr)) { |
| auto name = decl_ref->getNameInfo().getName().getAsString(); |
| return {decl_ref->getBeginLoc(), |
| decl_ref->getEndLoc().getLocWithOffset(name.size())}; |
| } |
| |
| return {expr->getBeginLoc(), expr->getEndLoc().getLocWithOffset(1)}; |
| } |
| |
| std::string GetTypeAsString(const clang::QualType& qual_type, |
| const clang::ASTContext& ast_context) { |
| clang::PrintingPolicy printing_policy(ast_context.getLangOpts()); |
| printing_policy.SuppressScope = 0; |
| printing_policy.SuppressUnwrittenScope = 1; |
| printing_policy.SuppressElaboration = 0; |
| printing_policy.SuppressInlineNamespace = 1; |
| printing_policy.SuppressDefaultTemplateArgs = 1; |
| printing_policy.PrintCanonicalTypes = 0; |
| return qual_type.getAsString(printing_policy); |
| } |
| |
| // This functions generates a string representing the converted type from a |
| // raw pointer type to a base::span type. It handles preservation of |
| // const/volatile qualifiers and uses a specific printing policy to format the |
| // underlying pointee type. |
| // This functions generates a string representing the converted type from a |
| // raw pointer type to a base::span type. It handles preservation of |
| // const/volatile qualifiers and uses a specific printing policy to format the |
| // underlying pointee type. |
| std::string GenerateSpanType(clang::SourceManager& source_manager, |
| const clang::ASTContext& ast_context, |
| const clang::DeclaratorDecl& decl) { |
| // Preserve qualifiers. |
| const clang::QualType& pointer_type = decl.getType(); |
| std::ostringstream qualifiers; |
| qualifiers << (pointer_type.isConstQualified() ? "const " : "") |
| << (pointer_type.isVolatileQualified() ? "volatile " : ""); |
| |
| // If the original type cannot be recovered from the source, we need to |
| // consult the clang deduced type. |
| // |
| // Please note that the deduced type may not be the same as the original type. |
| // For example, if we have the following code: |
| // const auto* p = get_buffer<uint16_t>(); |
| // we will get:`unsigned short` instead of `uint16_t`. |
| std::string type = |
| GetTypeAsString(pointer_type->getPointeeType(), ast_context); |
| return qualifiers.str() + llvm::formatv("base::span<{0}>", type).str(); |
| } |
| |
| // It is intentional that this function ignores cast expressions and applies |
| // the `.data()` addition to the internal expression. if we have: |
| // type* ptr = reinterpret_cast<type*>(buf); where buf needs to be rewritten |
| // to span and ptr doesn't. The `.data()` call is added right after buffer as |
| // follows: type* ptr = reinterpret_cast<type*>(buf.data()); |
| static clang::SourceRange getSourceRange( |
| const MatchFinder::MatchResult& result) { |
| if (auto* op = |
| result.Nodes.getNodeAs<clang::UnaryOperator>("unaryOperator")) { |
| if (op->isPostfix()) { |
| return {op->getBeginLoc(), op->getEndLoc().getLocWithOffset(2)}; |
| } |
| auto* expr = result.Nodes.getNodeAs<clang::Expr>("rhs_expr"); |
| return {op->getBeginLoc(), getExprRange(expr).getEnd()}; |
| } |
| if (auto* op = result.Nodes.getNodeAs<clang::Expr>("binaryOperator")) { |
| auto* sub_expr = result.Nodes.getNodeAs<clang::Expr>("bin_op_rhs"); |
| auto end_loc = getExprRange(sub_expr).getEnd(); |
| return {op->getBeginLoc(), end_loc}; |
| } |
| if (auto* op = result.Nodes.getNodeAs<clang::CXXOperatorCallExpr>( |
| "raw_ptr_operator++")) { |
| auto* callee = op->getDirectCallee(); |
| if (callee->getNumParams() == 0) { // postfix op++ on raw_ptr; |
| auto* expr = result.Nodes.getNodeAs<clang::Expr>("rhs_expr"); |
| return clang::SourceRange(getExprRange(expr).getEnd()); |
| } |
| return clang::SourceRange(op->getEndLoc().getLocWithOffset(2)); |
| } |
| |
| auto* expr = result.Nodes.getNodeAs<clang::Expr>("rhs_expr"); |
| return clang::SourceRange(getExprRange(expr).getEnd()); |
| } |
| |
| static void maybeUpdateSourceRangeIfInMacro( |
| const clang::SourceManager& source_manager, |
| const MatchFinder::MatchResult& result, |
| clang::SourceRange& range) { |
| if (!range.isValid() || !range.getBegin().isMacroID()) { |
| return; |
| } |
| // We need to find the reference to the object that might be getting |
| // accessed and rewritten to find the location to rewrite. SpellingLocation |
| // returns a different position if the source was pointing into the macro |
| // definition. See clang::SourceManager for details but relevant section: |
| // |
| // "Spelling locations represent where the bytes corresponding to a token came |
| // from and expansion locations represent where the location is in the user's |
| // view. In the case of a macro expansion, for example, the spelling location |
| // indicates where the expanded token came from and the expansion location |
| // specifies where it was expanded." |
| auto* rhs_decl_ref = |
| result.Nodes.getNodeAs<clang::DeclRefExpr>("declRefExpr"); |
| if (!rhs_decl_ref) { |
| return; |
| } |
| // We're extracting the spellingLocation's position and then we'll move the |
| // location forward by the length of the variable. This will allow us to |
| // insert .data() at the end of the decl_ref. |
| clang::SourceLocation correct_start = |
| source_manager.getSpellingLoc(rhs_decl_ref->getLocation()); |
| |
| bool invalid_line, invalid_col = false; |
| auto line = |
| source_manager.getSpellingLineNumber(correct_start, &invalid_line); |
| auto col = |
| source_manager.getSpellingColumnNumber(correct_start, &invalid_col); |
| assert(correct_start.isValid() && !invalid_line && !invalid_col && |
| "Unable to get SpellingLocation info"); |
| // Get the name and find the end of the decl_ref. |
| std::string name = rhs_decl_ref->getFoundDecl()->getNameAsString(); |
| clang::SourceLocation correct_end = source_manager.translateLineCol( |
| source_manager.getFileID(correct_start), line, col + name.size()); |
| assert(correct_end.isValid() && |
| "Incorrectly got an End SourceLocation for macro"); |
| // This returns at the end of the variable being referenced so we can |
| // insert .data(), if we wanted it wrapped in params (variable).data() |
| // we'd need {correct_start, correct_end} but this doesn't seem needed in |
| // macros tested on so far. |
| range = clang::SourceRange{correct_end}; |
| } |
| |
| static Node getNodeFromPointerTypeLoc(const clang::PointerTypeLoc* type_loc, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| const auto& lang_opts = ast_context.getLangOpts(); |
| // We are in the case of a function return type loc. |
| // This doesn't always generate the right range since type_loc doesn't |
| // account for qualifiers (like const). Didn't find a proper way for now |
| // to get the location with type qualifiers taken into account. |
| clang::SourceRange replacement_range = { |
| type_loc->getBeginLoc(), type_loc->getEndLoc().getLocWithOffset(1)}; |
| std::string initial_text = |
| clang::Lexer::getSourceText( |
| clang::CharSourceRange::getCharRange(replacement_range), |
| source_manager, lang_opts) |
| .str(); |
| initial_text.pop_back(); |
| std::string replacement_text = "base::span<" + initial_text + ">"; |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, replacement_text, source_manager); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| return n; |
| } |
| |
| static Node getNodeFromRawPtrTypeLoc( |
| const clang::TemplateSpecializationTypeLoc* raw_ptr_type_loc, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| auto replacement_range = clang::SourceRange(raw_ptr_type_loc->getBeginLoc(), |
| raw_ptr_type_loc->getLAngleLoc()); |
| |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, "base::raw_span", source_manager, |
| kBaseRawSpanIncludePath); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| return n; |
| } |
| |
| static Node getNodeFromDecl(const clang::DeclaratorDecl* decl, |
| const MatchFinder::MatchResult& result) { |
| clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| clang::SourceRange replacement_range{decl->getBeginLoc(), |
| decl->getLocation()}; |
| auto replacement_text = GenerateSpanType(source_manager, ast_context, *decl); |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, replacement_text, source_manager); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| return n; |
| } |
| |
| static Node getNodeFromDerefExpr(const clang::Expr* deref_expr, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| const auto& lang_opts = ast_context.getLangOpts(); |
| auto source_range = clang::SourceRange(deref_expr->getBeginLoc(), |
| getSourceRange(result).getEnd()); |
| std::string initial_text = |
| clang::Lexer::getSourceText( |
| clang::CharSourceRange::getCharRange(source_range), source_manager, |
| lang_opts) |
| .str(); |
| |
| std::string replacement_text = initial_text.substr(1) + "[0]"; |
| if (result.Nodes.getNodeAs<clang::Expr>("unaryOperator") || |
| result.Nodes.getNodeAs<clang::Expr>("binaryOperator")) { |
| replacement_text = "(" + initial_text.substr(1) + ")[0]"; |
| } |
| |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| source_range, replacement_text, source_manager); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = "<empty>"; |
| n.is_deref_expr = true; |
| return n; |
| } |
| |
| static Node getNodeFromMemberCallExpr(const clang::CXXMemberCallExpr* get_call, |
| const char* member_expr_id, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::MemberExpr* member_expr = |
| result.Nodes.getNodeAs<clang::MemberExpr>(member_expr_id); |
| clang::SourceLocation begin_loc = member_expr->getMemberLoc(); |
| size_t member_name_length = |
| member_expr->getMemberDecl()->getName().size() + 2; |
| clang::SourceLocation end_loc = |
| begin_loc.getLocWithOffset(member_name_length); |
| begin_loc = begin_loc.getLocWithOffset(-1); |
| clang::SourceRange replacement_range(begin_loc, end_loc); |
| |
| // This deletes the member call expression part. Example: |
| // char* ptr = member_.get(); which is then rewritten to |
| // span<char> ptr = member_; |
| // member_ here is a raw_ptr |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, " ", source_manager); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| return n; |
| } |
| |
| static Node getNodeFromCallToExternalFunction( |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| const auto& lang_opts = ast_context.getLangOpts(); |
| auto rep_range = getSourceRange(result); |
| std::string initial_text = |
| clang::Lexer::getSourceText( |
| clang::CharSourceRange::getCharRange(rep_range), source_manager, |
| lang_opts) |
| .str(); |
| std::string replacement_text = |
| initial_text.empty() ? ".data()" : "(" + initial_text + ").data()"; |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| rep_range, replacement_text, source_manager); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = "<empty>"; |
| n.is_deref_expr = true; |
| return n; |
| } |
| |
| static Node getNodeFromSizeExpr(const clang::Expr* size_expr, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| std::string replacement = kEmptyKeyword; |
| clang::SourceRange replacement_range; |
| if (const auto* nullptr_expr = |
| result.Nodes.getNodeAs<clang::CXXNullPtrLiteralExpr>( |
| "nullptr_expr")) { |
| replacement = "{}"; |
| // The hardcoded offset corresponds to the length of "nullptr" keyword. |
| replacement_range = {nullptr_expr->getBeginLoc(), |
| nullptr_expr->getBeginLoc().getLocWithOffset(7)}; |
| } else { |
| // Generate empty insertion just to keep track of the node's loc; |
| replacement_range = |
| clang::SourceRange(size_expr->getSourceRange().getBegin(), |
| size_expr->getSourceRange().getBegin()); |
| } |
| |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, replacement, source_manager); |
| Node n; |
| n.size_info_available = true; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| return n; |
| } |
| |
| static Node getDataChangeNode(const std::string& lhs_replacement, |
| const MatchFinder::MatchResult& result) { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| const auto& lang_opts = ast_context.getLangOpts(); |
| auto rep_range = getSourceRange(result); |
| |
| // If we're inside a macro the rep_range computed above is going to be |
| // incorrect because it will point into the file where the macro is defined. |
| // We need to get the "SpellingLocation", and then we figure out the end of |
| // the parameter so we can insert .data() at the end if needed. |
| maybeUpdateSourceRangeIfInMacro(source_manager, result, rep_range); |
| |
| std::string initial_text = |
| clang::Lexer::getSourceText( |
| clang::CharSourceRange::getCharRange(rep_range), source_manager, |
| lang_opts) |
| .str(); |
| std::string replacement_text = |
| initial_text.empty() ? ".data()" : "(" + initial_text + ").data()"; |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| rep_range, replacement_text, source_manager); |
| Node data_node; |
| data_node.replacement = replacement_and_include_pair.first; |
| // We need a way to check whether the lhs node was rewritten, in which |
| // case we don't need to add this change. We achieve this by storing the |
| // lhs key (the replacement which is unique) in the data_node's include |
| // directive. |
| data_node.include_directive = lhs_replacement; |
| data_node.is_data_change = true; |
| return data_node; |
| } |
| |
// Takes in a copy of a variable assumed to be in snake_case and switches it
// into CamelCase: the first character and every character that follows an
// '_' are upper-cased, then all '_' are removed.
// Example: "point_array" => "PointArray".
std::string snakeCaseToCamelCase(std::string snake_case) {
  // We want the first char to be capitalized, so behave as if the string was
  // preceded by an '_'.
  bool capitalize_next = true;
  for (char& c : snake_case) {
    if (capitalize_next) {
      // std::toupper has undefined behavior when given a negative value other
      // than EOF, so the char must go through unsigned char first.
      c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
    }
    capitalize_next = (c == '_');
  }
  // Now we need to remove the '_'s from the string. std::remove shifts the
  // kept characters to the front and returns the new logical end; erase then
  // drops everything from there to the real end.
  snake_case.erase(std::remove(snake_case.begin(), snake_case.end(), '_'),
                   snake_case.end());
  return snake_case;
}
| |
| // Checks if the given array definition involves an unnamed struct type |
| // or is declared inline within a struct/class definition. |
| // |
| // These cases currently pose challenges for the C array to std::array |
| // conversion and are therefore skipped by the tool. |
| // |
| // Examples of problematic definitions: |
| // - Unnamed struct: |
| // `struct { int x, y; } point_array[10];` |
| // - Inline definition: |
| // `struct Point { int x, y; } inline_points[5];` |
| // |
| // Returns the pair of a suggested type name (if unnamed struct, empty string |
| // otherwise) and the inline definition with a semi-colon ';' added to split it |
| // away from the declaration (empty string otherwise). |
| // I.E.: |
| // - {"", ""} -> If this is not one of the problematic definitions above. |
| // - {"", "struct Point { int x, y; };"} -> for the inline definition case. |
| // - {"PointArray", "struct PointArray { ... };"} -> for the unnamed struct |
| // case. |
| std::pair<std::string, std::string> maybeGetUnnamedAndDefinition( |
| const clang::QualType element_type, |
| const clang::VarDecl* array_variable, |
| const std::string& array_variable_as_string, |
| const clang::ASTContext& ast_context) { |
| if (!element_type->hasUnnamedOrLocalType()) { |
| return std::make_pair("", ""); |
| } |
| |
| std::string new_class_name_string; |
| std::string class_definition; |
| // Structs/classes can be defined alongside an option list of variable |
| // declarations. |
| // |
| // struct <OptionalName> { ... } var1[3]; |
| // |
| // In this case we need the class_definition and in the case of unnamed |
| // types, we have to construct a name to use instead of the compiler |
| // generated one. |
| if (auto record_decl = element_type->getAsRecordDecl()) { |
| // If the `VarDecl` contains the `RecordDecl`'s {}, the `VarDecl` contains |
| // the struct/class definition. |
| bool has_definition = array_variable->getSourceRange().fullyContains( |
| record_decl->getBraceRange()); |
| bool is_unnamed = record_decl->getDeclName().isEmpty(); |
| |
| // If the struct/class has an empty name (=unnamed) and has its |
| // definition, we will temporariliy assign a new name to the `RecordDecl` |
| // and invoke `getAsString()` to obtain the definition with the new name. |
| clang::DeclarationName original_name = record_decl->getDeclName(); |
| clang::DeclarationName temporal_class_name; |
| if (is_unnamed) { |
| new_class_name_string = snakeCaseToCamelCase(array_variable_as_string); |
| clang::StringRef new_class_name(new_class_name_string); |
| clang::IdentifierInfo& new_class_name_identifier = |
| ast_context.Idents.get(new_class_name); |
| temporal_class_name = ast_context.DeclarationNames.getIdentifier( |
| &new_class_name_identifier); |
| record_decl->setDeclName(temporal_class_name); |
| } |
| if (has_definition) { |
| clang::PrintingPolicy printing_policy(ast_context.getLangOpts()); |
| // Because of class/struct definition, we will drop any qualifiers from |
| // `element_type`. E.g. `const struct { int val; }` must be |
| // `struct { int val; }`. |
| clang::QualType new_qual_type(element_type.getTypePtr(), 0); |
| printing_policy.SuppressScope = 0; |
| printing_policy.SuppressUnwrittenScope = 1; |
| printing_policy.SuppressElaboration = 0; |
| printing_policy.SuppressInlineNamespace = 1; |
| printing_policy.SuppressDefaultTemplateArgs = 1; |
| printing_policy.PrintCanonicalTypes = 0; |
| printing_policy.IncludeTagDefinition = 1; |
| printing_policy.AnonymousTagLocations = 1; |
| class_definition = new_qual_type.getAsString(printing_policy) + ";\n"; |
| } |
| if (is_unnamed) { |
| record_decl->setDeclName(original_name); |
| } |
| } |
| return std::make_pair(new_class_name_string, class_definition); |
| } |
| |
| // Gets the array size as written in the source code if it's explicitly |
| // specified. Otherwise, returns the empty string. |
| std::string GetArraySize(const clang::ArrayTypeLoc& array_type_loc, |
| const clang::SourceManager& source_manager, |
| const clang::ASTContext& ast_context) { |
| assert(!array_type_loc.isNull()); |
| |
| clang::SourceRange source_range( |
| array_type_loc.getLBracketLoc().getLocWithOffset(1), |
| array_type_loc.getRBracketLoc()); |
| return clang::Lexer::getSourceText( |
| clang::CharSourceRange::getCharRange(source_range), source_manager, |
| ast_context.getLangOpts()) |
| .str(); |
| } |
| |
| // Produces a std::array type from the given (potentially nested) C array type. |
| // Returns a string representation of the std::array type. |
| std::string RewriteCArrayToStdArray(const clang::QualType& type, |
| const clang::TypeLoc& type_loc, |
| const clang::SourceManager& source_manager, |
| const clang::ASTContext& ast_context) { |
| const clang::ArrayType* array_type = ast_context.getAsArrayType(type); |
| if (!array_type) { |
| return GetTypeAsString(type, ast_context); |
| } |
| const clang::ArrayTypeLoc& array_type_loc = |
| type_loc.getUnqualifiedLoc().getAs<clang::ArrayTypeLoc>(); |
| assert(!array_type_loc.isNull()); |
| |
| const clang::QualType& element_type = array_type->getElementType(); |
| const clang::TypeLoc& element_type_loc = array_type_loc.getElementLoc(); |
| const std::string& element_type_as_string = RewriteCArrayToStdArray( |
| element_type, element_type_loc, source_manager, ast_context); |
| |
| const std::string& size_as_string = |
| GetArraySize(array_type_loc, source_manager, ast_context); |
| |
| std::ostringstream result; |
| result << "std::array<" << element_type_as_string << ", " << size_as_string |
| << ">"; |
| return result.str(); |
| } |
| |
| // Returns an initializer list(`initListExpr`) of the given |
| // `var_decl`(`clang::VarDecl`) if exists. Otherwise, returns `nullptr`. |
| const clang::InitListExpr* GetArrayInitList(const clang::VarDecl* var_decl) { |
| const clang::Expr* init_expr = var_decl->getInit(); |
| if (!init_expr) { |
| return nullptr; |
| } |
| |
| const clang::InitListExpr* init_list_expr = |
| clang::dyn_cast_or_null<clang::InitListExpr>(init_expr); |
| if (init_list_expr) { |
| return init_list_expr; |
| } |
| |
| // If we have the following array of std::vector<>: |
| // `std::vector<Quad> quad[2] = {{...},{...}};` |
| // we may not be able to use `dyn_cast` with `init_expr` to obtain |
| // `InitListExpr`: |
| // ExprWithCleanups 0x557ea7bdc860 'std::vector<Quad>[2]' |
| // `-InitListExpr 0x557ea7ba3950 'std::vector<Quad>[2]' |
| // |-CXXConstructExpr 0x557ea7bdc750 ... |
| // ... |
| // `-CXXConstructExpr |
| // ... |
| // `init_expr` is an instance of `ExprWithCleanups`. |
| const clang::ExprWithCleanups* expr_with_cleanups = |
| clang::dyn_cast_or_null<clang::ExprWithCleanups>(init_expr); |
| if (!expr_with_cleanups) { |
| return nullptr; |
| } |
| |
| auto first_child = expr_with_cleanups->child_begin(); |
| if (first_child == expr_with_cleanups->child_end()) { |
| return nullptr; |
| } |
| return clang::dyn_cast_or_null<clang::InitListExpr>(*first_child); |
| } |
| |
| // Creates a replacement node for c-style arrays on which we invoke operator[]. |
| // These arrays are rewritten to std::array<Type, Size>. |
| Node getNodeFromArrayType(const MatchFinder::MatchResult& result) { |
| clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::ASTContext& ast_context = *result.Context; |
| |
| const auto* type_loc = |
| result.Nodes.getNodeAs<clang::TypeLoc>("array_type_loc"); |
| const clang::ArrayTypeLoc& array_type_loc = |
| type_loc->getUnqualifiedLoc().getAs<clang::ArrayTypeLoc>(); |
| assert(!array_type_loc.isNull()); |
| const auto* array_type = |
| result.Nodes.getNodeAs<clang::ArrayType>("array_type"); |
| const auto* array_variable = |
| result.Nodes.getNodeAs<clang::VarDecl>("array_variable"); |
| const std::string& array_variable_as_string = |
| array_variable->getNameAsString(); |
| const std::string& array_size_as_string = |
| GetArraySize(array_type_loc, source_manager, ast_context); |
| const clang::QualType& element_type = array_type->getElementType(); |
| |
| std::stringstream qualifier_string; |
| if (array_variable->isConstexpr()) { |
| qualifier_string << "constexpr "; |
| } |
| if (array_variable->isStaticLocal()) { |
| qualifier_string << "static "; |
| } |
| // `const int buf[] = ...` must be `const std::array<int,...> buf = ...`. |
| if (!element_type->isPointerOrReferenceType() && |
| element_type.isConstant(ast_context)) { |
| qualifier_string << "const "; |
| } |
| |
| // TODO(yukishiino): Currently we support only simple cases like: |
| // - Unnamed struct/class |
| // - Redundant struct/class keyword |
| // and |
| // - Multi-dimensional array |
| // But we need to support combinations of above: |
| // - Multi-dimensional array of unnamed struct/class |
| // - Multi-dimensional array with redundant struct/class keyword |
| std::string element_type_as_string; |
| const auto& [unnamed_class, class_definition] = maybeGetUnnamedAndDefinition( |
| element_type, array_variable, array_variable_as_string, ast_context); |
| if (!unnamed_class.empty()) { |
| element_type_as_string = unnamed_class; |
| } else if (element_type->isElaboratedTypeSpecifier()) { |
| // If the `element_type` is an elaborated type with a keyword, i.e. |
| // `struct`, `class`, `union`, we will create another ElaboratedType |
| // without the keyword. So `struct funcHasName` will be `funcHasHame`. |
| auto* original_type = element_type->getAs<clang::ElaboratedType>(); |
| // Create a new ElaboratedType without 'struct', 'class', 'union' |
| // keywords. |
| auto new_element_type = ast_context.getElaboratedType( |
| // Use `None` to suppress tag names. |
| clang::ElaboratedTypeKeyword::None, |
| // Keep the same as the original. |
| original_type->getQualifier(), |
| // Keep the same as the original. |
| original_type->getNamedType(), |
| // Remove `OwnedTagDecl`. We don't need IncludeTagDefinition. |
| nullptr); |
| element_type_as_string = GetTypeAsString(new_element_type, ast_context); |
| } else { |
| element_type_as_string = |
| RewriteCArrayToStdArray(element_type, array_type_loc.getElementLoc(), |
| source_manager, ast_context); |
| } |
| |
| const clang::InitListExpr* init_list_expr = GetArrayInitList(array_variable); |
| |
| // static const char* array[] = {...}; |
| // | | |
| // | +-- type_loc->getSourceRange().getBegin() |
| // | |
| // +---- array_variable->getSourceRange().getBegin() |
| // |
| // The `static` is a part of `VarDecl`, but the `const` is a part of |
| // the element type, i.e. `const char*`. |
| // |
| // The array must be rewritten into: |
| // |
| // static auto array = std::to_array<const char*>({...}); |
| // |
| // So the `replacement_range` need to include the `const` and |
| // `init_list_expr` if any. |
| clang::SourceRange replacement_range = { |
| array_variable->getSourceRange().getBegin(), |
| init_list_expr ? init_list_expr->getEndLoc().getLocWithOffset(1) |
| : type_loc->getSourceRange().getEnd().getLocWithOffset(1)}; |
| |
| std::string replacement_text; |
| if (init_list_expr) { |
| clang::Rewriter rw(source_manager, ast_context.getLangOpts()); |
| std::string init_expr_as_string = |
| rw.getRewrittenText(init_list_expr->getSourceRange()); |
| |
| if (array_size_as_string.empty()) { |
| replacement_text = llvm::formatv( |
| "auto {0} = std::to_array<{1}>({2})", array_variable_as_string, |
| element_type_as_string, init_expr_as_string); |
| } else { |
| replacement_text = llvm::formatv( |
| "auto {0} = std::to_array<{1}, {2}>({3})", array_variable_as_string, |
| element_type_as_string, array_size_as_string, init_expr_as_string); |
| } |
| } else { |
| replacement_text = |
| llvm::formatv("std::array<{0}, {1}> {2}", element_type_as_string, |
| array_size_as_string, array_variable_as_string); |
| } |
| |
| auto replacement_and_include_pair = GetReplacementAndIncludeDirectives( |
| replacement_range, |
| class_definition + qualifier_string.str() + replacement_text, |
| source_manager, "array", |
| /* is_system_include_header =*/true); |
| Node n; |
| n.replacement = replacement_and_include_pair.first; |
| n.include_directive = replacement_and_include_pair.second; |
| n.size_info_available = true; |
| return n; |
| } |
| |
| // Called when the Match registered for it was successfully found in the AST. |
| // The matches registered represent two categories: |
| // 1- An adjacency relationship |
| // In that case, a node pair is created, using matched node ids, and added |
| // to the node_pair list using `OutputHelper::AddEdge` |
| // 2- A single is_buffer node match |
| // In that case, a single node is created and added to the node_pair list |
| // using `OutputHelper::AddSingleNode` |
| class PotentialNodes : public MatchFinder::MatchCallback { |
| public: |
| explicit PotentialNodes(OutputHelper& helper) : output_helper_(helper) {} |
| |
| PotentialNodes(const PotentialNodes&) = delete; |
| PotentialNodes& operator=(const PotentialNodes&) = delete; |
| |
| // Extracts the lhs node from the match result. |
| Node getLHSNodeFromMatchResult(const MatchFinder::MatchResult& result) { |
| if (auto* type_loc = |
| result.Nodes.getNodeAs<clang::PointerTypeLoc>("lhs_type_loc")) { |
| return getNodeFromPointerTypeLoc(type_loc, result); |
| } |
| |
| if (auto* raw_ptr_type_loc = |
| result.Nodes.getNodeAs<clang::TemplateSpecializationTypeLoc>( |
| "lhs_raw_ptr_type_loc")) { |
| return getNodeFromRawPtrTypeLoc(raw_ptr_type_loc, result); |
| } |
| |
| if (auto* lhs_begin = |
| result.Nodes.getNodeAs<clang::DeclaratorDecl>("lhs_begin")) { |
| return getNodeFromDecl(lhs_begin, result); |
| } |
| |
| if (auto* deref_op = result.Nodes.getNodeAs<clang::Expr>("deref_expr")) { |
| return getNodeFromDerefExpr(deref_op, result); |
| } |
| |
| if (auto* get_call = result.Nodes.getNodeAs<clang::CXXMemberCallExpr>( |
| "raw_ptr_get_call")) { |
| Node n = getNodeFromMemberCallExpr(get_call, "get_member_expr", result); |
| n.include_directive = "<empty>"; |
| n.is_deref_expr = true; |
| return n; |
| } |
| |
| if (result.Nodes.getNodeAs<clang::Expr>( |
| "passing_a_buffer_to_third_party_function")) { |
| return getNodeFromCallToExternalFunction(result); |
| } |
| |
| if (result.Nodes.getNodeAs<clang::VarDecl>("array_variable")) { |
| return getNodeFromArrayType(result); |
| } |
| assert(false); |
| } |
| |
| // Extracts the rhs node from the match result. |
| Node getRHSNodeFromMatchResult(const MatchFinder::MatchResult& result) { |
| if (auto* type_loc = |
| result.Nodes.getNodeAs<clang::PointerTypeLoc>("rhs_type_loc")) { |
| return getNodeFromPointerTypeLoc(type_loc, result); |
| } |
| |
| if (auto* raw_ptr_type_loc = |
| result.Nodes.getNodeAs<clang::TemplateSpecializationTypeLoc>( |
| "rhs_raw_ptr_type_loc")) { |
| return getNodeFromRawPtrTypeLoc(raw_ptr_type_loc, result); |
| } |
| |
| if (auto* rhs_begin = |
| result.Nodes.getNodeAs<clang::DeclaratorDecl>("rhs_begin")) { |
| return getNodeFromDecl(rhs_begin, result); |
| } |
| |
| if (const clang::CXXMemberCallExpr* data_call = |
| result.Nodes.getNodeAs<clang::CXXMemberCallExpr>( |
| "member_data_call")) { |
| auto node = |
| getNodeFromMemberCallExpr(data_call, "data_member_expr", result); |
| node.size_info_available = true; |
| return node; |
| } |
| |
| if (const clang::Expr* size_expr = |
| result.Nodes.getNodeAs<clang::Expr>("size_node")) { |
| return getNodeFromSizeExpr(size_expr, result); |
| } |
| // Not supposed to get here. |
| assert(false); |
| } |
| |
| // MatchFinder::MatchCallback: |
| void run(const MatchFinder::MatchResult& result) override { |
| Node lhs = getLHSNodeFromMatchResult(result); |
| |
| // Buffer usage expressions are added as a single node, return |
| // early in this case. |
| if (result.Nodes.getNodeAs<clang::Expr>("buffer_expr")) { |
| lhs.is_buffer = true; |
| output_helper_.AddSingleNode(lhs); |
| return; |
| } |
| |
| Node rhs = getRHSNodeFromMatchResult(result); |
| |
| auto* expr = result.Nodes.getNodeAs<clang::Expr>("span_frontier"); |
| if (expr && !lhs.is_deref_expr && !rhs.size_info_available) { |
| // Node to add `.data()`; |
| // This is needed in the case where rhs is rewritten and lhs is not. |
| // Adding `.data()` is thus needed to extract the pointer since lhs and |
| // rhs no longer have the same type. |
| Node data_node = getDataChangeNode(lhs.replacement, result); |
| output_helper_.AddEdge(data_node, rhs); |
| } |
| |
| output_helper_.AddEdge(lhs, rhs); |
| } |
| |
| private: |
| OutputHelper& output_helper_; |
| }; |
| |
| // Called when the registered Match is found in the AST. |
| // |
| // The match includes: |
| // - A parmVarDecl or RTNode |
| // - Corresponding function declaration |
| // |
| // Using the function declaration, this: |
| // 1. Create a unique key for the current function: `current_key` |
| // 2. If the function has previous declarations or is overridden: |
| // - Retrieve previous declarations |
| // - Create keys for each previous declaration: `prev_key` |
| // - For each `prev_key`, add the pair (`current_key`, `prev_key`) to |
| // `fct_sig_pairs_` |
| // |
| // Using the parmVarDecl or RTNode, this: |
| // 1. Create a node |
| // 2. Insert the node into `fct_sig_nodes_[current_key]` |
| // |
| // At the end of the tool run for a given translation unit, edges between |
| // corresponding nodes of two adjacent function signatures are created. |
| class FunctionSignatureNodes : public MatchFinder::MatchCallback { |
| public: |
| explicit FunctionSignatureNodes( |
| std::map<std::string, std::set<Node>>& sig_nodes, |
| std::vector<std::pair<std::string, std::string>>& sig_pairs) |
| : fct_sig_nodes_(sig_nodes), fct_sig_pairs_(sig_pairs) {} |
| |
| FunctionSignatureNodes(const FunctionSignatureNodes&) = delete; |
| FunctionSignatureNodes& operator=(const FunctionSignatureNodes&) = delete; |
| |
| // Key here means a unique string generated from a function signature |
| std::string GetKey(const clang::FunctionDecl* fct_decl, |
| const clang::SourceManager& source_manager) { |
| auto name = fct_decl->getNameInfo().getName().getAsString(); |
| clang::SourceLocation start_loc = fct_decl->getBeginLoc(); |
| // This is done here to get the spelling loc of a functionDecl. This is |
| // needed to handle cases where the function is in a Macro Expansion. |
| clang::SourceRange replacement_range(source_manager.getFileLoc(start_loc), |
| source_manager.getFileLoc(start_loc)); |
| clang::tooling::Replacement replacement( |
| source_manager, clang::CharSourceRange::getCharRange(replacement_range), |
| name.c_str()); |
| llvm::StringRef file_path = replacement.getFilePath(); |
| |
| return llvm::formatv("r:::{0}:::{1}:::{2}:::{3}", file_path, |
| replacement.getOffset(), replacement.getLength(), |
| name.c_str()); |
| } |
| |
| Node getNodeFromMatchResult(const MatchFinder::MatchResult& result) { |
| if (auto* type_loc = |
| result.Nodes.getNodeAs<clang::PointerTypeLoc>("rhs_type_loc")) { |
| return getNodeFromPointerTypeLoc(type_loc, result); |
| } |
| |
| if (auto* raw_ptr_type_loc = |
| result.Nodes.getNodeAs<clang::TemplateSpecializationTypeLoc>( |
| "rhs_raw_ptr_type_loc")) { |
| return getNodeFromRawPtrTypeLoc(raw_ptr_type_loc, result); |
| } |
| |
| // "rhs_begin" match id could refer to a declaration that has a raw_ptr |
| // type. Those are handled in getNodeFromRawPtrTypeLoc. We |
| // should always check for a "rhs_raw_ptr_type_loc" match id and call |
| // getNodeFromRawPtrTypeLoc first. |
| if (auto* rhs_begin = |
| result.Nodes.getNodeAs<clang::DeclaratorDecl>("rhs_begin")) { |
| return getNodeFromDecl(rhs_begin, result); |
| } |
| |
| // Shouldn't get here. |
| assert(false); |
| } |
| |
| void run(const MatchFinder::MatchResult& result) override { |
| const clang::SourceManager& source_manager = *result.SourceManager; |
| const clang::FunctionDecl* fct_decl = |
| result.Nodes.getNodeAs<clang::FunctionDecl>("fct_decl"); |
| const clang::CXXMethodDecl* method_decl = |
| result.Nodes.getNodeAs<clang::CXXMethodDecl>("fct_decl"); |
| |
| const std::string current_key = GetKey(fct_decl, source_manager); |
| |
| // Function related by separate declaration and definition: |
| { |
| for (auto* previous_decl = fct_decl->getPreviousDecl(); previous_decl; |
| previous_decl = previous_decl->getPreviousDecl()) { |
| // TODO(356666773): The `previous_decl` might be part of third_party/. |
| // Then it won't be matched by the matcher. So only one of the pair |
| // would have a node. |
| const std::string previous_key = GetKey(previous_decl, source_manager); |
| fct_sig_pairs_.push_back({ |
| current_key, |
| previous_key, |
| }); |
| } |
| } |
| |
| // Function related by overriding: |
| if (method_decl) { |
| for (auto* m : method_decl->overridden_methods()) { |
| const std::string previous_key = GetKey(m, source_manager); |
| fct_sig_pairs_.push_back({ |
| current_key, |
| previous_key, |
| }); |
| } |
| } |
| |
| Node n = getNodeFromMatchResult(result); |
| fct_sig_nodes_[current_key].insert(n); |
| } |
| |
| private: |
| // Map a function signature, which is modeled as a string representing file |
| // location, to its matched graph nodes (RTNode and ParmVarDecl nodes). |
| // Note: `RTNode` represents a function return type node. |
| // In order to avoid relying on the order with which nodes are matched in |
| // the AST, and to guarantee that nodes are stored in the file declaration |
| // order, we use a `std::set<Node>` which sorts Nodes based on the replacement |
| // directive which contains the file offset of a given node. |
| // Note that a replacement directive has the following format: |
| // `r:::<file path>:::<offset>:::<length>:::<replacement text>` |
| // The order is important because at the end of a tool run on a |
| // translationUnit, for each pair of function signatures, we iterate |
| // concurrently through the two sets of Nodes creating edges between nodes |
| // that appear at the same index. |
| // AddEdge(first function's node1, second function's node1) |
| // AddEdge(first function's node2, second function's node2) |
| // and so on... |
| std::map<std::string, std::set<Node>>& fct_sig_nodes_; |
| |
| // Map related function signatures to each other, this is needed for |
| // functions |
| // with separate definition and declaration, and for overridden functions. |
| std::vector<std::pair<std::string, std::string>>& fct_sig_pairs_; |
| }; |
| |
// Owns the two match callbacks of the tool (PotentialNodes and
// FunctionSignatureNodes) and registers every AST matcher on the MatchFinder
// it was constructed with.
| class Spanifier { |
| public: |
// `finder` is kept by reference. `output_helper` is handed to the
// PotentialNodes callback; `sig_nodes` and `sig_pairs` are handed to the
// FunctionSignatureNodes callback.
| explicit Spanifier( |
| MatchFinder& finder, |
| OutputHelper& output_helper, |
| std::map<std::string, std::set<Node>>& sig_nodes, |
| std::vector<std::pair<std::string, std::string>>& sig_pairs) |
| : match_finder_(finder), |
| potential_nodes_(output_helper), |
| fct_sig_nodes_(sig_nodes, sig_pairs) {} |
| |
// Builds all matchers and registers each of them on `match_finder_`, with
// either `potential_nodes_` or `fct_sig_nodes_` as the callback.
| void addMatchers() { |
// Anything matching one of these is filtered out of the declaration
// matchers below through `unless(exclusions)`.
| auto exclusions = anyOf( |
| isExpansionInSystemHeader(), raw_ptr_plugin::isInExternCContext(), |
| raw_ptr_plugin::isInThirdPartyLocation(), |
| raw_ptr_plugin::isInGeneratedLocation(), |
| raw_ptr_plugin::ImplicitFieldDeclaration(), |
| raw_ptr_plugin::isInMacroLocation(), |
| hasAncestor(cxxRecordDecl(anyOf(hasName("raw_ptr"), hasName("span"))))); |
| |
| // Exclude literal strings as these need to become string_view |
// Pointers to anonymous structs/unions, to functions and to members are
// excluded by the same matcher.
| auto pointer_type = pointerType(pointee(qualType(unless(anyOf( |
| qualType(hasDeclaration( |
| cxxRecordDecl(raw_ptr_plugin::isAnonymousStructOrUnion()))), |
| hasUnqualifiedDesugaredType(anyOf(functionType(), memberPointerType())), |
| hasCanonicalType( |
| anyOf(asString("const char"), asString("const wchar_t"), |
| asString("const char8_t"), asString("const char16_t"), |
| asString("const char32_t")))))))); |
| |
// Types declared by a specialization of the `raw_ptr` class template, and
// the TypeLoc spelling such a type.
| auto raw_ptr_type = qualType( |
| hasDeclaration(classTemplateSpecializationDecl(hasName("raw_ptr")))); |
| auto raw_ptr_type_loc = templateSpecializationTypeLoc(loc(raw_ptr_type)); |
| |
// A declaration qualifies if its type is either `pointer_type` or a raw_ptr.
// In the raw_ptr case the TypeLoc is bound so that the callbacks can pick it
// up. The lhs and rhs variants differ only in the ids they bind.
| auto lhs_type_loc = anyOf( |
| hasType(pointer_type), |
| allOf(hasType(raw_ptr_type), |
| hasDescendant(raw_ptr_type_loc.bind("lhs_raw_ptr_type_loc")))); |
| auto rhs_type_loc = anyOf( |
| hasType(pointer_type), |
| allOf(hasType(raw_ptr_type), |
| hasDescendant(raw_ptr_type_loc.bind("rhs_raw_ptr_type_loc")))); |
| |
// Fields, variables and parameters of a qualifying type, bound as
// "lhs_begin" or "rhs_begin".
| auto lhs_field = |
| fieldDecl(raw_ptr_plugin::hasExplicitFieldDecl(lhs_type_loc), |
| unless(exclusions), |
| unless(hasParent(cxxRecordDecl(hasName("raw_ptr"))))) |
| .bind("lhs_begin"); |
| auto rhs_field = |
| fieldDecl(raw_ptr_plugin::hasExplicitFieldDecl(rhs_type_loc), |
| unless(exclusions), |
| unless(hasParent(cxxRecordDecl(hasName("raw_ptr"))))) |
| .bind("rhs_begin"); |
| |
| auto lhs_var = varDecl(lhs_type_loc, unless(exclusions)).bind("lhs_begin"); |
| auto rhs_var = varDecl(rhs_type_loc, unless(exclusions)).bind("rhs_begin"); |
| |
| auto lhs_param = |
| parmVarDecl(lhs_type_loc, unless(exclusions)).bind("lhs_begin"); |
| |
| auto rhs_param = |
| parmVarDecl(rhs_type_loc, unless(exclusions)).bind("rhs_begin"); |
| |
| // Exclude functions returning literal strings as these need to become |
| // string_view. |
| auto exclude_literal_strings = |
| unless(returns(qualType(pointsTo(qualType(hasCanonicalType( |
| anyOf(asString("const char"), asString("const wchar_t"), |
| asString("const char8_t"), asString("const char16_t"), |
| asString("const char32_t")))))))); |
| |
// Calls to non-excluded functions whose return type is spelled as a pointer;
// the return TypeLoc is bound as "rhs_type_loc" / "lhs_type_loc".
| auto rhs_call_expr = callExpr(callee( |
| functionDecl(hasReturnTypeLoc(pointerTypeLoc().bind("rhs_type_loc")), |
| exclude_literal_strings, unless(exclusions)))); |
| |
| auto lhs_call_expr = callExpr(callee( |
| functionDecl(hasReturnTypeLoc(pointerTypeLoc().bind("lhs_type_loc")), |
| exclude_literal_strings, unless(exclusions)))); |
| |
// An lhs expression is a reference to an lhs variable/parameter, an access
// to an lhs field, or an lhs call.
| auto lhs_expr = expr(anyOf(declRefExpr(to(anyOf(lhs_var, lhs_param))), |
| memberExpr(member(lhs_field)), lhs_call_expr)); |
| |
// References to declarations whose type is a constant-size array.
| auto constant_array_exprs = |
| declRefExpr(to(anyOf(varDecl(hasType(constantArrayType())), |
| parmVarDecl(hasType(constantArrayType())), |
| fieldDecl(hasType(constantArrayType()))))); |
| |
| // Matches statements of the form: &buf[n] where buf is a container type |
| // (span, vector, array,...). |
// A "container type" is recognized here as a class that has a `size` method.
| auto buff_address_from_container = unaryOperator( |
| hasOperatorName("&"), |
| hasUnaryOperand(cxxOperatorCallExpr(callee(functionDecl( |
| hasName("operator[]"), |
| hasParent(cxxRecordDecl(hasMethod(hasName("size"))))))))); |
| |
| // t* a = buf.data(); |
| auto member_data_call = |
| cxxMemberCallExpr( |
| callee(functionDecl( |
| hasName("data"), |
| hasParent(cxxRecordDecl(hasMethod(hasName("size")))))), |
| has(memberExpr().bind("data_member_expr"))) |
| .bind("member_data_call"); |
| |
| // Defines nodes that contain size information, these include: |
| // - nullptr => size is zero |
| // - calls to new/new[n] => size is 1/n |
| // - constant arrays buf[1024] => size is 1024 |
| // - calls to third_party functions that we can't rewrite (they should |
| // provide a size for the pointer returned) |
| // TODO(353710304): Consider handling functions taking in/out args ex: |
| // void alloc(**ptr); |
| // TODO(353710304): Consider making member_data_call and size_node mutually |
| // exclusive. We rely here on the ordering of expressions |
| // in the anyOf matcher to first match member_data_call |
| // which is a subset of size_node. |
| auto size_node_matcher = expr(anyOf( |
| member_data_call, |
| expr(anyOf(callExpr(callee(functionDecl( |
| hasReturnTypeLoc(pointerTypeLoc()), |
| anyOf(raw_ptr_plugin::isInThirdPartyLocation(), |
| isExpansionInSystemHeader(), |
| raw_ptr_plugin::isInExternCContext())))), |
| cxxNullPtrLiteralExpr().bind("nullptr_expr"), cxxNewExpr(), |
| constant_array_exprs, buff_address_from_container)) |
| .bind("size_node"))); |
| |
// An rhs expression is a reference to an rhs variable/parameter, an access
// to an rhs field, or an rhs call, looking through parentheses and casts.
| auto rhs_expr = |
| expr(ignoringParenCasts(anyOf( |
| declRefExpr(to(anyOf(rhs_var, rhs_param))).bind("declRefExpr"), |
| memberExpr(member(rhs_field)).bind("memberExpr"), |
| rhs_call_expr.bind("callExpr")))) |
| .bind("rhs_expr"); |
| |
// `.get()` called on an rhs expression of raw_ptr type.
| auto get_calls_on_raw_ptr = cxxMemberCallExpr( |
| callee(cxxMethodDecl(hasName("get"), ofClass(hasName("raw_ptr")))), |
| has(memberExpr(has(rhs_expr)))); |
| |
// rhs expressions that are not size nodes. They are bound as
// "span_frontier", the id PotentialNodes::run checks before adding a
// `.data()` node.
| auto rhs_exprs_without_size_nodes = |
| expr(ignoringParenCasts(anyOf( |
| rhs_expr, |
| binaryOperation(hasOperatorName("+"), hasLHS(rhs_expr), |
| hasRHS(expr().bind("bin_op_rhs"))) |
| .bind("binaryOperator"), |
| unaryOperator(hasOperatorName("++"), hasUnaryOperand(rhs_expr)) |
| .bind("unaryOperator"), |
| cxxOperatorCallExpr( |
| callee(cxxMethodDecl(ofClass(hasName("raw_ptr")))), |
| hasOperatorName("++"), hasArgument(0, rhs_expr)) |
| .bind("raw_ptr_operator++"), |
| get_calls_on_raw_ptr))) |
| .bind("span_frontier"); |
| |
| // This represents the forms under which an expr could appear on the right |
| // hand side of an assignment operation, var construction, or an expr passed |
| // as callExpr argument. Examples: |
| // rhs_expr, rhs_expr++, ++rhs_expr, rhs_expr + n, cast(rhs_expr); |
| auto rhs_expr_variations = expr(ignoringParenCasts( |
| anyOf(size_node_matcher, rhs_exprs_without_size_nodes))); |
| |
| auto lhs_expr_variations = expr(ignoringParenCasts(lhs_expr)); |
| |
| // Expressions used to decide the pointer is used as a buffer include: |
| // expr[n], expr++, ++expr, expr + n, expr += n |
| auto buffer_expr1 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| expr(ignoringParenCasts(anyOf( |
| arraySubscriptExpr(hasLHS(lhs_expr_variations)), |
| binaryOperation( |
| anyOf(hasOperatorName("+="), hasOperatorName("+")), |
| hasLHS(lhs_expr_variations)), |
| unaryOperator(hasOperatorName("++"), |
| hasUnaryOperand(lhs_expr_variations)), |
| // for raw_ptr ops |
| cxxOperatorCallExpr(anyOf(hasOverloadedOperatorName("[]"), |
| hasOperatorName("++")), |
| hasArgument(0, lhs_expr_variations))))) |
| .bind("buffer_expr")); |
| match_finder_.addMatcher(buffer_expr1, &potential_nodes_); |
| |
// Subscripted array variables (`array[n]`). Binds "array_type",
// "array_type_loc" and "array_variable", the ids read by
// getNodeFromArrayType. Variables with external formal linkage are skipped.
| auto buffer_expr2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| expr(ignoringParenCasts(arraySubscriptExpr(hasLHS(declRefExpr(to( |
| varDecl(hasType(arrayType().bind("array_type")), |
| hasTypeLoc( |
| loc(qualType(anything())).bind("array_type_loc")), |
| unless(exclusions), unless(hasExternalFormalLinkage())) |
| .bind("array_variable"))))))) |
| .bind("buffer_expr")); |
| match_finder_.addMatcher(buffer_expr2, &potential_nodes_); |
| |
// `*expr` applied to an rhs expression without size information, through
// either the builtin operator or an overloaded operator*. Bound as
// "deref_expr".
| auto deref_expression = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| expr(anyOf(unaryOperator(hasOperatorName("*"), |
| hasUnaryOperand(rhs_exprs_without_size_nodes)), |
| cxxOperatorCallExpr( |
| hasOverloadedOperatorName("*"), |
| hasArgument(0, rhs_exprs_without_size_nodes))), |
| unless(raw_ptr_plugin::isInMacroLocation())) |
| .bind("deref_expr")); |
| match_finder_.addMatcher(deref_expression, &potential_nodes_); |
| |
| // This is needed to remove the `.get()` call on raw_ptr from rewritten |
| // expressions. Example: raw_ptr<T> member; auto* temp = member.get(); if |
| // member's type is rewritten to a raw_span<T>, this matcher is used to |
| // remove the `.get()` call. |
| auto raw_ptr_get_call = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| cxxMemberCallExpr( |
| callee(cxxMethodDecl(hasName("get"), ofClass(hasName("raw_ptr")))), |
| has(memberExpr(has(rhs_expr)).bind("get_member_expr"))) |
| .bind("raw_ptr_get_call")); |
| match_finder_.addMatcher(raw_ptr_get_call, &potential_nodes_); |
| |
| // When passing now-span buffers to third_party functions as parameters, we |
| // need to add `.data()` to extract the pointer and keep things compiling. |
// Arguments that are themselves size nodes (directly or behind a cast) are
// left out.
| auto passing_a_buffer_to_external_functions = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| callExpr(callee(functionDecl( |
| anyOf(isExpansionInSystemHeader(), |
| raw_ptr_plugin::isInExternCContext(), |
| raw_ptr_plugin::isInThirdPartyLocation()))), |
| forEachArgumentWithParam( |
| expr(rhs_expr_variations, |
| unless(anyOf( |
| castExpr(hasSourceExpression(size_node_matcher)), |
| size_node_matcher))) |
| .bind("passing_a_buffer_to_third_party_function"), |
| parmVarDecl()))); |
| match_finder_.addMatcher(passing_a_buffer_to_external_functions, |
| &potential_nodes_); |
| |
| // Handles assignment: |
| // a = b; |
| // a = fct(); |
| // a = reinterpret_cast<>(b); |
| // a = (cond) ? expr1 : expr2; |
| auto assignement_relationship = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| binaryOperation(hasOperatorName("="), |
| hasOperands(lhs_expr_variations, |
| anyOf(rhs_expr_variations, |
| conditionalOperator(hasTrueExpression( |
| rhs_expr_variations)))), |
| unless(isExpansionInSystemHeader()))); |
| match_finder_.addMatcher(assignement_relationship, &potential_nodes_); |
| |
| // Creates the edge from lhs to false_expr in a ternary conditional |
| // operator. |
| auto assignement_relationship2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| binaryOperation(hasOperatorName("="), |
| hasOperands(lhs_expr_variations, |
| conditionalOperator(hasFalseExpression( |
| rhs_expr_variations))), |
| unless(isExpansionInSystemHeader()))); |
| match_finder_.addMatcher(assignement_relationship2, &potential_nodes_); |
| |
| // Supports: |
| // T* temp = member; |
| // T* temp = init(); |
| // T* temp = (cond) ? expr1 : expr2; |
| // T* temp = reinterpret_cast<>(b); |
| auto var_construction = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| varDecl( |
| lhs_var, |
| has(expr(anyOf( |
| rhs_expr_variations, |
| conditionalOperator(hasTrueExpression(rhs_expr_variations)), |
| cxxConstructExpr(has(expr(anyOf( |
| rhs_expr_variations, conditionalOperator(hasTrueExpression( |
| rhs_expr_variations))))))))), |
| unless(isExpansionInSystemHeader()))); |
| match_finder_.addMatcher(var_construction, &potential_nodes_); |
| |
| // Creates the edge from lhs to false_expr in a ternary conditional |
| // operator. |
| auto var_construction2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| varDecl( |
| lhs_var, |
| has(expr(anyOf( |
| conditionalOperator(hasFalseExpression(rhs_expr_variations)), |
| cxxConstructExpr(has(expr(conditionalOperator( |
| hasFalseExpression(rhs_expr_variations)))))))), |
| unless(isExpansionInSystemHeader()))); |
| match_finder_.addMatcher(var_construction2, &potential_nodes_); |
| |
| // Supports: |
| // return member; |
| // return fct(); |
| // return reinterpret_cast(expr); |
| // return (cond) ? expr1 : expr2; |
// The lhs of the relationship is the pointer return type of the enclosing
// function, bound as "lhs_type_loc".
| auto returned_var_or_member = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| returnStmt( |
| hasReturnValue(expr(anyOf( |
| rhs_expr_variations, |
| conditionalOperator(hasTrueExpression(rhs_expr_variations))))), |
| unless(isExpansionInSystemHeader()), |
| forFunction(functionDecl( |
| hasReturnTypeLoc(pointerTypeLoc().bind("lhs_type_loc")), |
| unless(exclusions)))) |
| .bind("lhs_stmt")); |
| match_finder_.addMatcher(returned_var_or_member, &potential_nodes_); |
| |
| // Creates the edge from lhs to false_expr in a ternary conditional |
| // operator. |
| auto returned_var_or_member2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| returnStmt(hasReturnValue(conditionalOperator( |
| hasFalseExpression(rhs_expr_variations))), |
| unless(isExpansionInSystemHeader()), |
| forFunction(functionDecl( |
| hasReturnTypeLoc(pointerTypeLoc().bind("lhs_type_loc")), |
| unless(exclusions)))) |
| .bind("lhs_stmt")); |
| match_finder_.addMatcher(returned_var_or_member2, &potential_nodes_); |
| |
| // Handles expressions of the form member(arg). |
| // A(const T* arg): member(arg){} |
| // member(init()); |
| // member(fct()); |
| auto ctor_initilizer = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| cxxCtorInitializer(withInitializer(anyOf( |
| cxxConstructExpr(has(expr(rhs_expr_variations))), |
| rhs_expr_variations)), |
| forField(lhs_field))); |
| |
| match_finder_.addMatcher(ctor_initilizer, &potential_nodes_); |
| |
| // Supports: |
| // S* temp; |
| // Obj o(temp); Obj o{temp}; |
| // This links temp to the parameter in Obj's constructor. |
| auto var_passed_in_constructor = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| cxxConstructExpr(forEachArgumentWithParam( |
| expr(anyOf( |
| rhs_expr_variations, |
| conditionalOperator(hasTrueExpression(rhs_expr_variations)))), |
| lhs_param))); |
| match_finder_.addMatcher(var_passed_in_constructor, &potential_nodes_); |
| |
| // Creates the edge from lhs to false_expr in a ternary conditional |
| // operator. |
| auto var_passed_in_constructor2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| cxxConstructExpr(forEachArgumentWithParam( |
| expr(conditionalOperator(hasFalseExpression(rhs_expr_variations))), |
| lhs_param))); |
| match_finder_.addMatcher(var_passed_in_constructor2, &potential_nodes_); |
| |
| // handles Obj o{temp} when Obj has no constructor. |
| // This creates a link between the expr and the underlying field. |
| auto var_passed_in_initlistExpr = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| initListExpr(raw_ptr_plugin::forEachInitExprWithFieldDecl( |
| expr(anyOf( |
| rhs_expr_variations, |
| conditionalOperator(hasTrueExpression(rhs_expr_variations)))), |
| lhs_field))); |
| match_finder_.addMatcher(var_passed_in_initlistExpr, &potential_nodes_); |
| |
// Same as above for the false branch of a ternary conditional operator used
// as an initializer-list element.
| auto var_passed_in_initlistExpr2 = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| initListExpr(raw_ptr_plugin::forEachInitExprWithFieldDecl( |
| expr(conditionalOperator(hasFalseExpression(rhs_expr_variations))), |
| lhs_field))); |
| match_finder_.addMatcher(var_passed_in_initlistExpr2, &potential_nodes_); |
| |
| // Link var/field passed as function arguments to function parameter |
| // This handles func(var/member/param), func(func2()) |
| // cxxOpCallExprs excluded here since operator= can be invoked as a call |
| // expr for classes/structs. |
| auto call_expr = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| callExpr(forEachArgumentWithParam( |
| expr(anyOf(rhs_expr_variations, |
| conditionalOperator( |
| hasTrueExpression(rhs_expr_variations)))), |
| lhs_param), |
| unless(isExpansionInSystemHeader()), |
| unless(cxxOperatorCallExpr(hasOperatorName("="))))); |
| match_finder_.addMatcher(call_expr, &potential_nodes_); |
| |
| // Map function declaration signature to function definition signature; |
| // This is problematic in the case of callbacks defined in function. |
// These two matchers are the only ones routed to `fct_sig_nodes_`. Both bind
// the function as "fct_decl": the first once per matching parameter, the
// second for a pointer return type.
| auto fct_decls_params = |
| traverse(clang::TK_IgnoreUnlessSpelledInSource, |
| functionDecl(forEachParmVarDecl(rhs_param), unless(exclusions)) |
| .bind("fct_decl")); |
| match_finder_.addMatcher(fct_decls_params, &fct_sig_nodes_); |
| |
| auto fct_decls_returns = traverse( |
| clang::TK_IgnoreUnlessSpelledInSource, |
| functionDecl(hasReturnTypeLoc(pointerTypeLoc().bind("rhs_type_loc")), |
| unless(exclusions)) |
| .bind("fct_decl")); |
| match_finder_.addMatcher(fct_decls_returns, &fct_sig_nodes_); |
| } |
| |
| private: |
// Finder on which addMatchers() registers the matchers.
| MatchFinder& match_finder_; |
// Callback for buffer-usage and adjacency matches.
| PotentialNodes potential_nodes_; |
// Callback for function-signature matches.
| FunctionSignatureNodes fct_sig_nodes_; |
| }; |
| |
| } // namespace |
| |
| int main(int argc, const char* argv[]) { |
| llvm::InitializeNativeTarget(); |
| llvm::InitializeNativeTargetAsmParser(); |
| llvm::cl::OptionCategory category( |
| "spanifier: changes" |
| " 1- |T* var| to |base::span<T> var|." |
| " 2- |raw_ptr<T> var| to |base::raw_span<T> var|"); |
| |
| llvm::Expected<clang::tooling::CommonOptionsParser> options = |
| clang::tooling::CommonOptionsParser::create(argc, argv, category); |
| assert(static_cast<bool>(options)); // Should not return an error. |
| clang::tooling::ClangTool tool(options->getCompilations(), |
| options->getSourcePathList()); |
| |
| // Map a function signature, which is modeled as a string representing file |
| // location, to it's graph nodes (RTNode and ParmVarDecl nodes). |
| // RTNode represents a function return type. |
| std::map<std::string, std::set<Node>> fct_sig_nodes; |
| // Map related function signatures to each other, this is needed for functions |
| // with separate definition and declaration, and for overridden functions. |
| std::vector<std::pair<std::string, std::string>> fct_sig_pairs; |
| OutputHelper output_helper; |
| MatchFinder match_finder; |
| Spanifier rewriter(match_finder, output_helper, fct_sig_nodes, fct_sig_pairs); |
| rewriter.addMatchers(); |
| |
| // Prepare and run the tool. |
| std::unique_ptr<clang::tooling::FrontendActionFactory> factory = |
| clang::tooling::newFrontendActionFactory(&match_finder); |
| int result = tool.run(factory.get()); |
| |
| // Establish connections between corresponding parameters of adjacent function |
| // signatures. Two functions are considered adjacent if one overrides the |
| // other or if one is a function declaration while the other is its |
| // corresponding definition. |
| for (auto& [l, r] : fct_sig_pairs) { |
| // By construction, only the left side of the pair is guaranteed to have a |
| // matching set of nodes. |
| assert(fct_sig_nodes.find(l) != fct_sig_nodes.end()); |
| |
| // TODO(356666773): Handle the case where both side of the pair haven't |
| // been matched. This happens when a function is declared in third_party/, |
| // but implemented in first party. |
| if (fct_sig_nodes.find(r) == fct_sig_nodes.end()) { |
| continue; |
| } |
| |
| auto& s1 = fct_sig_nodes[l]; |
| auto& s2 = fct_sig_nodes[r]; |
| assert(s1.size() == s2.size()); |
| auto i1 = s1.begin(); |
| auto i2 = s2.begin(); |
| while (i1 != s1.end()) { |
| output_helper.AddEdge(*i1, *i2); |
| output_helper.AddEdge(*i2, *i1); |
| i1++; |
| i2++; |
| } |
| } |
| |
| // Emits the list of edges. |
| output_helper.Emit(); |
| return result; |
| } |