blob: 421a3165a2d9f811ceffb3ce379049cd4ec8ff02 [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <cstdint>
#include <memory>
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"
#include "clang/AST/OperationKinds.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/NoopLattice.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Tooling/Transformer/Stencil.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
// This clang plugin checks for iterators used after they have been
// invalidated.
//
// Pre-existing bugs found: https://crbug.com/1421293
namespace {
// Diagnostic text reported when an iterator is used while the analysis cannot
// prove that it is still valid.
constexpr char kInvalidIteratorUsage[] =
    "[iterator-checker] Potentially invalid iterator used.";
// Diagnostic text reported when two iterators that are not associated with the
// same container are compared with `==` or `!=`.
constexpr char kInvalidIteratorComparison[] =
    "[iterator-checker] Potentially invalid iterator comparison.";
// To understand C++ code, we need a way to encode what is an iterator and what
// are the functions that might invalidate them.
//
// The Clang frontend supports several source-level annotations in the form of
// GCC-style attributes and pragmas that can help make using the Clang Static
// Analyzer useful. We aim to provide support for those annotations. For now, we
// hard code those for "known" interesting classes.
// TODO(https://crbug.com/1455371) Support source-level annotations.
// Bit flags describing how a function interacts with iterators. Flags are
// combined with `|` and stored as `uint8_t` masks in the annotation tables.
enum Annotation : uint8_t {
  // No iterator-related effect.
  kNone = 0x00,
  // The function returns an iterator.
  kReturnIterator = 0x01,
  // The function returns an "end" iterator. This is kept distinct from
  // `kReturnIterator` because end iterators are created differently.
  kReturnEndIterator = 0x02,
  // The function returns a pair of iterators.
  // TODO(https://crbug.com/1455371) Not yet implemented.
  kReturnIteratorPair = 0x04,
  // The function invalidates the iterators passed as arguments.
  kInvalidateArgs = 0x08,
  // The function invalidates every iterator of the container.
  kInvalidateAll = 0x10,
};
// Annotations for free functions, keyed by fully qualified name. The values
// are `Annotation` bit masks. The table is only queried by key, so the order
// of the entries is irrelevant.
static llvm::DenseMap<llvm::StringRef, uint8_t> g_functions_annotations = {
    // Functions returning an "end" iterator.
    {"std::end", Annotation::kReturnEndIterator},
    {"std::cend", Annotation::kReturnEndIterator},
    // Functions returning any other iterator.
    {"std::begin", Annotation::kReturnIterator},
    {"std::cbegin", Annotation::kReturnIterator},
    {"std::find", Annotation::kReturnIterator},
    {"std::next", Annotation::kReturnIterator},
    {"std::prev", Annotation::kReturnIterator},
    // TODO(https://crbug.com/1455371) Add additional functions.
};
// Annotations for member functions. The outer key is the fully qualified name
// of the class, the inner key is the unqualified method name, and the value is
// an `Annotation` bit mask. Methods that are absent from the inner table are
// ignored by the analysis.
static llvm::DenseMap<llvm::StringRef, llvm::DenseMap<llvm::StringRef, uint8_t>>
    g_member_function_annotations = {
        {
            "std::vector",
            {
                {"append_range", Annotation::kInvalidateAll},
                {"assign", Annotation::kInvalidateAll},
                {"assign_range", Annotation::kInvalidateAll},
                {"back", Annotation::kNone},
                {"begin", Annotation::kReturnIterator},
                {"capacity", Annotation::kNone},
                {"cbegin", Annotation::kReturnIterator},
                {"cend", Annotation::kReturnEndIterator},
                {"clear", Annotation::kInvalidateAll},
                {"crbegin", Annotation::kReturnIterator},
                {"crend", Annotation::kReturnIterator},
                {"data", Annotation::kNone},
                // `emplace` returns an iterator to the emplaced element, like
                // `insert` does. (This entry used to OR `kInvalidateAll` with
                // itself, which dropped the returned iterator.)
                {"emplace",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"emplace_back", Annotation::kInvalidateAll},
                {"empty", Annotation::kNone},
                {"end", Annotation::kReturnEndIterator},
                {"erase",
                 Annotation::kReturnIterator | Annotation::kInvalidateAll},
                {"front", Annotation::kNone},
                {"insert",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"insert_range",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"max_size", Annotation::kNone},
                {"pop_back", Annotation::kInvalidateAll},
                {"push_back", Annotation::kInvalidateAll},
                // NOTE(review): `rend`/`crend` are plain iterators here but
                // "end" iterators for WTF::Vector and std::deque below;
                // confirm which one is intended.
                {"rbegin", Annotation::kReturnIterator},
                {"rend", Annotation::kReturnIterator},
                {"reserve", Annotation::kInvalidateAll},
                {"resize", Annotation::kInvalidateAll},
                {"shrink_to_fit", Annotation::kInvalidateAll},
                {"size", Annotation::kNone},
                {"swap", Annotation::kNone},
            },
        },
        {
            "std::unordered_set",
            {
                {"begin", Annotation::kReturnIterator},
                {"cbegin", Annotation::kReturnIterator},
                {"end", Annotation::kReturnEndIterator},
                {"cend", Annotation::kReturnEndIterator},
                {"clear", Annotation::kInvalidateAll},
                {"insert",
                 Annotation::kInvalidateAll | Annotation::kReturnIteratorPair},
                {"emplace",
                 Annotation::kInvalidateAll | Annotation::kReturnIteratorPair},
                {"emplace_hint",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"erase",
                 Annotation::kInvalidateArgs | Annotation::kReturnIterator},
                {"extract", Annotation::kInvalidateArgs},
                {"find", Annotation::kReturnIterator},
                // TODO(https://crbug.com/1455371) Add additional functions.
            },
        },
        {
            "WTF::Vector",
            {
                {"begin", Annotation::kReturnIterator},
                {"rbegin", Annotation::kReturnIterator},
                {"end", Annotation::kReturnEndIterator},
                {"rend", Annotation::kReturnEndIterator},
                {"clear", Annotation::kInvalidateAll},
                {"shrink_to_fit", Annotation::kInvalidateAll},
                {"push_back", Annotation::kInvalidateAll},
                {"emplace_back", Annotation::kInvalidateAll},
                {"insert", Annotation::kInvalidateAll},
                {"InsertAt", Annotation::kInvalidateAll},
                {"InsertVector", Annotation::kInvalidateAll},
                {"push_front", Annotation::kInvalidateAll},
                {"PrependVector", Annotation::kInvalidateAll},
                {"EraseAt", Annotation::kInvalidateAll},
                {"erase",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                // `pop_back` invalidates only the iterator pointing to the
                // last element, but we have no way to track it.
                {"pop_back", Annotation::kNone},
                // TODO(https://crbug.com/1455371) Add additional functions.
            },
        },
        {
            "std::deque",
            {
                {"begin", Annotation::kReturnIterator},
                {"cbegin", Annotation::kReturnIterator},
                {"rbegin", Annotation::kReturnIterator},
                {"end", Annotation::kReturnEndIterator},
                {"cend", Annotation::kReturnEndIterator},
                {"rend", Annotation::kReturnEndIterator},
                {"clear", Annotation::kInvalidateAll},
                {"shrink_to_fit", Annotation::kInvalidateAll},
                {"insert",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"emplace",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"erase",
                 Annotation::kInvalidateAll | Annotation::kReturnIterator},
                {"push_back", Annotation::kInvalidateAll},
                {"emplace_back", Annotation::kInvalidateAll},
                {"push_front", Annotation::kInvalidateAll},
                {"emplace_front", Annotation::kInvalidateAll},
                // TODO(https://crbug.com/1455371) Add additional functions.
            },
        },
};
// Stream receiving debug logs. It discards everything by default; swap
// `llvm::nulls()` for `llvm::outs()` to see the debug logs.
llvm::raw_ostream& DebugStream() {
  llvm::raw_ostream& sink = llvm::nulls();
  return sink;
}
// Stream receiving info logs. It discards everything by default; swap
// `llvm::nulls()` for `llvm::outs()` to see the info logs.
llvm::raw_ostream& InfoStream() {
  llvm::raw_ostream& sink = llvm::nulls();
  return sink;
}
// In DataflowAnalysis, we associate to every C++ prvalue:
//
// - A RecordStorageLocation:
// This will be used to reference the actual location of the values being used
// during the analysis. For example, in `auto it = std::begin(cont)`, it will
// be assigned a RecordStorageLocation.
//
// - Some fields:
// Those are just one-to-one mapping with the actual record type being
// modeled.
//
// - Some synthetic fields:
// Those are the essence of how dataflow analysis works. Those fields are not
// actually mapped to existing fields in the record type, but are ones that we
// use in order to perform the analysis. For instance, in this analysis, those
// fields are:
// - `is_valid` - This field is used to store the iterator validity.
// - `is_end` - Stores whether the current iterator points to the end
// iterator.
//
// We also keep track of the `iterator` -> `container` mapping in order to
// invalidate iterators when necessary.
// Reads the boolean synthetic field `name` of the record value `iterator` from
// `env`. Returns nullptr when `env` holds no value for that field. `iterator`
// must be a RecordValue.
clang::dataflow::BoolValue* GetSyntheticFieldWithname(
    llvm::StringRef name,
    const clang::dataflow::Environment& env,
    const clang::dataflow::Value& iterator) {
  const auto& record = clang::cast<clang::dataflow::RecordValue>(iterator);
  clang::dataflow::StorageLocation& field_loc =
      record.getLoc().getSyntheticField(name);
  clang::dataflow::Value* field_value = env.getValue(field_loc);
  return clang::cast_or_null<clang::dataflow::BoolValue>(field_value);
}
// Returns the `is_valid` synthetic field of `iterator` in `env`.
clang::dataflow::BoolValue* GetIsValid(const clang::dataflow::Environment& env,
                                       const clang::dataflow::Value& iterator) {
  static constexpr char kFieldName[] = "is_valid";
  return GetSyntheticFieldWithname(kFieldName, env, iterator);
}
// Returns the `is_end` synthetic field of `iterator` in `env`.
clang::dataflow::BoolValue* GetIsEnd(const clang::dataflow::Environment& env,
                                     const clang::dataflow::Value& iterator) {
  static constexpr char kFieldName[] = "is_end";
  return GetSyntheticFieldWithname(kFieldName, env, iterator);
}
// Stores `res` into the synthetic field `name` of the record value `val` in
// `env`. `val` must be a RecordValue.
void SetSyntheticFieldWithName(llvm::StringRef name,
                               clang::dataflow::Environment& env,
                               const clang::dataflow::Value& val,
                               clang::dataflow::BoolValue& res) {
  const auto& record = clang::cast<clang::dataflow::RecordValue>(val);
  clang::dataflow::StorageLocation& field_loc =
      record.getLoc().getSyntheticField(name);
  env.setValue(field_loc, res);
}
// Overwrites the `is_valid` synthetic field of `val` in `env` with `res`.
void SetIsValid(clang::dataflow::Environment& env,
                const clang::dataflow::Value& val,
                clang::dataflow::BoolValue& res) {
  static constexpr char kFieldName[] = "is_valid";
  SetSyntheticFieldWithName(kFieldName, env, val, res);
}
// Overwrites the `is_end` synthetic field of `val` in `env` with `res`.
void SetIsEnd(clang::dataflow::Environment& env,
              const clang::dataflow::Value& val,
              clang::dataflow::BoolValue& res) {
  static constexpr char kFieldName[] = "is_end";
  SetSyntheticFieldWithName(kFieldName, env, val, res);
}
// Returns the formula of the boolean value attached to `expr` in `env`. When
// `expr` has no boolean value yet, a fresh atomic one is created and attached
// to `expr` first.
const clang::dataflow::Formula& ForceBoolValue(
    clang::dataflow::Environment& env,
    const clang::Expr& expr) {
  clang::dataflow::BoolValue* value =
      env.get<clang::dataflow::BoolValue>(expr);
  if (value == nullptr) {
    value = &env.makeAtomicBoolValue();
    env.setValue(expr, *value);
  }
  return value->formula();
}
// We don't use DataflowAnalysis lattices, hence the NoopLattice. Instead,
// we use the WatchedLiteralsSolver and populate different `Environment`s with
// `Value`s. The DataflowAnalysis iterates until it can't make new
// deductions:
// - The `transfer` function updates an environment after executing one more
// instruction.
// - The `merge` function merges together the environments from two
// diverging code paths, for instance after an `if` or a `for` loop.
class InvalidIteratorAnalysis
: public clang::dataflow::DataflowAnalysis<InvalidIteratorAnalysis,
clang::dataflow::NoopLattice> {
public:
// Builds the analysis for `func`. The AST context of `func` is forwarded to
// the DataflowAnalysis base class, and `diagnostic` is kept by reference so
// that errors can be reported while the analysis runs.
InvalidIteratorAnalysis(const clang::FunctionDecl* func,
clang::DiagnosticsEngine& diagnostic)
: DataflowAnalysis(func->getASTContext()), diagnostic_(diagnostic) {}
// Used by DataflowAnalysis template. This analysis keeps no lattice state, so
// the initial element is a default NoopLattice.
clang::dataflow::NoopLattice initialElement() const {
  clang::dataflow::NoopLattice lattice;
  return lattice;
}
// Used by DataflowAnalysis template. Only CFG elements that wrap a statement
// are handled; every other kind of element is ignored. `state` is unused.
void transfer(const clang::CFGElement& elt,
              clang::dataflow::NoopLattice& state,
              clang::dataflow::Environment& env) {
  auto cfg_stmt = elt.getAs<clang::CFGStmt>();
  if (!cfg_stmt) {
    return;
  }
  Transfer(*cfg_stmt->getStmt(), env);
}
// Used by DataflowAnalysis template.
//
// Called when two values of type `type` coming from diverging code paths are
// joined. Non-iterator types are left untouched. For iterators, the merged
// value keeps its container association only when both incoming iterators are
// associated with the same known container; otherwise the association is
// dropped. Always returns true.
bool merge(clang::QualType type,
           const clang::dataflow::Value& val1,
           const clang::dataflow::Environment& env1,
           const clang::dataflow::Value& val2,
           const clang::dataflow::Environment& env2,
           clang::dataflow::Value& merged_val,
           clang::dataflow::Environment& merged_env) final {
  if (!IsIterator(type)) {
    return true;
  }
  auto* container1 = GetContainerValue(env1, val1);
  auto* container2 = GetContainerValue(env2, val2);
  DebugStream() << "HERE: " << DebugString(env1, val1);
  DebugStream() << "HERE: " << DebugString(env2, val2);
  // `container1` may be null when neither iterator is associated with a
  // container. It must not be dereferenced in that case, so an unknown
  // container is treated like a mismatch.
  if (container1 == nullptr || container1 != container2) {
    // See tests/iterator-with-multiple-container.cpp
    // TODO(https://crbug.com/1455371) Ban iterator associated with multiple
    // containers.
    UnsetContainerValue(merged_env, merged_val);
    return true;
  }
  SetContainerValue(merged_env, merged_val, *container1);
  return true;
}
// Returns the synthetic fields used by this analysis: two booleans named
// `is_valid` and `is_end`. The same set is returned whatever `Type` is.
llvm::StringMap<clang::QualType> GetSyntheticFields(clang::QualType Type) {
  llvm::StringMap<clang::QualType> fields;
  fields["is_valid"] = getASTContext().BoolTy;
  fields["is_end"] = getASTContext().BoolTy;
  return fields;
}
private:
// Stmt: https://clang.llvm.org/doxygen/classclang_1_1Stmt.html
// Dispatches to the DeclStmt or ValueStmt overload. Any other kind of
// statement is ignored.
void Transfer(const clang::Stmt& stmt, clang::dataflow::Environment& env) {
  if (const auto* declaration = clang::dyn_cast<clang::DeclStmt>(&stmt)) {
    Transfer(*declaration, env);
  } else if (const auto* value = clang::dyn_cast<clang::ValueStmt>(&stmt)) {
    Transfer(*value, env);
  }
}
// DeclStmt: https://clang.llvm.org/doxygen/classclang_1_1DeclStmt.html
// Forwards every variable declaration of the statement to the VarDecl
// overload. Other declarations are skipped.
void Transfer(const clang::DeclStmt& declaration_statement,
              clang::dataflow::Environment& env) {
  for (auto* declaration : declaration_statement.decls()) {
    auto* variable = clang::dyn_cast<clang::VarDecl>(declaration);
    if (!variable) {
      continue;
    }
    Transfer(*variable, env);
  }
}
// VarDecl: https://clang.llvm.org/doxygen/classclang_1_1VarDecl.html
// Currently a no-op: nothing is recorded for variable declarations here.
void Transfer(const clang::VarDecl& var_decl,
clang::dataflow::Environment& env) {}
// ValueStmt: https://clang.llvm.org/doxygen/classclang_1_1ValueStmt.html
// Only value statements that are expressions are analysed.
void Transfer(const clang::ValueStmt& value_stmt,
              clang::dataflow::Environment& env) {
  const auto* expression = clang::dyn_cast<clang::Expr>(&value_stmt);
  if (!expression) {
    return;
  }
  Transfer(*expression, env);
}
// Expr: https://clang.llvm.org/doxygen/classclang_1_1Expr.html
// Dispatches, in this order, on calls, constructor calls and casts. Any other
// expression is ignored.
void Transfer(const clang::Expr& expr, clang::dataflow::Environment& env) {
  if (const auto* call = clang::dyn_cast<clang::CallExpr>(&expr)) {
    Transfer(*call, env);
  } else if (const auto* construct =
                 clang::dyn_cast<clang::CXXConstructExpr>(&expr)) {
    Transfer(*construct, env);
  } else if (const auto* cast = clang::dyn_cast<clang::CastExpr>(&expr)) {
    Transfer(*cast, env);
  }
  // TODO(https://crbug.com/1455371): Add support for operator[]
  // (ArraySubscriptExpr)
}
// Handles copy/move construction of an iterator: the constructed expression
// takes the value of the iterator it is constructed from. Other constructors
// and non-iterator types are ignored.
void Transfer(const clang::CXXConstructExpr& expr,
              clang::dataflow::Environment& env) {
  if (!IsIterator(expr.getType().getCanonicalType())) {
    return;
  }
  const clang::CXXConstructorDecl* ctor = expr.getConstructor();
  assert(ctor != nullptr);
  if (!ctor->isCopyOrMoveConstructor() || expr.getNumArgs() == 0) {
    return;
  }
  // The source may not resolve to a tracked iterator value. Skip it instead of
  // dereferencing a null pointer, which is what happened in builds where
  // asserts are compiled out.
  auto* it = UnwrapAsIterator(expr.getArg(0), env);
  if (!it) {
    return;
  }
  env.setValue(expr, *it);
}
// CallExpr: https://clang.llvm.org/doxygen/classclang_1_1CallExpr.html
// Runs the handling shared by every call, then dispatches to the member call
// or operator call overload when applicable.
void Transfer(const clang::CallExpr& callexpr,
              clang::dataflow::Environment& env) {
  TransferCallExprCommon(callexpr, env);
  if (const auto* member_call =
          clang::dyn_cast<clang::CXXMemberCallExpr>(&callexpr)) {
    Transfer(*member_call, env);
  } else if (const auto* operator_call =
                 clang::dyn_cast<clang::CXXOperatorCallExpr>(&callexpr)) {
    Transfer(*operator_call, env);
  }
}
// Handles calls to the annotated free functions of `g_functions_annotations`.
//
// If the function is known to return an iterator and we can associate it with
// a known container, then we deduce the resulting expression is itself an
// iterator. The container is taken from the first argument: either the
// container of the iterator passed in, or the argument's own value.
void TransferCallExprCommon(const clang::CallExpr& expr,
                            clang::dataflow::Environment& env) {
  auto* callee = expr.getDirectCallee();
  if (!callee) {
    return;
  }
  std::string callee_name = callee->getQualifiedNameAsString();
  auto it = g_functions_annotations.find(callee_name);
  if (it == g_functions_annotations.end()) {
    return;
  }
  if (!(it->second & Annotation::kReturnIterator) &&
      !(it->second & Annotation::kReturnEndIterator)) {
    return;
  }
  // The container is derived from the first argument, so there must be one.
  if (expr.getNumArgs() == 0) {
    return;
  }
  bool is_end = (it->second & Annotation::kReturnEndIterator) != 0;
  const clang::Expr* first_arg = expr.getArg(0);
  clang::dataflow::Value* iterator = UnwrapAsIterator(first_arg, env);
  clang::dataflow::Value* container = iterator
                                          ? GetContainerValue(env, *iterator)
                                          : env.getValue(*first_arg);
  // A tracked iterator may have no associated container. Without a container
  // there is nothing to bind the result to, and `container` must not be
  // dereferenced.
  if (!container) {
    return;
  }
  TransferCallReturningIterator(
      &expr, *container,
      is_end ? env.getBoolLiteralValue(false) : env.makeAtomicBoolValue(),
      is_end ? env.getBoolLiteralValue(true) : env.makeAtomicBoolValue(),
      env);
}
// Makes the result of the call `expr` an iterator of `container` with the
// given `is_valid` / `is_end` states. For prvalue calls the iterator lives in
// the result object location and is also set as the value of `expr`. For
// glvalue calls it lives in the storage location of `expr`, which is created
// when missing.
void TransferCallReturningIterator(const clang::CallExpr* expr,
                                   clang::dataflow::Value& container,
                                   clang::dataflow::BoolValue& is_valid,
                                   clang::dataflow::BoolValue& is_end,
                                   clang::dataflow::Environment& env) {
  const bool returns_prvalue = expr->isPRValue();
  clang::dataflow::RecordStorageLocation* loc = nullptr;
  if (returns_prvalue) {
    loc = &env.getResultObjectLocation(*expr);
  } else {
    loc = env.get<clang::dataflow::RecordStorageLocation>(*expr);
    if (loc == nullptr) {
      loc = &clang::cast<clang::dataflow::RecordStorageLocation>(
          env.createStorageLocation(*expr));
      env.setStorageLocation(*expr, *loc);
    }
  }
  assert(loc);
  clang::dataflow::Value& iterator_value = CreateIteratorValue(
      loc->getType(), env, *loc, container, is_valid, is_end);
  if (returns_prvalue) {
    env.setValue(*expr, iterator_value);
  }
}
// CXXMemberCallExpr:
// https://clang.llvm.org/doxygen/classclang_1_1CXXMemberCallExpr.html
//
// Applies the annotations of `g_member_function_annotations` to a member call
// on a known container type. Invalidation is applied first and the returned
// iterator is created afterwards. With the opposite order, the freshly
// returned iterator (e.g. the result of `erase` or `insert`) is already bound
// to the container when `InvalidateContainer` runs, and gets invalidated
// together with the stale ones.
void Transfer(const clang::CXXMemberCallExpr& callexpr,
              clang::dataflow::Environment& env) {
  auto* callee = callexpr.getDirectCallee();
  if (!callee) {
    return;
  }
  const std::string callee_type = clang::cast<clang::CXXMethodDecl>(callee)
                                      ->getParent()
                                      ->getQualifiedNameAsString();
  auto container_annotations =
      g_member_function_annotations.find(callee_type);
  if (container_annotations == g_member_function_annotations.end()) {
    return;
  }
  const std::string callee_name = callee->getNameAsString();
  auto method_annotation = container_annotations->second.find(callee_name);
  if (method_annotation == container_annotations->second.end()) {
    return;
  }
  const uint8_t annotation = method_annotation->second;
  assert(!(annotation & Annotation::kReturnIterator) ||
         !(annotation & Annotation::kReturnIteratorPair));
  auto* container = env.getValue(*callexpr.getImplicitObjectArgument());
  if (!container) {
    return;
  }

  // Step 1: invalidate the pre-existing iterators.
  bool container_invalidated = false;
  if (annotation & Annotation::kInvalidateArgs) {
    bool found_iterator = false;
    // TODO(https://crbug.com/1455371): Invalid every arguments.
    for (unsigned i = 0; i < callexpr.getNumArgs(); i++) {
      if (auto* iterator = UnwrapAsIterator(callexpr.getArg(i), env)) {
        InfoStream() << "INVALIDATING ONE: " << DebugString(env, *iterator)
                     << '\n';
        InvalidateIterator(env, *iterator);
        found_iterator = true;
      }
    }
    if (!found_iterator) {
      // If we cannot get the iterator from the argument, then let's
      // invalidate everything instead:
      InfoStream() << "INVALIDATING MANY: Container: " << container << '\n';
      InvalidateContainer(env, *container);
      container_invalidated = true;
    }
  }
  if ((annotation & Annotation::kInvalidateAll) && !container_invalidated) {
    InfoStream() << "INVALIDATING MANY: Container: " << container << '\n';
    InvalidateContainer(env, *container);
  }

  // Step 2: model the iterator returned by the call, if any.
  if (annotation & Annotation::kReturnIterator ||
      annotation & Annotation::kReturnEndIterator) {
    const bool returns_end = (annotation & Annotation::kReturnEndIterator) != 0;
    TransferCallReturningIterator(
        &callexpr, *container,
        returns_end ? env.getBoolLiteralValue(false)
                    : env.makeAtomicBoolValue(),
        returns_end ? env.getBoolLiteralValue(true)
                    : env.makeAtomicBoolValue(),
        env);
  }
  if (annotation & Annotation::kReturnIteratorPair) {
    // TODO(https://crbug.com/1455371): Iterator pair are not yet supported.
  }
}
// CXXOperatorCallExpr:
// https://clang.llvm.org/doxygen/classclang_1_1CXXOperatorCallExpr.html
//
// Models the overloaded operators that matter for iterators: dereference,
// arithmetic, assignment, comparison and increment/decrement. Operators that
// are not listed below are ignored.
void Transfer(const clang::CXXOperatorCallExpr& expr,
              clang::dataflow::Environment& env) {
  const clang::OverloadedOperatorKind op = expr.getOperator();

  // Those are operations of the form:
  // - `*it`
  // - `it->`
  if (op == clang::OverloadedOperatorKind::OO_Star ||
      op == clang::OverloadedOperatorKind::OO_Arrow) {
    assert(expr.getNumArgs() >= 1);
    TransferExpressionAccessForDeref(expr.getArg(0), env);
    return;
  }

  // Those are operations of the form:
  // - `it += [integer]`
  // - `it -= [integer]`
  if (op == clang::OverloadedOperatorKind::OO_PlusEqual ||
      op == clang::OverloadedOperatorKind::OO_MinusEqual) {
    assert(expr.getNumArgs() == 2);
    // Once all the features are developed, this should really be a
    // TransferExpressionAccessForDeref here, but the current error rate
    // would be too high as for now.
    TransferExpressionAccessForCheck(expr.getArg(0), env);
    // The result of this operation is another iterator.
    if (auto* iterator = UnwrapAsIterator(expr.getArg(0), env)) {
      CloneIterator(&expr, *iterator, env);
    }
    return;
  }

  // Those are operations of the form:
  // - `it + [integer]`
  // - `it - [integer]`
  // - `[integer] + it`
  // - `[integer] - it`
  if (op == clang::OverloadedOperatorKind::OO_Plus ||
      op == clang::OverloadedOperatorKind::OO_Minus) {
    // This can happen for classes representing numerical values for example.
    // e.g. const Decimal d = 3; -d;
    if (expr.getNumArgs() < 2) {
      return;
    }
    // Once all the features are developed, this should really be a
    // TransferExpressionAccessForDeref here, but the current error rate
    // would be too high as for now.
    TransferExpressionAccessForCheck(expr.getArg(0), env);
    TransferExpressionAccessForCheck(expr.getArg(1), env);
    // Adding/subtracting an integer to/from an iterator results in a new
    // iterator expression of the same type.
    auto deduce_return_value = [&](const clang::Expr* a,
                                   const clang::Expr* b) {
      clang::dataflow::Value* iterator = UnwrapAsIterator(a, env);
      if (!iterator || !b->getType()->isIntegerType()) {
        return;
      }
      CloneIterator(&expr, *iterator, env);
    };
    deduce_return_value(expr.getArg(0), expr.getArg(1));
    deduce_return_value(expr.getArg(1), expr.getArg(0));
    return;
  }

  // Those are operations of the form:
  // - `it = [expr]`
  if (op == clang::OverloadedOperatorKind::OO_Equal) {
    if (expr.getNumArgs() < 2) {
      return;
    }
    // Just record the potentially new iterator.
    auto* lhs = UnwrapAsIterator(&expr, env);
    if (!lhs) {
      return;
    }
    // The right-hand side may not resolve to a tracked iterator, and a
    // tracked iterator may have no container. Neither pointer may be
    // dereferenced blindly: when the source container is unknown, the
    // destination simply loses its container association.
    auto* rhs = UnwrapAsIterator(expr.getArg(1), env);
    clang::dataflow::Value* container =
        rhs ? GetContainerValue(env, *rhs) : nullptr;
    if (container) {
      SetContainerValue(env, *lhs, *container);
    } else {
      UnsetContainerValue(env, *lhs);
    }
    return;
  }

  // Those are operations of the form:
  // - `it != [expr]`
  // - `it == [expr]`
  if (op == clang::OverloadedOperatorKind::OO_EqualEqual ||
      op == clang::OverloadedOperatorKind::OO_ExclaimEqual) {
    assert(expr.getNumArgs() >= 2);
    TransferExpressionAccessForCheck(expr.getArg(0), env);
    TransferExpressionAccessForCheck(expr.getArg(1), env);
    clang::dataflow::Value* lhs_it = UnwrapAsIterator(expr.getArg(0), env);
    clang::dataflow::Value* rhs_it = UnwrapAsIterator(expr.getArg(1), env);
    if (!lhs_it || !rhs_it) {
      return;
    }
    DebugStream() << DebugString(env, *lhs_it) << '\n';
    DebugStream() << DebugString(env, *rhs_it) << '\n';
    // Comparing iterators associated with different containers is an error.
    if (GetContainerValue(env, *lhs_it) != GetContainerValue(env, *rhs_it)) {
      diagnostic_.Report(
          expr.getSourceRange().getBegin(),
          diagnostic_.getCustomDiagID(clang::DiagnosticsEngine::Level::Error,
                                      kInvalidIteratorComparison));
    }
    // `formula` is true exactly when the comparison expression is true, so
    // its negation covers the opposite outcome.
    const auto& formula = ForceBoolValue(env, expr);
    auto& arena = env.arena();
    if (op == clang::OverloadedOperatorKind::OO_EqualEqual) {
      TransferIteratorsEquality(env, formula, lhs_it, rhs_it);
      TransferIteratorsInequality(env, arena.makeNot(formula), lhs_it,
                                  rhs_it);
    } else {
      TransferIteratorsInequality(env, formula, lhs_it, rhs_it);
      TransferIteratorsEquality(env, arena.makeNot(formula), lhs_it, rhs_it);
    }
    return;
  }

  // Those are operations of the form:
  // - `it--`
  // - `it++`
  if (op == clang::OverloadedOperatorKind::OO_PlusPlus ||
      op == clang::OverloadedOperatorKind::OO_MinusMinus) {
    assert(expr.getNumArgs());
    TransferExpressionAccessForDeref(expr.getArg(0), env);
    // The result of this operation is another iterator.
    if (auto* iterator = UnwrapAsIterator(expr.getArg(0), env)) {
      CloneIterator(&expr, *iterator, env);
    }
    return;
  }
  // TODO(https://crbug.com/1455371) Handle other kinds of operators.
}
// CastExpr: https://clang.llvm.org/doxygen/classclang_1_1CastExpr.html
// Only implicit casts are analysed.
void Transfer(const clang::CastExpr& value_stmt,
              clang::dataflow::Environment& env) {
  const auto* implicit_cast =
      clang::dyn_cast<clang::ImplicitCastExpr>(&value_stmt);
  if (!implicit_cast) {
    return;
  }
  Transfer(*implicit_cast, env);
}
// ImplicitCastExpr:
// https://clang.llvm.org/doxygen/classclang_1_1ImplicitCastExpr.html
// An lvalue-to-rvalue conversion is checked as a dereference of the
// sub-expression. Other cast kinds are ignored.
void Transfer(const clang::ImplicitCastExpr& expr,
              clang::dataflow::Environment& env) {
  if (expr.getCastKind() != clang::CastKind::CK_LValueToRValue) {
    return;
  }
  TransferExpressionAccessForDeref(expr.getSubExpr(), env);
}
// Records what `lhs == rhs` teaches us. Whenever `formula` holds, both
// iterators share the same state:
//   1. lhs->is_valid == rhs->is_valid
//   2. lhs->is_end == rhs->is_end
// For example, in `if (it == std::end(vec)) {}`, entering the `if` block
// means that `it` is the end iterator as well.
void TransferIteratorsEquality(clang::dataflow::Environment& env,
                               const clang::dataflow::Formula& formula,
                               clang::dataflow::Value* lhs,
                               clang::dataflow::Value* rhs) {
  auto& arena = env.arena();

  const auto& same_validity = arena.makeEquals(
      GetIsValid(env, *lhs)->formula(), GetIsValid(env, *rhs)->formula());
  env.assume(arena.makeImplies(formula, same_validity));

  const auto& same_end = arena.makeEquals(GetIsEnd(env, *lhs)->formula(),
                                          GetIsEnd(env, *rhs)->formula());
  env.assume(arena.makeImplies(formula, same_end));
}
// Records what `lhs != rhs` teaches us. Inequality gives little generic
// information about validity, except that whenever `formula` holds:
//   1. lhs->is_end => rhs->is_valid
//   2. rhs->is_end => lhs->is_valid
void TransferIteratorsInequality(clang::dataflow::Environment& env,
                                 const clang::dataflow::Formula& formula,
                                 clang::dataflow::Value* lhs,
                                 clang::dataflow::Value* rhs) {
  auto& arena = env.arena();

  const auto& differs_from_lhs_end =
      arena.makeAnd(formula, GetIsEnd(env, *lhs)->formula());
  env.assume(arena.makeImplies(differs_from_lhs_end,
                               GetIsValid(env, *rhs)->formula()));

  const auto& differs_from_rhs_end =
      arena.makeAnd(formula, GetIsEnd(env, *rhs)->formula());
  env.assume(arena.makeImplies(differs_from_rhs_end,
                               GetIsValid(env, *lhs)->formula()));
}
// Validates that the iterator at `expr` may be "checked" against another
// iterator, and reports an error otherwise. Expressions that are not tracked
// iterators are ignored.
void TransferExpressionAccessForCheck(const clang::Expr* expr,
                                      clang::dataflow::Environment& env) {
  clang::dataflow::Value* iterator = UnwrapAsIterator(expr, env);
  if (iterator == nullptr) {
    return;
  }
  // Two situations make the check acceptable:
  // - The iterator was never invalidated in any of the parent environments,
  //   which means it is still potentially valid.
  // - The iterator is provably the end iterator. End iterators must always be
  //   checkable, otherwise we wouldn't be able to make iterators valid.
  if (env.allows(GetIsValid(env, *iterator)->formula()) ||
      env.proves(GetIsEnd(env, *iterator)->formula())) {
    return;
  }
  TransferExpressionAccessForDeref(expr, env);
}
// Validates that the iterator at `expr` may be dereferenced: the iterator
// **must** be provably valid, otherwise an error is reported at the beginning
// of `expr`. Expressions that are not tracked iterators are ignored.
void TransferExpressionAccessForDeref(const clang::Expr* expr,
                                      clang::dataflow::Environment& env) {
  clang::dataflow::Value* iterator = UnwrapAsIterator(expr, env);
  if (iterator == nullptr) {
    return;
  }
  const bool proven_valid = env.proves(GetIsValid(env, *iterator)->formula());
  DebugStream() << "[ACCESS] " << DebugString(env, *iterator) << '\n';
  if (!proven_valid) {
    const unsigned diagnostic_id = diagnostic_.getCustomDiagID(
        clang::DiagnosticsEngine::Level::Error, kInvalidIteratorUsage);
    diagnostic_.Report(expr->getSourceRange().getBegin(), diagnostic_id);
  }
}
// Marks as invalid, in the current environment, every iterator recorded as
// belonging to `container`. Iterators whose storage location has no value in
// `env` are skipped.
void InvalidateContainer(clang::dataflow::Environment& env,
                         clang::dataflow::Value& container) {
  for (auto& entry : iterator_to_container_) {
    if (entry.second == &container) {
      if (auto* iterator_value = env.getValue(*entry.first)) {
        DebugStream() << DebugString(env, *iterator_value) << '\n';
        SetIsValid(env, *iterator_value, env.getBoolLiteralValue(false));
      }
    }
  }
}
// Marks the single iterator `iterator` as invalid in the current environment.
void InvalidateIterator(clang::dataflow::Environment& env,
                        clang::dataflow::Value& iterator) {
  auto& never_valid = env.getBoolLiteralValue(false);
  SetIsValid(env, iterator, never_valid);
}
// Creates a fresh record value at `Loc`, registers `type` as an iterator type
// and initialises the iterator state (`container`, `is_valid`, `is_end`).
// Returns the new value.
clang::dataflow::Value& CreateIteratorValue(
    clang::QualType type,
    clang::dataflow::Environment& env,
    clang::dataflow::RecordStorageLocation& Loc,
    clang::dataflow::Value& container,
    clang::dataflow::BoolValue& is_valid,
    clang::dataflow::BoolValue& is_end) {
  // From now on, values of this type are recognised by `IsIterator`.
  iterator_types_mapping_.insert(type.getCanonicalType());

  auto& record = env.create<clang::dataflow::RecordValue>(Loc);
  env.setValue(Loc, record);
  PopulateIteratorValue(&record, container, is_valid, is_end, env);
  return record;
}
// Attaches the iterator state to `value`: its container and its `is_valid` /
// `is_end` synthetic fields. `value` must point to a RecordValue.
void PopulateIteratorValue(clang::dataflow::Value* value,
                           clang::dataflow::Value& container,
                           clang::dataflow::BoolValue& is_valid,
                           clang::dataflow::BoolValue& is_end,
                           clang::dataflow::Environment& env) {
  assert(clang::isa<clang::dataflow::RecordValue>(*value));
  const clang::dataflow::Value& iterator = *value;
  SetContainerValue(env, iterator, container);
  SetIsValid(env, iterator, is_valid);
  SetIsEnd(env, iterator, is_end);
}
// Makes the result of `expr` a new iterator over the same container as
// `iterator`, with unknown (fresh atomic) `is_valid` and `is_end` states.
// Nothing is recorded when `iterator` has no associated container.
void CloneIterator(const clang::CallExpr* expr,
                   clang::dataflow::Value& iterator,
                   clang::dataflow::Environment& env) {
  auto* container = GetContainerValue(env, iterator);
  // `GetContainerValue` returns nullptr for iterators without a recorded
  // container (for instance after `merge` dropped the association). Binding a
  // reference to `*container` would then be undefined behaviour.
  if (!container) {
    return;
  }
  TransferCallReturningIterator(expr, *container, env.makeAtomicBoolValue(),
                                env.makeAtomicBoolValue(), env);
}
// Peels one layer off `E`: the sub-expression of an implicit cast, or the
// first argument of a constructor call. Returns nullptr when there is nothing
// to peel.
const clang::Expr* Unwrap(const clang::Expr* E) {
  // Is this an iterator implicit cast?
  if (const auto* implicit_cast = clang::dyn_cast<clang::ImplicitCastExpr>(E)) {
    return implicit_cast->getSubExpr();
  }
  // Is this an iterator constructor being invoked?
  // If the iterator is default constructed, we do not track it since we can't
  // link it to a container or anything. However, if it gets copy assigned
  // from an actually tracked iterator, we'll be able to track it back.
  const auto* construct = clang::dyn_cast<clang::CXXConstructExpr>(E);
  if (construct && construct->getNumArgs()) {
    return construct->getArg(0);
  }
  return nullptr;
}
// Walks `expr` through successive `Unwrap` calls looking for a glvalue whose
// storage location has an iterator type. Returns the value `env` holds for
// the first such expression (possibly nullptr), or nullptr when none is found.
clang::dataflow::Value* UnwrapAsIterator(
    const clang::Expr* expr,
    const clang::dataflow::Environment& env) {
  for (const clang::Expr* current = expr; current != nullptr;
       current = Unwrap(current)) {
    if (!current->isGLValue()) {
      continue;
    }
    auto* loc = env.getStorageLocation(*current);
    if (loc == nullptr) {
      continue;
    }
    if (IsIterator(loc->getType().getCanonicalType())) {
      return env.getValue(*current);
    }
  }
  return nullptr;
}
// Returns the container recorded for the iterator value `iterator`, or nullptr
// when none is recorded. `iterator` must be a RecordValue. `env` is unused.
clang::dataflow::Value* GetContainerValue(
    const clang::dataflow::Environment& env,
    const clang::dataflow::Value& iterator) {
  assert(clang::isa<clang::dataflow::RecordValue>(iterator));
  const auto& record = clang::cast<clang::dataflow::RecordValue>(iterator);
  auto found = iterator_to_container_.find(&record.getLoc());
  if (found == iterator_to_container_.end()) {
    return nullptr;
  }
  return found->second;
}
// Records `container` as the container of the iterator value `iterator`,
// keyed by the iterator's storage location. `env` is unused.
void SetContainerValue(const clang::dataflow::Environment& env,
                       const clang::dataflow::Value& iterator,
                       clang::dataflow::Value& container) {
  const auto& record = clang::cast<clang::dataflow::RecordValue>(iterator);
  iterator_to_container_[&record.getLoc()] = &container;
}
// Forgets the container recorded for the iterator value `iterator`, if any.
// `env` is unused.
void UnsetContainerValue(const clang::dataflow::Environment& env,
                         const clang::dataflow::Value& iterator) {
  const auto& record = clang::cast<clang::dataflow::RecordValue>(iterator);
  iterator_to_container_.erase(&record.getLoc());
}
// Returns whether `type` (compared by canonical type) has been registered as
// an iterator type by `CreateIteratorValue`.
bool IsIterator(clang::QualType type) {
  const clang::QualType canonical = type.getCanonicalType();
  return iterator_types_mapping_.find(canonical) !=
         iterator_types_mapping_.end();
}
// Formats debugging information about `iterator`: its address, its container
// and whether it is provably valid, provably invalid, or neither. The caller
// is responsible for ensuring `iterator` is actually an iterator.
std::string DebugString(const clang::dataflow::Environment& env,
                        const clang::dataflow::Value& iterator) {
  auto* container = GetContainerValue(env, iterator);
  const auto& validity = GetIsValid(env, iterator)->formula();
  const bool proven_valid = env.proves(validity);
  const bool proven_invalid = env.proves(env.arena().makeNot(validity));

  llvm::StringRef status = "MAYBE_INVALID";
  if (proven_valid) {
    status = "VALID";
  } else if (proven_invalid) {
    status = "INVALID";
  }

  std::string res;
  {
    // The stream is scoped so that it is flushed into `res` before returning.
    llvm::raw_string_ostream stream(res);
    stream << &iterator << " (container: " << container
           << " status: " << status << ")";
  }
  return res;
}
// The diagnostic engine used to report the errors found by the analysis. Held
// by reference: it must outlive this object.
clang::DiagnosticsEngine& diagnostic_;
// The iterator types found along the way, stored as canonical types.
// This part is kind of tricky for now, because we'd like to hard code these.
// Unfortunately, since we aim at handling multiple iterator types, we can't
// really do it statically, so we need to store the types while we encounter
// them. Entries are added by `CreateIteratorValue` and queried by
// `IsIterator`.
llvm::DenseSet<clang::QualType> iterator_types_mapping_;
// Iterator to container map, keyed by the storage location of the iterator.
// This allows us to invalidate all the iterators of a container when needed
// (see `InvalidateContainer`).
llvm::DenseMap<clang::dataflow::StorageLocation*, clang::dataflow::Value*>
iterator_to_container_;
};
// AST match callback running the invalid-iterator dataflow analysis on every
// supported function defined in the main file.
class IteratorInvalidationCheck
    : public clang::ast_matchers::MatchFinder::MatchCallback {
 public:
  // The checks will be performed on every function implemented in the main
  // file. Matched functions are bound to the "fun" id.
  void Register(clang::ast_matchers::MatchFinder& finder) {
    using namespace clang::ast_matchers;
    finder.addMatcher(
        functionDecl(isExpansionInMainFile(), isDefinition(), hasBody(stmt()))
            .bind("fun"),
        this);
  }

  // clang::ast_matchers::MatchFinder::MatchCallback implementation:
  // Runs the dataflow analysis on the matched function. Nothing is done when
  // the translation unit already has uncompilable errors or when the function
  // is not supported.
  void run(const clang::ast_matchers::MatchFinder::MatchResult& result) final {
    if (result.SourceManager->getDiagnostics().hasUncompilableErrorOccurred()) {
      return;
    }
    const auto* func = result.Nodes.getNodeAs<clang::FunctionDecl>("fun");
    assert(func);
    if (!Supported(*func)) {
      return;
    }
    InfoStream() << "[FUNCTION] " << func->getQualifiedNameAsString() << '\n';
    auto control_flow_context = clang::dataflow::ControlFlowContext::build(
        *func, *func->getBody(), *result.Context);
    if (!control_flow_context) {
      llvm::report_fatal_error(control_flow_context.takeError());
      return;
    }
    auto solver = std::make_unique<clang::dataflow::WatchedLiteralsSolver>();
    clang::dataflow::DataflowAnalysisContext analysis_context(
        std::move(solver));
    clang::dataflow::Environment environment(analysis_context, *func);
    InvalidIteratorAnalysis analysis(func,
                                     result.SourceManager->getDiagnostics());
    // `analysis` lives until the end of this function, which covers every
    // invocation of the callback.
    analysis_context.setSyntheticFieldCallback(
        [&analysis](clang::QualType type) {
          return analysis.GetSyntheticFields(type);
        });
    auto analysis_result =
        runDataflowAnalysis(*control_flow_context, analysis, environment);
    if (!analysis_result) {
      // Just ignore that for now! `consumeError` discards errors of any type,
      // whereas `handleAllErrors` with a single `StringError` handler aborts
      // on every other error type.
      llvm::consumeError(analysis_result.takeError());
    }
  }

  // Returns whether `func` can be analysed. Templated functions are skipped;
  // methods are further filtered by the CXXMethodDecl overload.
  bool Supported(const clang::FunctionDecl& func) {
    if (func.isTemplated()) {
      return false;
    }
    if (auto* method = clang::dyn_cast<clang::CXXMethodDecl>(&func)) {
      return Supported(*method);
    }
    return true;
  }

  // Returns whether `method` can be analysed. Lambdas, methods with a
  // dependent `this` type, and methods of unions or of records that
  // (transitively, by value) contain a union are rejected. Static methods are
  // always accepted.
  bool Supported(const clang::CXXMethodDecl& method) {
    const clang::CXXRecordDecl* record_declaration = method.getParent();
    if (record_declaration && record_declaration->isLambda()) {
      return false;
    }
    if (method.isStatic()) {
      return true;
    }
    const clang::QualType this_type = method.getThisType();
    if (this_type->isDependentType()) {
      return false;
    }
    if (method.getParent()->isTemplateDecl()) {
      return false;
    }
    // `getThisType()` is the type of the `this` *pointer*. The union scan
    // needs the class type it points to: a pointer type is never a union and
    // has no CXXRecordDecl, so scanning the pointer type finds nothing.
    clang::QualType object_type = this_type->getPointeeType();
    if (object_type.isNull()) {
      object_type = this_type;
    }
    // Ignore methods of unions and structs that contain an union.
    std::vector<clang::QualType> type_stack;
    type_stack.push_back(object_type);
    while (!type_stack.empty()) {
      clang::QualType type = type_stack.back();
      type_stack.pop_back();
      if (type->isUnionType()) {
        return false;
      }
      if (clang::CXXRecordDecl* cpp_record = type->getAsCXXRecordDecl()) {
        for (auto f : cpp_record->fields()) {
          type_stack.push_back(f->getType());
        }
      }
    }
    return true;
  }
};
// AST consumer that runs the iterator invalidation check over the whole
// translation unit once it has been parsed.
class IteratorInvalidationConsumer : public clang::ASTConsumer {
 public:
  // `instance` is currently unused.
  IteratorInvalidationConsumer(clang::CompilerInstance& instance) {}

  // Registers the check on a fresh match finder and runs it on `context`.
  void HandleTranslationUnit(clang::ASTContext& context) final {
    IteratorInvalidationCheck check;
    clang::ast_matchers::MatchFinder finder;
    check.Register(finder);
    finder.matchAST(context);
  }
};
// Plugin entry point: creates the AST consumer performing the check. The
// plugin accepts, and ignores, any command line argument.
class IteratorInvalidationPluginAction : public clang::PluginASTAction {
 public:
  IteratorInvalidationPluginAction() = default;

 private:
  // clang::PluginASTAction implementation:
  std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(
      clang::CompilerInstance& instance,
      llvm::StringRef ref) final {
    llvm::EnablePrettyStackTrace();
    auto consumer = std::make_unique<IteratorInvalidationConsumer>(instance);
    return consumer;
  }

  // Reports the action type `CmdlineBeforeMainAction`.
  PluginASTAction::ActionType getActionType() final {
    const PluginASTAction::ActionType action_type = CmdlineBeforeMainAction;
    return action_type;
  }

  // Always succeeds; `args` is not inspected.
  bool ParseArgs(const clang::CompilerInstance&,
                 const std::vector<std::string>& args) final {
    return true;
  }
};
// Registers the plugin action with clang's frontend plugin registry under the
// name "iterator-checker".
static clang::FrontendPluginRegistry::Add<IteratorInvalidationPluginAction> X(
"iterator-checker",
"Check c++ iterator misuse");
} // namespace