blob: b0d9aed5f43f8065f0674f39eeed40b014690ddd [file] [log] [blame]
//===-- NaClValueEnumerator.cpp ------------------------------------------===//
// Number values and types for bitcode writer
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the NaClValueEnumerator class.
//
//===----------------------------------------------------------------------===//
#include "NaClValueEnumerator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
using namespace llvm;
static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
return V.first->getType()->isIntOrIntVectorTy();
}
/// NaClValueEnumerator - Enumerate module-level information.
NaClValueEnumerator::NaClValueEnumerator(const Module *M, uint32_t PNaClVersion)
: PNaClVersion(PNaClVersion) {
// Create map for counting frequency of types, and set field
// TypeCountMap accordingly. Note: Pointer field TypeCountMap is
// used to deal with the fact that types are added through various
// method calls in this routine. Rather than pass it as an argument,
// we use a field. The field is a pointer so that the memory
// footprint of count_map can be garbage collected when this
// constructor completes.
TypeCountMapType count_map;
TypeCountMap = &count_map;
IntPtrType = IntegerType::get(M->getContext(), PNaClIntPtrTypeBitSize);
// Enumerate the functions. Note: We do this before global
// variables, so that global variable initializations can refer to
// the functions without a forward reference.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
EnumerateValue(I);
}
// Enumerate the global variables.
FirstGlobalVarID = Values.size();
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
EnumerateValue(I);
NumGlobalVarIDs = Values.size() - FirstGlobalVarID;
// Enumerate the aliases.
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
EnumerateValue(I);
// Remember what is the cutoff between globalvalue's and other constants.
unsigned FirstConstant = Values.size();
// Skip global variable initializers since they are handled within
// WriteGlobalVars of file NaClBitcodeWriter.cpp.
// Enumerate the aliasees.
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
EnumerateValue(I->getAliasee());
// Insert constants that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
// Enumerate types used by function bodies and argument lists.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
EnumerateType(I->getType());
for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
// Don't generate types for elided pointer casts!
if (IsElidedCast(I))
continue;
if (const SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
// Handle switch instruction specially, so that we don't
// write out unnecessary vector/array types used to model case
// selectors.
EnumerateOperandType(SI->getCondition());
} else {
for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
OI != E; ++OI) {
EnumerateOperandType(*OI);
}
}
EnumerateType(I->getType());
}
}
// Optimized type indicies to put "common" expected types in with small
// indices.
OptimizeTypes(M);
TypeCountMap = NULL;
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
}
void NaClValueEnumerator::OptimizeTypes(const Module *M) {
// Sort types by count, so that we can index them based on
// frequency. Use indices of built TypeMap, so that order of
// construction is repeatable.
std::set<unsigned> type_counts;
typedef std::set<unsigned> TypeSetType;
std::map<unsigned, TypeSetType> usage_count_map;
TypeList IdType(Types);
for (TypeCountMapType::iterator iter = TypeCountMap->begin();
iter != TypeCountMap->end(); ++ iter) {
type_counts.insert(iter->second);
usage_count_map[iter->second].insert(TypeMap[iter->first]-1);
}
// Reset type tracking maps, so that we can re-enter based
// on fequency ordering.
TypeCountMap = NULL;
Types.clear();
TypeMap.clear();
// Reinsert types, based on frequency.
for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin();
count_iter != type_counts.rend(); ++count_iter) {
TypeSetType& count_types = usage_count_map[*count_iter];
for (TypeSetType::iterator type_iter = count_types.begin();
type_iter != count_types.end(); ++type_iter)
EnumerateType((IdType[*type_iter]), true);
}
}
unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
return I->second;
}
void NaClValueEnumerator::setInstructionID(const Instruction *I) {
InstructionMap[I] = InstructionCount++;
}
unsigned NaClValueEnumerator::getValueID(const Value *V) const {
ValueMapType::const_iterator I = ValueMap.find(V);
assert(I != ValueMap.end() && "Value not in slotcalculator!");
return I->second-1;
}
void NaClValueEnumerator::dump() const {
print(dbgs(), ValueMap, "Default");
dbgs() << '\n';
}
void NaClValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
const char *Name) const {
OS << "Map Name: " << Name << "\n";
OS << "Size: " << Map.size() << "\n";
for (ValueMapType::const_iterator I = Map.begin(),
E = Map.end(); I != E; ++I) {
const Value *V = I->first;
if (V->hasName())
OS << "Value: " << V->getName();
else
OS << "Value: [null]\n";
V->dump();
OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
if (UI != V->use_begin())
OS << ",";
if((*UI)->hasName())
OS << " " << (*UI)->getName();
else
OS << " [null]";
}
OS << "\n\n";
}
}
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
NaClValueEnumerator &VE;
explicit CstSortPredicate(NaClValueEnumerator &ve) : VE(ve) {}
bool operator()(const std::pair<const Value*, unsigned> &LHS,
const std::pair<const Value*, unsigned> &RHS) {
// Sort by plane.
if (LHS.first->getType() != RHS.first->getType())
return VE.getTypeID(LHS.first->getType()) <
VE.getTypeID(RHS.first->getType());
// Then by frequency.
return LHS.second > RHS.second;
}
};
}
/// OptimizeConstants - Reorder constant pool for denser encoding.
void NaClValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
CstSortPredicate P(*this);
std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
// Ensure that integer and vector of integer constants are at the start of the
// constant pool. This is important so that GEP structure indices come before
// gep constant exprs.
std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
isIntOrIntVectorValue);
// Rebuild the modified portion of ValueMap.
for (; CstStart != CstEnd; ++CstStart)
ValueMap[Values[CstStart].first] = CstStart+1;
}
/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void NaClValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
VI != VE; ++VI)
EnumerateValue(VI->getValue());
}
void NaClValueEnumerator::EnumerateValue(const Value *VIn) {
// Skip over elided values.
const Value *V = ElideCasts(VIn);
if (V != VIn) return;
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
"EnumerateValue doesn't handle Metadata!");
// Check to see if it's already in!
unsigned &ValueID = ValueMap[V];
if (ValueID) {
// Increment use count.
Values[ValueID-1].second++;
return;
}
// Enumerate the type of this value. Skip global values since no
// types are dumped for global variables.
if (!isa<GlobalVariable>(V))
EnumerateType(V->getType());
if (const Constant *C = dyn_cast<Constant>(V)) {
if (isa<GlobalValue>(C)) {
// Initializers for globals are handled explicitly elsewhere.
} else if (C->getNumOperands()) {
// If a constant has operands, enumerate them. This makes sure that if a
// constant has uses (for example an array of const ints), that they are
// inserted also.
// We prefer to enumerate them with values before we enumerate the user
// itself. This makes it more likely that we can avoid forward references
// in the reader. We know that there can be no cycles in the constants
// graph that don't go through a global variable.
for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
I != E; ++I)
if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
EnumerateValue(*I);
// Finally, add the value. Doing this could make the ValueID reference be
// dangling, don't reuse it.
Values.push_back(std::make_pair(V, 1U));
ValueMap[V] = Values.size();
return;
}
}
// Add the value.
Values.push_back(std::make_pair(V, 1U));
ValueID = Values.size();
}
Type *NaClValueEnumerator::NormalizeType(Type *Ty) const {
if (Ty->isPointerTy())
return IntPtrType;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
SmallVector<Type *, 8> ArgTypes;
for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I)
ArgTypes.push_back(NormalizeType(FTy->getParamType(I)));
return FunctionType::get(NormalizeType(FTy->getReturnType()),
ArgTypes, false);
}
return Ty;
}
void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) {
// Pointer types do not need to be given type IDs.
if (Ty->isPointerTy())
Ty = Ty->getPointerElementType();
Ty = NormalizeType(Ty);
// The label type does not need to be given a type ID.
if (Ty->isLabelTy())
return;
// This function is used to enumerate types referenced by the given
// module. This function is called in two phases, based on the value
// of TypeCountMap. These phases are:
//
// (1) In this phase, InsideOptimizeTypes=false. We are collecting types
// and all corresponding (implicitly) referenced types. In addition,
// we are keeping track of the number of references to each type in
// TypeCountMap. These reference counts will be used by method
// OptimizeTypes to associate the smallest type ID's with the most
// referenced types.
//
// (2) In this phase, InsideOptimizeTypes=true. We are registering types
// based on frequency. To minimize type IDs for frequently used
// types, (unlike the other context) we are inserting the minimal
// (implicitly) referenced types needed for each type.
unsigned *TypeID = &TypeMap[Ty];
if (TypeCountMap) ++((*TypeCountMap)[Ty]);
// We've already seen this type.
if (*TypeID)
return;
// If it is a non-anonymous struct, mark the type as being visited so that we
// don't recursively visit it. This is safe because we allow forward
// references of these in the bitcode reader.
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isLiteral())
*TypeID = ~0U;
// If in the second phase (i.e. inside optimize types), don't expand
// pointers to structures, since we can just generate a forward
// reference to it. This way, we don't use up unnecessary (small) ID
// values just to define the pointer.
bool EnumerateSubtypes = true;
if (InsideOptimizeTypes)
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
if (StructType *STy = dyn_cast<StructType>(PTy->getElementType()))
if (!STy->isLiteral())
EnumerateSubtypes = false;
// Enumerate all of the subtypes before we enumerate this type. This ensures
// that the type will be enumerated in an order that can be directly built.
if (EnumerateSubtypes) {
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I)
EnumerateType(*I, InsideOptimizeTypes);
}
// Refresh the TypeID pointer in case the table rehashed.
TypeID = &TypeMap[Ty];
// Check to see if we got the pointer another way. This can happen when
// enumerating recursive types that hit the base case deeper than they start.
//
// If this is actually a struct that we are treating as forward ref'able,
// then emit the definition now that all of its contents are available.
if (*TypeID && *TypeID != ~0U)
return;
// Add this type now that its contents are all happily enumerated.
Types.push_back(Ty);
*TypeID = Types.size();
}
// Enumerate the types for the specified value. If the value is a constant,
// walk through it, enumerating the types of the constant.
void NaClValueEnumerator::EnumerateOperandType(const Value *V) {
// Note: We intentionally don't create a type id for global variables,
// since the type is automatically generated by the reader before any
// use of the global variable.
if (isa<GlobalVariable>(V)) return;
EnumerateType(V->getType());
if (const Constant *C = dyn_cast<Constant>(V)) {
// If this constant is already enumerated, ignore it, we know its type must
// be enumerated.
if (ValueMap.count(V)) return;
// This constant may have operands, make sure to enumerate the types in
// them.
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
const Value *Op = C->getOperand(i);
// Don't enumerate basic blocks here, this happens as operands to
// blockaddress.
if (isa<BasicBlock>(Op)) continue;
EnumerateOperandType(Op);
}
}
}
void NaClValueEnumerator::incorporateFunction(const Function &F) {
InstructionCount = 0;
NumModuleValues = Values.size();
// Make sure no insertions outside of a function.
assert(FnForwardTypeRefs.empty());
// Adding function arguments to the value table.
for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I)
EnumerateValue(I);
FirstFuncConstantID = Values.size();
// Add all function-level constants to the value table.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
if (const SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
// Handle switch instruction specially, so that we don't write
// out unnecessary vector/array constants used to model case selectors.
if (isa<Constant>(SI->getCondition())) {
EnumerateValue(SI->getCondition());
}
} else {
for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
OI != E; ++OI) {
if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
isa<InlineAsm>(*OI))
EnumerateValue(*OI);
}
}
}
BasicBlocks.push_back(BB);
ValueMap[BB] = BasicBlocks.size();
}
// Optimize the constant layout.
OptimizeConstants(FirstFuncConstantID, Values.size());
FirstInstID = Values.size();
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
}
}
void NaClValueEnumerator::purgeFunction() {
/// Remove purged values from the ValueMap.
for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
ValueMap.erase(Values[i].first);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
Values.resize(NumModuleValues);
BasicBlocks.clear();
FnForwardTypeRefs.clear();
}
// The normal form required by the PNaCl ABI verifier (documented in
// ReplacePtrsWithInts.cpp) allows us to omit the following pointer
// casts from the bitcode file.
const Value *NaClValueEnumerator::ElideCasts(const Value *V) const {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
default:
break;
case Instruction::BitCast:
if (I->getType()->isPointerTy()) {
V = I->getOperand(0);
}
break;
case Instruction::IntToPtr:
V = ElideCasts(I->getOperand(0));
break;
case Instruction::PtrToInt:
if (IsIntPtrType(I->getType())) {
V = I->getOperand(0);
}
break;
}
}
return V;
}