// Copyright 2018 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <climits>
#include <iterator>
#include <string>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "spirv/unified1/spirv.hpp"
#include "clspv/AddressSpace.h"
#include "clspv/Option.h"
#include "ArgKind.h"
#include "Builtins.h"
#include "Constants.h"
#include "DescriptorCounter.h"
#include "Passes.h"
#include "SpecConstant.h"
using namespace llvm;
#define DEBUG_TYPE "allocatedescriptors"
namespace {
// Constant that represents bitfield for UniformMemory Memory Semantics from
// SPIR-V. Used to test barrier semantics.
const uint32_t kMemorySemanticsUniformMemory = 0x40;
// Constant that represents bitfield for ImageMemory Memory Semantics from
// SPIR-V. Used to test barrier semantics.
const uint32_t kMemorySemanticsImageMemory = 0x800;
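// For example, an OpenCL barrier(CLK_GLOBAL_MEM_FENCE) lowers to an
// OpControlBarrier whose Memory Semantics operand has the UniformMemory bit
// set, so this pass treats it as a global barrier.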
cl::opt<bool> ShowDescriptors("show-desc", cl::init(false), cl::Hidden,
cl::desc("Show descriptors"));
using SamplerMapType = llvm::ArrayRef<std::pair<unsigned, std::string>>;
class AllocateDescriptorsPass final : public ModulePass {
public:
static char ID;
AllocateDescriptorsPass()
: ModulePass(ID), sampler_map_(), descriptor_set_(0), binding_(0) {}
bool runOnModule(Module &M) override;
SamplerMapType &sampler_map() { return sampler_map_; }
private:
// Allocates descriptors for all samplers and kernel arguments that have uses.
// Replaces their uses with calls to a special compiler builtin. Returns true
// if we changed the module.
bool AllocateDescriptors(Module &M);
// Allocates descriptors for literal samplers. Returns true if we changed the
// module.
bool AllocateLiteralSamplerDescriptors(Module &M);
// Allocates descriptors for kernel arguments with uses. Returns true if we
// changed the module.
bool AllocateKernelArgDescriptors(Module &M);
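// Allocates SpecIds for pointer-to-local kernel arguments and rewrites their
// uses via a workgroup variable accessor builtin. Returns true if we changed
// the module.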
bool AllocateLocalKernelArgSpecIds(Module &M);
// Allocates the next descriptor set and resets the tracked binding number to
// 0.
unsigned StartNewDescriptorSet(Module &M) {
// Claim the next descriptor set index; it must match our own counter.
binding_ = 0;
const auto set = clspv::TakeDescriptorIndex(&M);
assert(set == descriptor_set_);
descriptor_set_++;
return set;
}
// Returns true if |F| or any function in |F|'s call tree contains a global
// barrier.
// Specifically, it checks that the memory semantics operand contains
// UniformMemory memory semantics.
//
// The compiler targets OpenCL 1.2, which only provides relaxed atomics.
// Relaxed atomics cannot be used as synchronization primitives, so the pass
// does not consider them when adding coherence.
bool CallTreeContainsGlobalBarrier(Function *F);
// Returns a pair indicating if |V| is read and/or written to.
// Traces the use chain looking for loads and stores and proceeding through
// function calls until a non-pointer value is encountered.
//
// This function assumes loads, stores and function calls are the only
// instructions that can read or write to memory.
std::pair<bool, bool> HasReadsAndWrites(Value *V);
// Cache for which functions' call trees contain a global barrier.
DenseMap<Function *, bool> barrier_map_;
// The sampler map: an array ref of pairs, each holding the sampler
// constant as an integer followed by the string expression for the
// sampler.
SamplerMapType sampler_map_;
// Which descriptor set are we using?
int descriptor_set_;
// The next binding number to use.
int binding_;
// What makes a kernel argument require a new descriptor?
struct KernelArgDiscriminant {
KernelArgDiscriminant(Type *the_type = nullptr, int the_arg_index = 0,
int the_separation_token = 0, int is_coherent = 0)
: type(the_type), arg_index(the_arg_index),
separation_token(the_separation_token), coherent(is_coherent) {}
// Different argument type requires different descriptor since logical
// addressing requires strongly typed storage buffer variables.
Type *type;
// If we have multiple arguments of the same type to the same kernel,
// then we have to use distinct descriptors because the user could
// bind different storage buffers for them. Use argument index
// as a proxy for distinctness. This might overcount, but we
// don't worry about that yet.
int arg_index;
// An extra bit of data that can be used to separate resource
// variables that otherwise share the same type and argument index.
// By default this will be zero, and so it won't force any separation.
int separation_token;
// An extra bit that marks whether the variable is coherent. This means
// coherent and non-coherent variables will not share a binding.
int coherent;
};
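// For example, given
//   kernel void foo(global float *a, global float *b) { ... }
// both arguments have the same type, but their differing arg_index values
// yield distinct discriminants and therefore distinct descriptors.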
struct KADDenseMapInfo {
static KernelArgDiscriminant getEmptyKey() {
return KernelArgDiscriminant(nullptr, 0, 0);
}
static KernelArgDiscriminant getTombstoneKey() {
return KernelArgDiscriminant(nullptr, -1, 0);
}
static unsigned getHashValue(const KernelArgDiscriminant &key) {
return unsigned(uintptr_t(key.type)) ^ key.arg_index ^
key.separation_token ^ key.coherent;
}
static bool isEqual(const KernelArgDiscriminant &lhs,
const KernelArgDiscriminant &rhs) {
return lhs.type == rhs.type && lhs.arg_index == rhs.arg_index &&
lhs.separation_token == rhs.separation_token &&
lhs.coherent == rhs.coherent;
}
};
};
} // namespace
char AllocateDescriptorsPass::ID = 0;
INITIALIZE_PASS(AllocateDescriptorsPass, "AllocateDescriptorsPass",
"Allocate resource descriptors", false, false)
namespace clspv {
ModulePass *createAllocateDescriptorsPass(SamplerMapType sampler_map) {
auto *result = new AllocateDescriptorsPass();
result->sampler_map() = sampler_map;
return result;
}
} // namespace clspv
bool AllocateDescriptorsPass::runOnModule(Module &M) {
bool Changed = false;
// Samplers from the sampler map always grab descriptor set 0.
Changed |= AllocateLiteralSamplerDescriptors(M);
Changed |= AllocateKernelArgDescriptors(M);
Changed |= AllocateLocalKernelArgSpecIds(M);
return Changed;
}
bool AllocateDescriptorsPass::AllocateLiteralSamplerDescriptors(Module &M) {
if (ShowDescriptors) {
outs() << "Allocate literal sampler descriptors\n";
}
bool Changed = false;
auto init_fn = M.getFunction(clspv::TranslateSamplerInitializerFunction());
if (!init_fn)
return Changed;
if (clspv::Option::UseSamplerMap() && sampler_map_.empty()) {
errs() << "error: kernel uses a literal sampler but option -samplermap "
"has not been specified\n";
llvm_unreachable("Sampler literal in source without sampler map!");
}
const unsigned descriptor_set = StartNewDescriptorSet(M);
Changed = true;
if (!sampler_map_.empty()) {
if (ShowDescriptors) {
outs() << " Found " << sampler_map_.size()
<< " samplers in the sampler map\n";
}
}
// Replace all things that look like
// call %opencl.sampler_t addrspace(2)*
// @__translate_sampler_initializer(i32 sampler-literal-constant-value)
// #2
//
// with (if sampler map is provided):
//
// call %opencl.sampler_t addrspace(2)*
// @clspv.sampler.var.literal(i32 descriptor set, i32 binding, i32
// index-into-sampler-map)
//
// or (if no sampler map is provided):
//
// call %opencl.sampler_t addrspace(2)*
// @clspv.sampler.var.literal(i32 descriptor set, i32 binding, i32
// sampler-literal-value)
//
// We need to preserve the index into the sampler map so that later we can
// generate the sampler lines in the embedded reflection. That needs both the
// literal value and the string expression for the literal.
// Generate the function type for clspv::LiteralSamplerFunction()
IRBuilder<> Builder(M.getContext());
auto *sampler_struct_ty =
StructType::getTypeByName(M.getContext(), "opencl.sampler_t");
if (!sampler_struct_ty) {
sampler_struct_ty = StructType::create(M.getContext(), "opencl.sampler_t");
}
auto *sampler_ty =
sampler_struct_ty->getPointerTo(clspv::AddressSpace::Constant);
Type *i32 = Builder.getInt32Ty();
FunctionType *fn_ty = FunctionType::get(sampler_ty, {i32, i32, i32}, false);
auto var_fn = M.getOrInsertFunction(clspv::LiteralSamplerFunction(), fn_ty);
// Map sampler literal to binding number.
DenseMap<unsigned, unsigned> binding_for_value;
DenseMap<unsigned, unsigned> index_for_value;
unsigned index = 0;
if (!sampler_map_.empty()) {
for (auto sampler_info : sampler_map_) {
const unsigned value = sampler_info.first;
const std::string &expr = sampler_info.second;
if (0 == binding_for_value.count(value)) {
// Make a new entry.
binding_for_value[value] = binding_++;
index_for_value[value] = index;
if (ShowDescriptors) {
outs() << " Map " << value << " to (" << descriptor_set << ","
<< binding_for_value[value] << ") << " << expr << "\n";
}
}
index++;
}
}
// Now replace calls to __translate_sampler_initializer
if (init_fn) {
// Copy users, to avoid modifying the list in place.
SmallVector<User *, 8> users(init_fn->users());
for (auto user : users) {
if (auto *call = dyn_cast<CallInst>(user)) {
auto const_val = dyn_cast<ConstantInt>(call->getArgOperand(0));
if (!const_val) {
call->getArgOperand(0)->print(errs());
llvm_unreachable("Argument of sampler initializer was non-constant!");
}
const auto value = static_cast<unsigned>(const_val->getZExtValue());
auto where = binding_for_value.find(value);
if (where == binding_for_value.end()) {
if (!sampler_map_.empty()) {
errs() << "Sampler literal " << value
<< " was not in the sampler map\n";
llvm_unreachable("Sampler literal was not found in sampler map!");
} else {
// Allocate a binding for this sampler value.
binding_for_value.insert(std::make_pair(value, index++));
if (ShowDescriptors) {
outs() << " Map " << value << " to (" << descriptor_set << ","
<< binding_for_value[value] << ")\n";
}
}
}
const unsigned binding = binding_for_value[value];
// Third parameter is either the data mask if no sampler map is
// specified or the index into the sampler map if one is provided.
unsigned third_param = value;
if (!sampler_map_.empty()) {
// Use the sampler map index when a sampler map is provided.
third_param = index_for_value[value];
}
SmallVector<Value *, 3> args = {Builder.getInt32(descriptor_set),
Builder.getInt32(binding),
Builder.getInt32(third_param)};
if (ShowDescriptors) {
outs() << " translate literal sampler " << *const_val << " to ("
<< descriptor_set << "," << binding << ")\n";
}
auto *new_call =
CallInst::Create(var_fn, args, "", dyn_cast<Instruction>(call));
call->replaceAllUsesWith(new_call);
call->eraseFromParent();
}
}
if (!init_fn->user_empty()) {
errs() << "Function: " << init_fn->getName().str()
<< " still has users after rewrite\n";
for (auto U : init_fn->users()) {
errs() << " User: " << *U << "\n";
}
llvm_unreachable("Unexpected uses remain");
}
init_fn->eraseFromParent();
} else {
if (ShowDescriptors) {
outs() << " No sampler\n";
}
}
return Changed;
}
bool AllocateDescriptorsPass::AllocateKernelArgDescriptors(Module &M) {
bool Changed = false;
if (ShowDescriptors) {
outs() << "Allocate kernel arg descriptors\n";
}
// First classify all kernel arguments by their discriminant, which is
// the (type, arg index) pair extended by a separation token and a
// coherence bit.
//
// FIRST RULE: There will be at least one resource variable for each
// different discriminant.
// Map a discriminant to a unique index. We don't use a UniqueVector
// because that requires an operator< that I don't want to define on
// llvm::Type*.
using KernelArgDiscriminantMap =
DenseMap<KernelArgDiscriminant, int, KADDenseMapInfo>;
// Maps a discriminant to its unique index, starting at 0.
KernelArgDiscriminantMap discriminant_map;
// SECOND RULE: We can use several strategies for descriptor binding
// to these variables.
//
// It may not be obvious, but:
// - A single resource variable can only be decorated once with
// DescriptorSet and Binding. Otherwise it's impossible to interpret
// how to use the variable.
// - Different resource variables can have the same binding. (For example,
// do that to save on descriptors, or to save on the number of resource
// variables.)
// - SPIR-V (trivially) allows reuse of (set,binding) pairs.
// - Vulkan permits this as well, but requires that for a given entry
// point all such variables statically referenced by the entry point's
// call tree must have a type compatible with the descriptor actually
// bound to the pipeline.
// - When setting up a pipeline, Vulkan does not care about the resource
// variables that are *not* statically referenced by the used entry points'
// call trees.
// For more, see Vulkan 14.5.3 DescriptorSet and Binding Assignment
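//
// For example, if kernels foo and bar each declare argument 0 as
// global float*, that argument maps to the same discriminant in both
// kernels, so both kernels can share a single resource variable and a
// single (set,binding) pair for it.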
const bool always_distinct_sets =
clspv::Option::DistinctKernelDescriptorSets();
// The default is that all kernels use the same descriptor set.
const bool always_single_kernel_descriptor = true;
// By default look for as much sharing as possible. But sometimes we need to
// ensure each kernel argument that is an image or sampler gets a different
// resource variable.
const bool always_distinct_image_sampler =
clspv::Option::HackDistinctImageSampler();
// Bookkeeping:
// - Each discriminant remembers which functions use it.
// - Each function remembers the pairs associated with each argument.
// Maps an arg discriminant index to the list of functions using that
// discriminant.
using FunctionsUsedByDiscriminantMap =
SmallVector<SmallVector<Function *, 3>, 3>;
FunctionsUsedByDiscriminantMap functions_used_by_discriminant;
struct DiscriminantInfo {
int index;
KernelArgDiscriminant discriminant;
};
// Maps a function to an ordered list of discriminants and their indices.
// The -1 index is a sentinel indicating the argument does not use a
// descriptor.
// TODO(dneto): This probably shouldn't be a DenseMap because its value type
// is pretty big.
DenseMap<Function *, SmallVector<DiscriminantInfo, 3>>
discriminants_used_by_function;
// Remember the list of kernels with bodies, for convenience.
// This is in module-order.
SmallVector<Function *, 3> kernels_with_bodies;
int num_image_sampler_arguments = 0;
for (Function &F : M) {
// Only scan arguments of kernel functions that have bodies.
if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL) {
continue;
}
kernels_with_bodies.push_back(&F);
auto &discriminants_list = discriminants_used_by_function[&F];
bool uses_barriers = CallTreeContainsGlobalBarrier(&F);
int arg_index = 0;
for (Argument &Arg : F.args()) {
Type *argTy = Arg.getType();
const auto arg_kind = clspv::GetArgKind(Arg);
int separation_token = 0;
switch (arg_kind) {
case clspv::ArgKind::SampledImage:
case clspv::ArgKind::StorageImage:
case clspv::ArgKind::Sampler:
if (always_distinct_image_sampler) {
separation_token = num_image_sampler_arguments;
}
num_image_sampler_arguments++;
break;
default:
break;
}
int coherent = 0;
if (uses_barriers && (arg_kind == clspv::ArgKind::Buffer ||
arg_kind == clspv::ArgKind::StorageImage)) {
// Coherency is only required if the argument is an SSBO or storage
// image that is both read and written to.
bool reads = false;
bool writes = false;
std::tie(reads, writes) = HasReadsAndWrites(&Arg);
coherent = (reads && writes) ? 1 : 0;
}
KernelArgDiscriminant key(argTy, arg_index, separation_token, coherent);
// First assume no descriptor is required.
discriminants_list.push_back(DiscriminantInfo{-1, key});
// Pointer-to-local arguments don't become resource variables.
if (arg_kind == clspv::ArgKind::Local) {
if (ShowDescriptors) {
errs() << "DBA: skip pointer-to-local\n\n";
}
} else {
int index;
auto where = discriminant_map.find(key);
if (where == discriminant_map.end()) {
index = int(discriminant_map.size());
// Save the new unique index for this discriminant.
discriminant_map[key] = index;
functions_used_by_discriminant.push_back(
SmallVector<Function *, 3>{&F});
} else {
index = where->second;
functions_used_by_discriminant[index].push_back(&F);
}
discriminants_list.back().index = index;
if (ShowDescriptors) {
outs() << F.getName() << " " << Arg.getName() << " -> index " << index
<< "\n";
}
}
arg_index++;
}
}
// Now map kernel arguments to descriptor sets and bindings.
// There are two buckets of descriptor sets:
// - The all_kernels_descriptor_set is for resources that are used
// by all kernels in the module.
// - Otherwise, each kernel gets its own descriptor set for its
// arguments that don't map to the same discriminant in *all*
// kernels. (It might map to a few, but not all.)
// The kUnallocated descriptor set value means "not yet allocated".
enum { kUnallocated = UINT_MAX };
unsigned all_kernels_descriptor_set = kUnallocated;
// Map the arg index to the binding to use in the all-descriptors descriptor
// set.
DenseMap<int, unsigned> all_kernels_binding_for_arg_index;
// Maps a function to the list of set and binding to use, per argument.
// For an argument that does not use a descriptor, its set and binding are
// both the kUnallocated value.
DenseMap<Function *, SmallVector<std::pair<unsigned, unsigned>, 3>>
set_and_binding_pairs_for_function;
// Determine set and binding for each kernel argument requiring a descriptor.
if (always_distinct_sets) {
for (Function *f_ptr : kernels_with_bodies) {
auto &set_and_binding_list = set_and_binding_pairs_for_function[f_ptr];
auto &discriminants_list = discriminants_used_by_function[f_ptr];
const auto set = clspv::TakeDescriptorIndex(&M);
unsigned binding = 0;
int arg_index = 0;
for (Argument &Arg : f_ptr->args()) {
set_and_binding_list.emplace_back(kUnallocated, kUnallocated);
if (discriminants_list[arg_index].index >= 0) {
if (clspv::GetArgKind(Arg) != clspv::ArgKind::PodPushConstant) {
// Don't assign a descriptor set to push constants.
set_and_binding_list.back().first = set;
}
set_and_binding_list.back().second = binding++;
}
arg_index++;
}
}
} else {
// Share resource variables.
for (Function *f_ptr : kernels_with_bodies) {
unsigned this_kernel_descriptor_set = kUnallocated;
unsigned this_kernel_next_binding = 0;
auto &discriminants_list = discriminants_used_by_function[f_ptr];
int arg_index = 0;
auto &set_and_binding_list = set_and_binding_pairs_for_function[f_ptr];
for (auto &info : discriminants_used_by_function[f_ptr]) {
set_and_binding_list.emplace_back(kUnallocated, kUnallocated);
if (discriminants_list[arg_index].index >= 0) {
// This argument will map to a resource.
unsigned set = kUnallocated;
unsigned binding = kUnallocated;
const bool is_push_constant_arg =
clspv::GetArgKind(*f_ptr->getArg(arg_index)) ==
clspv::ArgKind::PodPushConstant;
if (always_single_kernel_descriptor ||
functions_used_by_discriminant[info.index].size() ==
kernels_with_bodies.size() ||
is_push_constant_arg) {
// Reuse the descriptor because one of the following is true:
// - This kernel argument discriminant is consistent across all
// kernels.
// - Convention is to use a single descriptor for all kernels.
//
// Push constant args always take this path because they share a
// dummy descriptor, kUnallocated, that is never codegen'd.
if (!is_push_constant_arg) {
if (all_kernels_descriptor_set == kUnallocated) {
all_kernels_descriptor_set = clspv::TakeDescriptorIndex(&M);
}
set = all_kernels_descriptor_set;
}
auto where = all_kernels_binding_for_arg_index.find(arg_index);
if (where == all_kernels_binding_for_arg_index.end()) {
binding = all_kernels_binding_for_arg_index.size();
all_kernels_binding_for_arg_index[arg_index] = binding;
} else {
binding = where->second;
}
} else {
// Use a descriptor in the descriptor set dedicated to this
// kernel.
if (this_kernel_descriptor_set == kUnallocated) {
this_kernel_descriptor_set = clspv::TakeDescriptorIndex(&M);
}
set = this_kernel_descriptor_set;
binding = this_kernel_next_binding++;
}
set_and_binding_list.back().first = set;
set_and_binding_list.back().second = binding;
}
arg_index++;
}
}
}
// Rewrite the uses of the arguments.
IRBuilder<> Builder(M.getContext());
for (Function *f_ptr : kernels_with_bodies) {
auto &set_and_binding_list = set_and_binding_pairs_for_function[f_ptr];
auto &discriminants_list = discriminants_used_by_function[f_ptr];
const auto num_args = unsigned(set_and_binding_list.size());
if (!always_distinct_sets &&
(num_args != unsigned(discriminants_list.size()))) {
errs() << "num_args " << num_args << " != num discriminants "
<< discriminants_list.size() << "\n";
llvm_unreachable("Bad accounting in descriptor allocation");
}
const auto num_fun_args = unsigned(f_ptr->arg_end() - f_ptr->arg_begin());
if (num_fun_args != num_args) {
errs() << f_ptr->getName() << " has " << num_fun_args
<< " params but we have set_and_binding list of length "
<< num_args << "\n";
errs() << *f_ptr << "\n";
errs() << *(f_ptr->getType()) << "\n";
for (auto &arg : f_ptr->args()) {
errs() << " " << arg << "\n";
}
llvm_unreachable("Bad accounting in descriptor allocation. Mismatch with "
"function param list");
}
// Prepare to insert arg remapping instructions at the start of the
// function.
Builder.SetInsertPoint(f_ptr->getEntryBlock().getFirstNonPHI());
int arg_index = 0;
for (Argument &Arg : f_ptr->args()) {
if (discriminants_list[arg_index].index >= 0) {
Changed = true;
// This argument needs to be rewritten.
const auto set = set_and_binding_list[arg_index].first;
const auto binding = set_and_binding_list[arg_index].second;
#if 0
// TODO(dneto) Should we ignore unused arguments? It's probably not an
// issue in practice. Adding this condition would change a bunch of our
// tests.
if (!Arg.hasNUsesOrMore(1)) {
continue;
}
#endif
Type *argTy = discriminants_list[arg_index].discriminant.type;
assert(arg_index ==
discriminants_list[arg_index].discriminant.arg_index);
if (ShowDescriptors) {
outs() << "DBA: Function " << f_ptr->getName() << " arg " << arg_index
<< " type " << *argTy << "\n";
}
const auto arg_kind = clspv::GetArgKind(Arg);
Type *resource_type = nullptr;
unsigned addr_space = kUnallocated;
// TODO(dneto): Describe opaque case.
// For pointer-to-global and POD arguments, we will remap this
// kernel argument to a SPIR-V module-scope OpVariable, as follows:
//
// Create a %clspv.resource.var.<kind>.N function that returns
// the same kind of pointer that the OpVariable evaluates to.
// The first two arguments are the descriptor set and binding
// to use.
//
// For each call to a %clspv.resource.var.<kind>.N with a unique
// descriptor set and binding, the SPIRVProducer pass will:
// 1) Create a unique OpVariable
// 2) Map uses of the call to the function with the base pointer
// to use.
// For a storage buffer it's the elements in the runtime
// array in the module-scope storage buffer variable.
// So it's something that maps to:
// OpAccessChain %ptr_to_elem %the-var %uint_0 %uint_0
// For POD data, it's something like this:
// OpAccessChain %ptr_to_elem %the-var %uint_0
// 3) Generate no SPIR-V code for the call itself.
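//
// As an illustrative sketch (exact names vary), a storage buffer argument
//   float addrspace(1)* %arg
// has its uses rewritten roughly as:
//   %0 = call { [0 x float] } addrspace(1)* @clspv.resource.var.0(...)
//   %1 = getelementptr ... %0, i32 0, i32 0, i32 0
// so %arg's users now see a pointer to the first runtime-array element.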
switch (arg_kind) {
case clspv::ArgKind::Buffer: {
// If original argument is:
// Elem addrspace(1)*
// Then make a zero-length array to mimic a StorageBuffer struct
// whose first element is a RuntimeArray:
//
// { [0 x Elem] }
//
// Use unnamed struct types so we generate less SPIR-V code.
// Create the type only once.
auto *arr_type = ArrayType::get(argTy->getPointerElementType(), 0);
resource_type = StructType::get(arr_type);
// Preserve the address space in case the pointer is passed into a
// helper function: we don't want to change the type of the helper
// function parameter.
addr_space = argTy->getPointerAddressSpace();
break;
}
case clspv::ArgKind::BufferUBO: {
// If original argument is:
// Elem addrspace(2)*
// Then make an n-element array to mimic a Uniform struct whose
// first element is an array:
//
// { [n x Elem] }
//
// Use unnamed struct types so we generate less SPIR-V code.
// Max UBO size can be specified on the command line. Size the array
// to pretend we are using that space.
uint64_t struct_size = M.getDataLayout().getTypeAllocSize(
argTy->getPointerElementType());
uint64_t num_elements =
clspv::Option::MaxUniformBufferSize() / struct_size;
// Create the type only once.
auto *arr_type =
ArrayType::get(argTy->getPointerElementType(), num_elements);
resource_type = StructType::get(arr_type);
// Preserve the address space in case the pointer is passed into a
// helper function: we don't want to change the type of the helper
// function parameter.
addr_space = argTy->getPointerAddressSpace();
break;
}
case clspv::ArgKind::Pod:
case clspv::ArgKind::PodUBO:
case clspv::ArgKind::PodPushConstant: {
// If original argument is:
// Elem %arg
// Then make a StorageBuffer struct whose element is pod-type:
//
// { Elem }
//
// Use unnamed struct types so we generate less SPIR-V code.
resource_type = StructType::get(argTy);
if (arg_kind == clspv::ArgKind::PodUBO)
addr_space = clspv::AddressSpace::Uniform;
else if (arg_kind == clspv::ArgKind::PodPushConstant)
addr_space = clspv::AddressSpace::PushConstant;
else
addr_space = clspv::AddressSpace::Global;
break;
}
case clspv::ArgKind::Sampler:
case clspv::ArgKind::SampledImage:
case clspv::ArgKind::StorageImage:
// We won't be translating the value here. Keep the type the same,
// since calls using these values need to keep the same type.
resource_type = argTy->getPointerElementType();
addr_space = argTy->getPointerAddressSpace();
break;
default:
errs() << "Unhandled type " << *argTy << "\n";
llvm_unreachable("Allocation of descriptors: Unhandled type");
}
assert(resource_type);
auto fn_name = clspv::ResourceAccessorFunction() + "." +
std::to_string(discriminants_list[arg_index].index);
Function *var_fn = M.getFunction(fn_name);
if (!var_fn) {
// Make the function
PointerType *ptrTy = PointerType::get(resource_type, addr_space);
// The parameters are:
// descriptor set
// binding
// arg kind
// arg index
// discriminant index
// coherent
Type *i32 = Builder.getInt32Ty();
FunctionType *fnTy =
FunctionType::get(ptrTy, {i32, i32, i32, i32, i32, i32}, false);
var_fn =
cast<Function>(M.getOrInsertFunction(fn_name, fnTy).getCallee());
}
// Replace uses of this argument with something dependent on a GEP
// into the result of a call to the special builtin.
auto *set_arg = Builder.getInt32(set);
auto *binding_arg = Builder.getInt32(binding);
auto *arg_kind_arg = Builder.getInt32(unsigned(arg_kind));
auto *arg_index_arg = Builder.getInt32(arg_index);
auto *discriminant_index_arg =
Builder.getInt32(discriminants_list[arg_index].index);
auto *coherent_arg = Builder.getInt32(
discriminants_list[arg_index].discriminant.coherent);
auto *call = Builder.CreateCall(
var_fn, {set_arg, binding_arg, arg_kind_arg, arg_index_arg,
discriminant_index_arg, coherent_arg});
Value *replacement = nullptr;
Value *zero = Builder.getInt32(0);
switch (arg_kind) {
case clspv::ArgKind::Buffer:
case clspv::ArgKind::BufferUBO:
// Return a GEP to the first element
// in the runtime array we'll make.
replacement = Builder.CreateGEP(call, {zero, zero, zero});
break;
case clspv::ArgKind::Pod:
case clspv::ArgKind::PodUBO:
case clspv::ArgKind::PodPushConstant: {
// Replace with a load of the start of the (virtual) variable.
auto *gep = Builder.CreateGEP(call, {zero, zero});
replacement = Builder.CreateLoad(gep);
} break;
case clspv::ArgKind::SampledImage:
case clspv::ArgKind::StorageImage:
case clspv::ArgKind::Sampler: {
// The call returns a pointer to an opaque type. Eventually the
// SPIR-V will need to load the variable, so the natural thing would
// be to emit an LLVM load here. But LLVM does not allow a load of
// an opaque type because it's unsized. So keep the bare call here,
// and do the translation to a load in the SPIRVProducer pass.
replacement = call;
} break;
case clspv::ArgKind::Local:
llvm_unreachable("local is unhandled");
}
if (ShowDescriptors) {
outs() << "DBA: Map " << *argTy << " " << arg_index << "\n"
<< "DBA: index " << discriminants_list[arg_index].index
<< " -> (" << set << "," << binding << ")"
<< "\n";
outs() << "DBA: resource type " << *resource_type << "\n";
outs() << "DBA: var fn " << var_fn->getName() << "\n";
outs() << "DBA: var call " << *call << "\n";
outs() << "DBA: var replacement " << *replacement << "\n";
outs() << "DBA: var replacement ty " << *(replacement->getType())
<< "\n";
outs() << "\n\n";
}
Arg.replaceAllUsesWith(replacement);
}
arg_index++;
}
}
return Changed;
}
bool AllocateDescriptorsPass::AllocateLocalKernelArgSpecIds(Module &M) {
bool Changed = false;
if (ShowDescriptors) {
outs() << "Allocate local kernel arg spec ids\n";
}
// Maps argument type to assigned SpecIds.
DenseMap<Type *, SmallVector<uint32_t, 4>> spec_id_types;
// Tracks SpecIds assigned in the current function.
DenseSet<int> function_spec_ids;
// Tracks newly allocated spec ids.
std::vector<std::pair<Type *, uint32_t>> function_allocations;
// Allocates a SpecId for |type|.
auto GetSpecId = [&M, &spec_id_types, &function_spec_ids,
&function_allocations](Type *type) {
// Attempt to reuse a SpecId. If the SpecId is associated with the same type
// in another kernel and not yet assigned to this kernel it can be reused.
auto where = spec_id_types.find(type);
if (where != spec_id_types.end()) {
for (auto id : where->second) {
if (!function_spec_ids.count(id)) {
// Reuse |id| for |type| in this kernel. Record the use of |id| in
// this kernel.
function_allocations.emplace_back(type, id);
function_spec_ids.insert(id);
return id;
}
}
}
// Need to allocate a new SpecId.
uint32_t spec_id =
clspv::AllocateSpecConstant(&M, clspv::SpecConstant::kLocalMemorySize);
function_allocations.push_back(std::make_pair(type, spec_id));
function_spec_ids.insert(spec_id);
return spec_id;
};
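// For example, if kernel A's local float* argument was assigned SpecId 3,
// then kernel B's first local float* argument reuses SpecId 3, while a
// second local float* argument in the same kernel gets a fresh SpecId.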
IRBuilder<> Builder(M.getContext());
for (Function &F : M) {
// Only scan arguments of kernel functions that have bodies.
if (F.isDeclaration() || F.getCallingConv() != CallingConv::SPIR_KERNEL) {
continue;
}
// Prepare to insert arg remapping instructions at the start of the
// function.
Builder.SetInsertPoint(F.getEntryBlock().getFirstNonPHI());
function_allocations.clear();
function_spec_ids.clear();
int arg_index = 0;
for (Argument &Arg : F.args()) {
Type *argTy = Arg.getType();
const auto arg_kind = clspv::GetArgKind(Arg);
if (arg_kind == clspv::ArgKind::Local) {
// Assign a SpecId to this argument.
int spec_id = GetSpecId(Arg.getType());
if (ShowDescriptors) {
outs() << "DBA: " << F.getName() << " arg " << arg_index << " " << Arg
<< " allocated SpecId " << spec_id << "\n";
}
// The type returned by the accessor function is [0 x Elem]
// addrspace(3)*. The zero-sized array is used to match the correct
// indexing required by GEPs, but the zero size will eventually be
// codegen'd as an OpSpecConstant.
auto fn_name =
clspv::WorkgroupAccessorFunction() + "." + std::to_string(spec_id);
Function *var_fn = M.getFunction(fn_name);
auto *zero = Builder.getInt32(0);
auto *array_ty = ArrayType::get(argTy->getPointerElementType(), 0);
auto *ptr_ty =
PointerType::get(array_ty, argTy->getPointerAddressSpace());
if (!var_fn) {
// Generate the function.
Type *i32 = Builder.getInt32Ty();
FunctionType *fn_ty = FunctionType::get(ptr_ty, i32, false);
var_fn =
cast<Function>(M.getOrInsertFunction(fn_name, fn_ty).getCallee());
}
// Generate an accessor call.
auto *spec_id_arg = Builder.getInt32(spec_id);
auto *call = Builder.CreateCall(var_fn, {spec_id_arg});
// Add the correct GEP. Since the workgroup variable is [0 x <type>]
// addrspace(3)*, generate two zero indices for the GEP.
auto *replacement = Builder.CreateGEP(call, {zero, zero});
Arg.replaceAllUsesWith(replacement);
// We record the assignment of the spec id for this particular argument
// in module-level metadata. This allows us to reconstruct the
// connection during SPIR-V generation. We cannot use the argument as an
// operand to the function because DirectResourceAccess may generate
// these calls in different function scopes.
auto *arg_const = Builder.getInt32(arg_index);
NamedMDNode *nmd =
M.getOrInsertNamedMetadata(clspv::LocalSpecIdMetadataName());
Metadata *ops[3];
ops[0] = ValueAsMetadata::get(&F);
ops[1] = ConstantAsMetadata::get(arg_const);
ops[2] = ConstantAsMetadata::get(spec_id_arg);
MDTuple *tuple = MDTuple::get(M.getContext(), ops);
nmd->addOperand(tuple);
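// The recorded tuple has the shape { kernel function, arg index, spec id },
// e.g. a tuple associating argument 2 of @foo with SpecId 3.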
Changed = true;
}
++arg_index;
}
// Move newly allocated SpecIds for this function into the overall mapping.
for (auto &pair : function_allocations) {
spec_id_types[pair.first].push_back(pair.second);
}
}
return Changed;
}
bool AllocateDescriptorsPass::CallTreeContainsGlobalBarrier(Function *F) {
auto iter = barrier_map_.find(F);
if (iter != barrier_map_.end()) {
return iter->second;
}
bool uses_barrier = false;
for (auto &BB : *F) {
for (auto &I : BB) {
if (auto *call = dyn_cast<CallInst>(&I)) {
// For barrier and mem_fence semantics, only Uniform (covering Uniform
// and StorageBuffer storage classes) and Image semantics are checked
// because Workgroup variables are inherently coherent (and do not
// require the decoration).
auto &func_info = clspv::Builtins::Lookup(call->getCalledFunction());
if (func_info.getType() == clspv::Builtins::kSpirvOp) {
auto *arg0 = dyn_cast<ConstantInt>(call->getArgOperand(0));
spv::Op opcode = static_cast<spv::Op>(arg0->getZExtValue());
if (opcode == spv::OpControlBarrier) {
// barrier()
if (auto *semantics = dyn_cast<ConstantInt>(call->getOperand(3))) {
uses_barrier =
(semantics->getZExtValue() & kMemorySemanticsUniformMemory) ||
(semantics->getZExtValue() & kMemorySemanticsImageMemory);
}
} else if (opcode == spv::OpMemoryBarrier) {
// mem_fence()
if (auto *semantics = dyn_cast<ConstantInt>(call->getOperand(2))) {
uses_barrier =
(semantics->getZExtValue() & kMemorySemanticsUniformMemory) ||
(semantics->getZExtValue() & kMemorySemanticsImageMemory);
}
}
} else if (!call->getCalledFunction()->isDeclaration()) {
// Continue searching in the subfunction.
uses_barrier =
CallTreeContainsGlobalBarrier(call->getCalledFunction());
}
if (uses_barrier)
break;
}
if (uses_barrier)
break;
}
if (uses_barrier)
break;
}
barrier_map_.insert(std::make_pair(F, uses_barrier));
return uses_barrier;
}
std::pair<bool, bool> AllocateDescriptorsPass::HasReadsAndWrites(Value *V) {
// Atomics and OpenCL builtins modf and frexp are all represented as function
// calls.
//
// A user is interesting if it reads or writes memory or could eventually read
// or write memory.
auto IsInterestingUser = [](const User *user) {
if (isa<StoreInst>(user) || isa<LoadInst>(user) || isa<CallInst>(user) ||
user->getType()->isPointerTy())
return true;
return false;
};
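// For example, in
//   kernel void k(global int *p) { p[0] = p[1]; }
// tracing |p|'s uses finds both a load and a store, so this returns
// (read=true, write=true) and |p| would be marked coherent if the kernel
// also uses a global barrier.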
bool read = false;
bool write = false;
DenseSet<Value *> visited;
std::vector<std::pair<Value *, unsigned>> stack;
for (auto &Use : V->uses()) {
if (IsInterestingUser(Use.getUser()))
stack.push_back(std::make_pair(Use.getUser(), Use.getOperandNo()));
}
while (!stack.empty() && !(read && write)) {
Value *value = stack.back().first;
unsigned operand_no = stack.back().second;
stack.pop_back();
if (!visited.insert(value).second)
continue;
if (isa<LoadInst>(value)) {
read = true;
} else if (isa<StoreInst>(value)) {
write = true;
} else {
auto *call = dyn_cast<CallInst>(value);
if (call && !call->getCalledFunction()->isDeclaration()) {
// Trace through the function call and grab the right argument.
auto arg_iter = call->getCalledFunction()->arg_begin();
std::advance(arg_iter, operand_no);
for (auto &Use : arg_iter->uses()) {
auto *User = Use.getUser();
if (IsInterestingUser(User))
stack.push_back(std::make_pair(Use.getUser(), Use.getOperandNo()));
}
} else if (call) {
auto func_info = clspv::Builtins::Lookup(call->getCalledFunction());
// Note that image queries (e.g. get_image_width()) do not touch the
// actual image memory.
switch (func_info.getType()) {
case clspv::Builtins::kReadImagef:
case clspv::Builtins::kReadImagei:
case clspv::Builtins::kReadImageui:
case clspv::Builtins::kReadImageh:
read = true;
break;
case clspv::Builtins::kWriteImagef:
case clspv::Builtins::kWriteImagei:
case clspv::Builtins::kWriteImageui:
case clspv::Builtins::kWriteImageh:
write = true;
break;
case clspv::Builtins::kGetImageWidth:
case clspv::Builtins::kGetImageHeight:
case clspv::Builtins::kGetImageDepth:
case clspv::Builtins::kGetImageDim:
break;
default:
// For other calls, check the function attributes.
if (!call->getCalledFunction()->doesNotAccessMemory()) {
if (!call->getCalledFunction()->doesNotReadMemory())
read = true;
if (!call->getCalledFunction()->onlyReadsMemory())
write = true;
}
break;
}
} else {
// Trace uses that are still pointers or are function calls.
for (auto &U : value->uses()) {
auto *User = U.getUser();
if (IsInterestingUser(User))
stack.push_back(std::make_pair(U.getUser(), U.getOperandNo()));
}
}
}
}
return std::make_pair(read, write);
}