blob: 35929af95e5783e72dc234e009b73ff807663411 [file] [log] [blame]
//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
using namespace clang;
using namespace CodeGen;
namespace {
/// \brief Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
/// \brief Kinds of OpenMP regions used in codegen.
enum CGOpenMPRegionKind {
/// \brief Region with outlined function for standalone 'parallel'
/// directive.
ParallelOutlinedRegion,
/// \brief Region with outlined function for standalone 'task' directive.
TaskOutlinedRegion,
/// \brief Region for constructs that do not require function outlining,
/// like 'for', 'sections', 'atomic' etc. directives.
InlinedRegion,
/// \brief Region with outlined function for standalone 'target' directive.
TargetRegion,
};
CGOpenMPRegionInfo(const CapturedStmt &CS,
const CGOpenMPRegionKind RegionKind,
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
bool HasCancel)
: CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
bool HasCancel)
: CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
Kind(Kind), HasCancel(HasCancel) {}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
virtual const VarDecl *getThreadIDVariable() const = 0;
/// \brief Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
/// \brief Get an LValue for the current ThreadID variable.
/// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
bool hasCancel() const { return HasCancel; }
static bool classof(const CGCapturedStmtInfo *Info) {
return Info->getKind() == CR_OpenMP;
}
~CGOpenMPRegionInfo() override = default;
protected:
CGOpenMPRegionKind RegionKind;
RegionCodeGenTy CodeGen;
OpenMPDirectiveKind Kind;
bool HasCancel;
};
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel,
StringRef HelperName)
: CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
HasCancel),
ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return HelperName; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
ParallelOutlinedRegion;
}
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
StringRef HelperName;
};
/// \brief API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
class UntiedTaskActionTy final : public PrePostActionTy {
bool Untied;
const VarDecl *PartIDVar;
const RegionCodeGenTy UntiedCodeGen;
llvm::SwitchInst *UntiedSwitch = nullptr;
public:
UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
const RegionCodeGenTy &UntiedCodeGen)
: Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
void Enter(CodeGenFunction &CGF) override {
if (Untied) {
// Emit task switching point.
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
auto *DoneBB = CGF.createBasicBlock(".untied.done.");
UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
CGF.EmitBlock(DoneBB);
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(0),
CGF.Builder.GetInsertBlock());
emitUntiedSwitch(CGF);
}
}
void emitUntiedSwitch(CodeGenFunction &CGF) const {
if (Untied) {
auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(PartIDVar),
PartIDVar->getType()->castAs<PointerType>());
CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
PartIdLVal);
UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next.");
CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock());
CGF.EmitBranchThroughCleanup(CurPoint);
CGF.EmitBlock(CurPoint.getBlock());
}
}
unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
};
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel,
const UntiedTaskActionTy &Action)
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
ThreadIDVar(ThreadIDVar), Action(Action) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
/// \brief Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
void emitUntiedSwitch(CodeGenFunction &CGF) override {
Action.emitUntiedSwitch(CGF);
}
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
TaskOutlinedRegion;
}
private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
/// Action for emitting code for untied tasks.
const UntiedTaskActionTy &Action;
};
/// \brief API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
OldCSI(OldCSI),
OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
// \brief Retrieve the value of the context parameter.
llvm::Value *getContextValue() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getContextValue();
llvm_unreachable("No context value for inlined OpenMP region");
}
void setContextValue(llvm::Value *V) override {
if (OuterRegionInfo) {
OuterRegionInfo->setContextValue(V);
return;
}
llvm_unreachable("No context value for inlined OpenMP region");
}
/// \brief Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
return OuterRegionInfo->lookup(VD);
// If there is no outer outlined region,no need to lookup in a list of
// captured variables, we can use the original one.
return nullptr;
}
FieldDecl *getThisFieldDecl() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getThisFieldDecl();
return nullptr;
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
if (OuterRegionInfo)
return OuterRegionInfo->getThreadIDVariable();
return nullptr;
}
/// \brief Get an LValue for the current ThreadID variable.
LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
if (OuterRegionInfo)
return OuterRegionInfo->getThreadIDVariableLValue(CGF);
llvm_unreachable("No LValue for inlined OpenMP construct");
}
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override {
if (auto *OuterRegionInfo = getOldCSI())
return OuterRegionInfo->getHelperName();
llvm_unreachable("No helper name for inlined OpenMP construct");
}
void emitUntiedSwitch(CodeGenFunction &CGF) override {
if (OuterRegionInfo)
OuterRegionInfo->emitUntiedSwitch(CGF);
}
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
}
~CGOpenMPInlinedRegionInfo() override = default;
private:
/// \brief CodeGen info about outer OpenMP region.
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
CGOpenMPRegionInfo *OuterRegionInfo;
};
/// \brief API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
const RegionCodeGenTy &CodeGen, StringRef HelperName)
: CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
/*HasCancel=*/false),
HelperName(HelperName) {}
/// \brief This is unused for target regions because each starts executing
/// with a single thread.
const VarDecl *getThreadIDVariable() const override { return nullptr; }
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return HelperName; }
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
}
private:
StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
llvm_unreachable("No codegen for expressions");
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
OMPD_unknown,
/*HasCancel=*/false),
PrivScope(CGF) {
// Make sure the globals captured in the provided statement are local by
// using the privatization logic. We assume the same variable is not
// captured more than once.
for (auto &C : CS.captures()) {
if (!C.capturesVariable() && !C.capturesVariableByCopy())
continue;
const VarDecl *VD = C.getCapturedVar();
if (VD->isLocalVarDeclOrParm())
continue;
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
/*RefersToEnclosingVariableOrCapture=*/false,
VD->getType().getNonReferenceType(), VK_LValue,
SourceLocation());
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
return CGF.EmitLValue(&DRE).getAddress();
});
}
(void)PrivScope.Privatize();
}
/// \brief Lookup the captured field decl for a variable.
const FieldDecl *lookup(const VarDecl *VD) const override {
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
return FD;
return nullptr;
}
/// \brief Emit the captured statement body.
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
llvm_unreachable("No body for expressions");
}
/// \brief Get a variable or parameter for storing global thread id
/// inside OpenMP construct.
const VarDecl *getThreadIDVariable() const override {
llvm_unreachable("No thread id for expressions");
}
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override {
llvm_unreachable("No helper name for expressions");
}
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
private:
/// Private scope to capture global variables.
CodeGenFunction::OMPPrivateScope PrivScope;
};
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
public:
/// \brief Constructs region for combined constructs.
/// \param CodeGen Code generation sequence for combined directives. Includes
/// a list of functions used for code generation of implicitly inlined
/// regions.
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGF(CGF) {
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
CGF.LambdaThisCaptureField = nullptr;
}
~InlinedOpenMPRegionRAII() {
// Restore original CapturedStmtInfo only if we're done with code emission.
auto *OldCSI =
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
delete CGF.CapturedStmtInfo;
CGF.CapturedStmtInfo = OldCSI;
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
}
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
/// \brief Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
/// \brief Use c-style ident structure.
OMP_IDENT_KMPC = 0x02,
/// \brief Atomic reduction option for kmpc_reduce.
OMP_ATOMIC_REDUCE = 0x10,
/// \brief Explicit 'barrier' directive.
OMP_IDENT_BARRIER_EXPL = 0x20,
/// \brief Implicit barrier in code.
OMP_IDENT_BARRIER_IMPL = 0x40,
/// \brief Implicit barrier in 'for' directive.
OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
/// \brief Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
/// \brief Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
/// Call of __kmp_for_static_init for static loop.
OMP_IDENT_WORK_LOOP = 0x200,
/// Call of __kmp_for_static_init for sections.
OMP_IDENT_WORK_SECTIONS = 0x400,
/// Call of __kmp_for_static_init for distribute.
OMP_IDENT_WORK_DISTRIBUTE = 0x800,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
/// kmp_int32 reserved_1; /**< might be used in Fortran;
/// see above */
/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
/// KMP_IDENT_KMPC identifies this union
/// member */
/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
/// see above */
///#if USE_ITT_BUILD
/// /* but currently used for storing
/// region-specific ITT */
/// /* contextual information. */
///#endif /* USE_ITT_BUILD */
/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
/// C++ */
/// char const *psource; /**< String describing the source location.
/// The string is composed of semi-colon separated
// fields which describe the source file,
/// the function and a pair of line numbers that
/// delimit the construct.
/// */
/// } ident_t;
enum IdentFieldIndex {
/// \brief might be used in Fortran
IdentField_Reserved_1,
/// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
IdentField_Flags,
/// \brief Not really used in Fortran any more
IdentField_Reserved_2,
/// \brief Source[4] in Fortran, do not use for C++
IdentField_Reserved_3,
/// \brief String describing the source location. The string is composed of
/// semi-colon separated fields which describe the source file, the function
/// and a pair of line numbers that delimit the construct.
IdentField_PSource
};
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
/// \brief Lower bound for default (unordered) versions.
OMP_sch_lower = 32,
OMP_sch_static_chunked = 33,
OMP_sch_static = 34,
OMP_sch_dynamic_chunked = 35,
OMP_sch_guided_chunked = 36,
OMP_sch_runtime = 37,
OMP_sch_auto = 38,
/// static with chunk adjustment (e.g., simd)
OMP_sch_static_balanced_chunked = 45,
/// \brief Lower bound for 'ordered' versions.
OMP_ord_lower = 64,
OMP_ord_static_chunked = 65,
OMP_ord_static = 66,
OMP_ord_dynamic_chunked = 67,
OMP_ord_guided_chunked = 68,
OMP_ord_runtime = 69,
OMP_ord_auto = 70,
OMP_sch_default = OMP_sch_static,
/// \brief dist_schedule types
OMP_dist_sch_static_chunked = 91,
OMP_dist_sch_static = 92,
/// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
/// Set if the monotonic schedule modifier was present.
OMP_sch_modifier_monotonic = (1 << 29),
/// Set if the nonmonotonic schedule modifier was present.
OMP_sch_modifier_nonmonotonic = (1 << 30),
};
enum OpenMPRTLFunction {
/// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
/// kmpc_micro microtask, ...);
OMPRTL__kmpc_fork_call,
/// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
/// kmp_int32 global_tid, void *data, size_t size, void ***cache);
OMPRTL__kmpc_threadprivate_cached,
/// \brief Call to void __kmpc_threadprivate_register( ident_t *,
/// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
OMPRTL__kmpc_threadprivate_register,
// Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
OMPRTL__kmpc_global_thread_num,
// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
OMPRTL__kmpc_critical,
// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
// global_tid, kmp_critical_name *crit, uintptr_t hint);
OMPRTL__kmpc_critical_with_hint,
// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
OMPRTL__kmpc_end_critical,
// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_cancel_barrier,
// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_barrier,
// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_for_static_fini,
// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_serialized_parallel,
// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_end_serialized_parallel,
// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads);
OMPRTL__kmpc_push_num_threads,
// Call to void __kmpc_flush(ident_t *loc);
OMPRTL__kmpc_flush,
// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_master,
// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_master,
// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
// int end_part);
OMPRTL__kmpc_omp_taskyield,
// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_single,
// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
OMPRTL__kmpc_end_single,
// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
OMPRTL__kmpc_omp_task_alloc,
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
// new_task);
OMPRTL__kmpc_omp_task,
// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
// kmp_int32 didit);
OMPRTL__kmpc_copyprivate,
// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
OMPRTL__kmpc_reduce,
// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
// *lck);
OMPRTL__kmpc_reduce_nowait,
// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce,
// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
OMPRTL__kmpc_end_reduce_nowait,
// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t * new_task);
OMPRTL__kmpc_omp_task_begin_if0,
// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
// kmp_task_t * new_task);
OMPRTL__kmpc_omp_task_complete_if0,
// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_ordered,
// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_end_ordered,
// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_omp_taskwait,
// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_taskgroup,
// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_end_taskgroup,
// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
// int proc_bind);
OMPRTL__kmpc_push_proc_bind,
// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
OMPRTL__kmpc_omp_task_with_deps,
// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
OMPRTL__kmpc_omp_wait_deps,
// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
// global_tid, kmp_int32 cncl_kind);
OMPRTL__kmpc_cancellationpoint,
// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind);
OMPRTL__kmpc_cancel,
// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_teams, kmp_int32 thread_limit);
OMPRTL__kmpc_push_num_teams,
// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
OMPRTL__kmpc_fork_teams,
// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
// sched, kmp_uint64 grainsize, void *task_dup);
OMPRTL__kmpc_taskloop,
// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
// num_dims, struct kmp_dim *dims);
OMPRTL__kmpc_doacross_init,
// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
OMPRTL__kmpc_doacross_fini,
// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_post,
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_wait,
// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
// *data);
OMPRTL__kmpc_task_reduction_init,
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
//
// Offloading related calls
//
// Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
OMPRTL__tgt_target,
// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
OMPRTL__tgt_target_teams,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_unregister_lib,
// Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_begin,
// Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_end,
// Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
OMPRTL__tgt_target_data_update,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
PrePostActionTy *Action;
public:
explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
if (!CGF.HaveInsertPoint())
return;
Action->Exit(CGF);
}
};
} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
CodeGenFunction::RunCleanupsScope Scope(CGF);
if (PrePostAction) {
CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
Callback(CodeGen, CGF, *PrePostAction);
} else {
PrePostActionTy Action;
Callback(CodeGen, CGF, Action);
}
}
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
if (auto *DRE =
dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
return DRD;
return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
const OMPDeclareReductionDecl *DRD,
const Expr *InitOp,
Address Private, Address Original,
QualType Ty) {
if (DRD->getInitializer()) {
std::pair<llvm::Function *, llvm::Function *> Reduction =
CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
auto *CE = cast<CallExpr>(InitOp);
auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
[=]() -> Address { return Private; });
PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
[=]() -> Address { return Original; });
(void)PrivateScope.Privatize();
RValue Func = RValue::get(Reduction.second);
CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
CGF.EmitIgnoredExpr(InitOp);
} else {
llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
auto *GV = new llvm::GlobalVariable(
CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
llvm::GlobalValue::PrivateLinkage, Init, ".init");
LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
RValue InitRVal;
switch (CGF.getEvaluationKind(Ty)) {
case TEK_Scalar:
InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
break;
case TEK_Complex:
InitRVal =
RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
break;
case TEK_Aggregate:
InitRVal = RValue::getAggregate(LV.getAddress());
break;
}
OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
/*IsInitializer=*/false);
}
}
/// \brief Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType Type, bool EmitDeclareReductionInit,
const Expr *Init,
const OMPDeclareReductionDecl *DRD,
Address SrcAddr = Address::invalid()) {
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
auto ArrayTy = Type->getAsArrayTypeUnsafe();
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
if (DRD)
SrcAddr =
CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
llvm::Value *SrcBegin = nullptr;
if (DRD)
SrcBegin = SrcAddr.getPointer();
auto DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
// The basic structure here is a while-do loop.
auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
auto IsEmpty =
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
auto EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
llvm::PHINode *SrcElementPHI = nullptr;
Address SrcElementCurrent = Address::invalid();
if (DRD) {
SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
}
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
Address(DestElementPHI,
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit copy.
{
CodeGenFunction::RunCleanupsScope InitScope(CGF);
if (EmitDeclareReductionInit) {
emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
SrcElementCurrent, ElementTy);
} else
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
/*IsInitializer=*/false);
}
if (DRD) {
// Shift the address forward by one element.
auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the address forward by one element.
auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
// Check whether we've reached the end.
auto Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
// Done.
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
return CGF.EmitOMPSharedLValue(E);
}
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
const Expr *E) {
if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
const OMPDeclareReductionDecl *DRD) {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
bool EmitDeclareReductionInit =
DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
EmitDeclareReductionInit,
EmitDeclareReductionInit ? ClausesData[N].ReductionOp
: PrivateVD->getInit(),
DRD, SharedLVal.getAddress());
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> ReductionOps) {
ClausesData.reserve(Shareds.size());
SharedAddresses.reserve(Shareds.size());
Sizes.reserve(Shareds.size());
BaseDecls.reserve(Shareds.size());
auto IPriv = Privates.begin();
auto IRed = ReductionOps.begin();
for (const auto *Ref : Shareds) {
ClausesData.emplace_back(Ref, *IPriv, *IRed);
std::advance(IPriv, 1);
std::advance(IRed, 1);
}
}
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
assert(SharedAddresses.size() == N &&
"Number of generated lvalues must be exactly N.");
LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
SharedAddresses.emplace_back(First, Second);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType()),
nullptr);
return;
}
llvm::Value *Size;
llvm::Value *SizeInChars;
llvm::Type *ElemType =
cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
SharedAddresses[N].first.getPointer());
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
SizeInChars = CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType());
Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
Sizes.emplace_back(SizeInChars, Size);
CodeGenFunction::OpaqueValueMapping OpaqueMap(
CGF,
cast<OpaqueValueExpr>(
CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
RValue::get(Size));
CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
llvm::Value *Size) {
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
if (!PrivateType->isVariablyModifiedType()) {
assert(!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.");
return;
}
CodeGenFunction::OpaqueValueMapping OpaqueMap(
CGF,
cast<OpaqueValueExpr>(
CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
RValue::get(Size));
CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
assert(SharedAddresses.size() > N && "No variable was generated");
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
QualType PrivateType = PrivateVD->getType();
PrivateAddr = CGF.Builder.CreateElementBitCast(
PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
QualType SharedType = SharedAddresses[N].first.getType();
SharedLVal = CGF.MakeAddrLValue(
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
CGF.ConvertTypeForMem(SharedType)),
SharedType, SharedAddresses[N].first.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
PrivateAddr, SharedLVal.getAddress(),
SharedLVal.getType());
} else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
!CGF.isTrivialInitializer(PrivateVD->getInit())) {
CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
PrivateVD->getType().getQualifiers(),
/*IsInitializer=*/false);
}
}
bool ReductionCodeGen::needCleanups(unsigned N) {
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
return DTorKind != QualType::DK_none;
}
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr) {
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
if (needCleanups(N)) {
PrivateAddr = CGF.Builder.CreateElementBitCast(
PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
}
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
LValue BaseLV) {
BaseTy = BaseTy.getNonReferenceType();
while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
if (auto *PtrTy = BaseTy->getAs<PointerType>())
BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
else {
LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
}
BaseTy = BaseTy->getPointeeType();
}
return CGF.MakeAddrLValue(
CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
CGF.ConvertTypeForMem(ElTy)),
BaseLV.getType(), BaseLV.getBaseInfo(),
CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
llvm::Value *Addr) {
Address Tmp = Address::invalid();
Address TopTmp = Address::invalid();
Address MostTopTmp = Address::invalid();
BaseTy = BaseTy.getNonReferenceType();
while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
Tmp = CGF.CreateMemTemp(BaseTy);
if (TopTmp.isValid())
CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
else
MostTopTmp = Tmp;
TopTmp = Tmp;
BaseTy = BaseTy->getPointeeType();
}
llvm::Type *Ty = BaseLVType;
if (Tmp.isValid())
Ty = Tmp.getElementType();
Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
if (Tmp.isValid()) {
CGF.Builder.CreateStore(Addr, Tmp);
return MostTopTmp;
}
return Address(Addr, BaseLVAlignment);
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr) {
const DeclRefExpr *DE;
const VarDecl *OrigVD = nullptr;
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
auto *Base = OASE->getBase()->IgnoreParenImpCasts();
while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
Base = TempOASE->getBase()->IgnoreParenImpCasts();
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
DE = cast<DeclRefExpr>(Base);
OrigVD = cast<VarDecl>(DE->getDecl());
} else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
auto *Base = ASE->getBase()->IgnoreParenImpCasts();
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
DE = cast<DeclRefExpr>(Base);
OrigVD = cast<VarDecl>(DE->getDecl());
}
if (OrigVD) {
BaseDecls.emplace_back(OrigVD);
auto OriginalBaseLValue = CGF.EmitLValue(DE);
LValue BaseLValue =
loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
OriginalBaseLValue);
llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
llvm::Value *Ptr =
CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
return castToBase(CGF, OrigVD->getType(),
SharedAddresses[N].first.getType(),
OriginalBaseLValue.getPointer()->getType(),
OriginalBaseLValue.getAlignment(), Ptr);
}
BaseDecls.emplace_back(
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
return PrivateAddr;
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType()->castAs<PointerType>());
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
if (!CGF.HaveInsertPoint())
return;
// 1.2.2 OpenMP Language Terminology
// Structured block - An executable statement with a single entry at the
// top and a single exit at the bottom.
// The point of exit cannot be a branch out of the structured block.
// longjmp() and throw() must not violate the entry/exit criteria.
CGF.EHStack.pushTerminate();
CodeGen(CGF);
CGF.EHStack.popTerminate();
}
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType(),
AlignmentSource::Decl);
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
: CGM(CGM), OffloadEntriesInfoManager(CGM) {
IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
CGM.Int8PtrTy /* psource */);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
}
void CGOpenMPRuntime::clear() {
InternalVars.clear();
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
const Expr *CombinerInitializer, const VarDecl *In,
const VarDecl *Out, bool IsCombiner) {
// void .omp_combiner.(Ty *in, Ty *out);
auto &C = CGM.getContext();
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
/*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
/*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
auto &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
auto *Fn = llvm::Function::Create(
FnTy, llvm::GlobalValue::InternalLinkage,
IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
Fn->removeFnAttr(llvm::Attribute::NoInline);
Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
CodeGenFunction::OMPPrivateScope Scope(CGF);
Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
.getAddress();
});
Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
.getAddress();
});
(void)Scope.Privatize();
if (!IsCombiner && Out->hasInit() &&
!CGF.isTrivialInitializer(Out->getInit())) {
CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
Out->getType().getQualifiers(),
/*IsInitializer=*/true);
}
if (CombinerInitializer)
CGF.EmitIgnoredExpr(CombinerInitializer);
Scope.ForceCleanup();
CGF.FinishFunction();
return Fn;
}
void CGOpenMPRuntime::emitUserDefinedReduction(
CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
if (UDRMap.count(D) > 0)
return;
auto &C = CGM.getContext();
if (!In || !Out) {
In = &C.Idents.get("omp_in");
Out = &C.Idents.get("omp_out");
}
llvm::Function *Combiner = emitCombinerOrInitializer(
CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
cast<VarDecl>(D->lookup(Out).front()),
/*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
if (auto *Init = D->getInitializer()) {
if (!Priv || !Orig) {
Priv = &C.Idents.get("omp_priv");
Orig = &C.Idents.get("omp_orig");
}
Initializer = emitCombinerOrInitializer(
CGM, D->getType(),
D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
: nullptr,
cast<VarDecl>(D->lookup(Orig).front()),
cast<VarDecl>(D->lookup(Priv).front()),
/*IsCombiner=*/false);
}
UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
if (CGF) {
auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
Decls.second.push_back(D);
}
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
auto I = UDRMap.find(D);
if (I != UDRMap.end())
return I->second;
emitUserDefinedReduction(/*CGF=*/nullptr, D);
return UDRMap.lookup(D);
}
// Layout information for ident_t.
static CharUnits getIdentAlign(CodeGenModule &CGM) {
return CGM.getPointerAlign();
}
static CharUnits getIdentSize(CodeGenModule &CGM) {
assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
return CharUnits::fromQuantity(16) + CGM.getPointerSize();
}
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
// All the fields except the last are i32, so this works beautifully.
return unsigned(Field) * CharUnits::fromQuantity(4);
}
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
IdentFieldIndex Field,
const llvm::Twine &Name = "") {
auto Offset = getOffsetOfIdentField(Field);
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
static llvm::Value *emitParallelOrTeamsOutlinedFunction(
CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
HasCancel = OPD->hasCancel();
else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
HasCancel = OPSD->hasCancel();
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
return emitParallelOrTeamsOutlinedFunction(
CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
return emitParallelOrTeamsOutlinedFunction(
CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
bool Tied, unsigned &NumberOfParts) {
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
auto *ThreadID = getThreadID(CGF, D.getLocStart());
auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
llvm::Value *TaskArgs[] = {
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
TaskTVar->getType()->castAs<PointerType>())
.getPointer()};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
};
CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
UntiedCodeGen);
CodeGen.setAction(Action);
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
auto *TD = dyn_cast<OMPTaskDirective>(&D);
CodeGenFunction CGF(CGM, true);
CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
InnermostKind,
TD ? TD->hasCancel() : false, Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
if (!Tied)
NumberOfParts = Action.getNumberOfParts();
return Res;
}
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = getIdentAlign(CGM);
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
if (!Entry) {
if (!DefaultOpenMPPSource) {
// Initialize default location for psource field of ident_t structure of
// all ident_t objects. Format is ";file;function;line;column;;".
// Taken from
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
DefaultOpenMPPSource =
CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
DefaultOpenMPPSource =
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
ConstantInitBuilder builder(CGM);
auto fields = builder.beginStruct(IdentTy);
fields.addInt(CGM.Int32Ty, 0);
fields.addInt(CGM.Int32Ty, Flags);
fields.addInt(CGM.Int32Ty, 0);
fields.addInt(CGM.Int32Ty, 0);
fields.add(DefaultOpenMPPSource);
auto DefaultOpenMPLocation =
fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
llvm::GlobalValue::PrivateLinkage);
DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
}
return Address(Entry, Align);
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
Flags |= OMP_IDENT_KMPC;
// If no debug info is generated - return global default location.
if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
Loc.isInvalid())
return getOrCreateDefaultLocation(Flags).getPointer();
assert(CGF.CurFn && "No function in current CodeGenFunction.");
Address LocValue = Address::invalid();
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
// GetOpenMPThreadID was called before this routine.
if (!LocValue.isValid()) {
// Generate "ident_t .kmpc_loc.addr;"
Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
".kmpc_loc.addr");
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGM.getSize(getIdentSize(CGF.CGM)));
}
// char **psource = &.kmpc_loc_<flags>.addr.psource;
Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
if (OMPDebugLoc == nullptr) {
SmallString<128> Buffer2;
llvm::raw_svector_ostream OS2(Buffer2);
// Build debug location
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
OS2 << ";" << PLoc.getFilename() << ";";
if (const FunctionDecl *FD =
dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
OS2 << FD->getQualifiedNameAsString();
}
OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
}
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
// Our callers always pass this to a runtime function, so for
// convenience, go ahead and return a naked pointer.
return LocValue.getPointer();
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
SourceLocation Loc) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *ThreadID = nullptr;
// Check whether we've already cached a load of the thread id in this
// function.
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end()) {
ThreadID = I->second.ThreadID;
if (ThreadID != nullptr)
return ThreadID;
}
// If exceptions are enabled, do not use parameter to avoid possible crash.
if (!CGF.getInvokeDest()) {
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
if (OMPRegionInfo->getThreadIDVariable()) {
// Check if this an outlined function with thread id passed as argument.
auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
// If value loaded in entry block, cache it and use it everywhere in
// function.
if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
}
return ThreadID;
}
}
}
// This is not an outlined function region - need to call __kmpc_int32
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
ThreadID =
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = ThreadID;
return ThreadID;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
if (OpenMPLocThreadIDMap.count(CGF.CurFn))
OpenMPLocThreadIDMap.erase(CGF.CurFn);
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for(auto *D : FunctionUDRMap[CGF.CurFn]) {
UDRMap.erase(D);
}
FunctionUDRMap.erase(CGF.CurFn);
}
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
if (!IdentTy) {
}
return llvm::PointerType::getUnqual(IdentTy);
}
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
if (!Kmpc_MicroTy) {
// Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
llvm::PointerType::getUnqual(CGM.Int32Ty)};
Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
}
return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
llvm::Constant *
CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
llvm::Constant *RTLFn = nullptr;
switch (static_cast<OpenMPRTLFunction>(Function)) {
case OMPRTL__kmpc_fork_call: {
// Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
break;
}
case OMPRTL__kmpc_global_thread_num: {
// Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
break;
}
case OMPRTL__kmpc_threadprivate_cached: {
// Build void *__kmpc_threadprivate_cached(ident_t *loc,
// kmp_int32 global_tid, void *data, size_t size, void ***cache);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy, CGM.SizeTy,
CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
break;
}
case OMPRTL__kmpc_critical: {
// Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
break;
}
case OMPRTL__kmpc_critical_with_hint: {
// Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit, uintptr_t hint);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy),
CGM.IntPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
break;
}
case OMPRTL__kmpc_threadprivate_register: {
// Build void __kmpc_threadprivate_register(ident_t *, void *data,
// kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
// typedef void *(*kmpc_ctor)(void *);
auto KmpcCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg*/ false)->getPointerTo();
// typedef void *(*kmpc_cctor)(void *, void *);
llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto KmpcCopyCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
/*isVarArg*/ false)->getPointerTo();
// typedef void (*kmpc_dtor)(void *);
auto KmpcDtorTy =
llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
->getPointerTo();
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
KmpcCopyCtorTy, KmpcDtorTy};
auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
/*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
break;
}
case OMPRTL__kmpc_end_critical: {
// Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *crit);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
break;
}
case OMPRTL__kmpc_cancel_barrier: {
// Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
break;
}
case OMPRTL__kmpc_barrier: {
// Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
break;
}
case OMPRTL__kmpc_for_static_fini: {
// Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
break;
}
case OMPRTL__kmpc_push_num_threads: {
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
break;
}
case OMPRTL__kmpc_serialized_parallel: {
// Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
break;
}
case OMPRTL__kmpc_end_serialized_parallel: {
// Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
break;
}
case OMPRTL__kmpc_flush: {
// Build void __kmpc_flush(ident_t *loc);
llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
break;
}
case OMPRTL__kmpc_master: {
// Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
break;
}
case OMPRTL__kmpc_end_master: {
// Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
break;
}
case OMPRTL__kmpc_omp_taskyield: {
// Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
// int end_part);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
break;
}
case OMPRTL__kmpc_single: {
// Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
break;
}
case OMPRTL__kmpc_end_single: {
// Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
break;
}
case OMPRTL__kmpc_omp_task_alloc: {
// Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
assert(KmpRoutineEntryPtrTy != nullptr &&
"Type kmp_routine_entry_t must be created.");
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
// Return void * and then cast to particular kmp_task_t type.
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
break;
}
case OMPRTL__kmpc_omp_task: {
// Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
break;
}
case OMPRTL__kmpc_copyprivate: {
// Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
// kmp_int32 didit);
llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *CpyFnTy =
llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
break;
}
case OMPRTL__kmpc_reduce: {
// Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
/*isVarArg=*/false);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
break;
}
case OMPRTL__kmpc_reduce_nowait: {
// Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
// *lck);
llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
/*isVarArg=*/false);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
break;
}
case OMPRTL__kmpc_end_reduce: {
// Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
break;
}
case OMPRTL__kmpc_end_reduce_nowait: {
// Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
// kmp_critical_name *lck);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty,
llvm::PointerType::getUnqual(KmpCriticalNameTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
break;
}
case OMPRTL__kmpc_omp_task_begin_if0: {
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
break;
}
case OMPRTL__kmpc_omp_task_complete_if0: {
// Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
// *new_task);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy,
/*Name=*/"__kmpc_omp_task_complete_if0");
break;
}
case OMPRTL__kmpc_ordered: {
// Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
break;
}
case OMPRTL__kmpc_end_ordered: {
// Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
break;
}
case OMPRTL__kmpc_omp_taskwait: {
// Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
break;
}
case OMPRTL__kmpc_taskgroup: {
// Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
break;
}
case OMPRTL__kmpc_end_taskgroup: {
// Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
break;
}
case OMPRTL__kmpc_push_proc_bind: {
// Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
// int proc_bind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
break;
}
case OMPRTL__kmpc_omp_task_with_deps: {
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
break;
}
case OMPRTL__kmpc_omp_wait_deps: {
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
// kmp_depend_info_t *noalias_dep_list);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int32Ty, CGM.VoidPtrTy,
CGM.Int32Ty, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
break;
}
case OMPRTL__kmpc_cancellationpoint: {
// Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
// global_tid, kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
break;
}
case OMPRTL__kmpc_cancel: {
// Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
break;
}
case OMPRTL__kmpc_push_num_teams: {
// Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
// kmp_int32 num_teams, kmp_int32 num_threads)
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
break;
}
case OMPRTL__kmpc_fork_teams: {
// Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
// microtask, ...);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
getKmpc_MicroPointerTy()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
break;
}
case OMPRTL__kmpc_taskloop: {
// Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
// sched, kmp_uint64 grainsize, void *task_dup);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
CGM.IntTy,
CGM.VoidPtrTy,
CGM.IntTy,
CGM.Int64Ty->getPointerTo(),
CGM.Int64Ty->getPointerTo(),
CGM.Int64Ty,
CGM.IntTy,
CGM.IntTy,
CGM.Int64Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
break;
}
case OMPRTL__kmpc_doacross_init: {
// Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
// num_dims, struct kmp_dim *dims);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
break;
}
case OMPRTL__kmpc_doacross_fini: {
// Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
break;
}
case OMPRTL__kmpc_doacross_post: {
// Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
break;
}
case OMPRTL__kmpc_doacross_wait: {
// Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
}
case OMPRTL__kmpc_task_reduction_init: {
// Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
// *data);
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn =
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
break;
}
case OMPRTL__kmpc_task_reduction_get_th_data: {
// Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
case OMPRTL__tgt_target_teams: {
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo(),
CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
break;
}
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
break;
}
case OMPRTL__tgt_unregister_lib: {
// Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
QualType ParamTy =
CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
break;
}
case OMPRTL__tgt_target_data_begin: {
// Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
break;
}
case OMPRTL__tgt_target_data_end: {
// Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
break;
}
case OMPRTL__tgt_target_data_update: {
// Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
// void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
llvm::Type *TypeParams[] = {CGM.Int32Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
CGM.Int32Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
break;
}
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
}
llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy, // p_stride
ITy, // incr
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
: (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
CGM.Int32Ty, // schedtype
ITy, // lower
ITy, // upper
ITy, // stride
ITy // chunk
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
: (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
bool IVSigned) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
auto Name =
IVSize == 32
? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
: (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
getIdentTyPointerTy(), // loc
CGM.Int32Ty, // tid
llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
PtrTy, // p_lower
PtrTy, // p_upper
PtrTy // p_stride
};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
assert(!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported());
// Lookup the entry, lazily creating it if necessary.
return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
Twine(CGM.getMangledName(VD)) + ".cache.");
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
const VarDecl *VD,
Address VDAddr,
SourceLocation Loc) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return VDAddr;
auto VarTy = VDAddr.getElementType();
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
CGM.Int8PtrTy),
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
getOrCreateThreadPrivateCache(VD)};
return Address(CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
VDAddr.getAlignment());
}
void CGOpenMPRuntime::emitThreadPrivateVarInit(
CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
// library.
auto OMPLoc = emitUpdateLocation(CGF, Loc);
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
OMPLoc);
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
// to register constructor/destructor for variable.
llvm::Value *Args[] = {OMPLoc,
CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
CGM.VoidPtrTy),
Ctor, CopyCtor, Dtor};
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
const VarDecl *VD, Address VDAddr, SourceLocation Loc,
bool PerformInit, CodeGenFunction *CGF) {
if (CGM.getLangOpts().OpenMPUseTLS &&
CGM.getContext().getTargetInfo().isTLSSupported())
return nullptr;
VD = VD->getDefinition(CGM.getContext());
if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
ThreadPrivateWithDefinition.insert(VD);
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
auto Init = VD->getAnyInitializer();
if (CGM.getLangOpts().CPlusPlus && PerformInit) {
// Generate function that re-emits the declaration's initializer into the
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidPtrTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_ctor_.", FI, Loc);
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
Args, SourceLocation());
auto ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
Address Arg = Address(ArgVal, VDAddr.getAlignment());
Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
CtorCGF.ConvertTypeForMem(ASTTy));
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
/*IsInitializer=*/true);
ArgVal = CtorCGF.EmitLoadOfScalar(
CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
CGM.getContext().VoidPtrTy, Dst.getLocation());
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
CtorCGF.FinishFunction();
Ctor = Fn;
}
if (VD->getType().isDestructedType() != QualType::DK_none) {
// Generate function that emits destructor call for the threadprivate copy
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
CGM.getContext().VoidTy, Args);
auto FTy = CGM.getTypes().GetFunctionType(FI);
auto Fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, ".__kmpc_global_dtor_.", FI, Loc);
auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
SourceLocation());
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
auto ArgVal = DtorCGF.EmitLoadOfScalar(
DtorCGF.GetAddrOfLocalVar(&Dst),
/*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
DtorCGF.FinishFunction();
Dtor = Fn;
}
// Do not emit init function if it is not required.
if (!Ctor && !Dtor)
return nullptr;
llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
auto CopyCtorTy =
llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
/*isVarArg=*/false)->getPointerTo();
// Copying constructor for the threadprivate variable.
// Must be NULL - reserved by runtime, but currently it requires that this
// parameter is always NULL. Otherwise it fires assertion.
CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
if (Ctor == nullptr) {
auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
/*isVarArg=*/false)->getPointerTo();
Ctor = llvm::Constant::getNullValue(CtorTy);
}
if (Dtor == nullptr) {
auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
/*isVarArg=*/false)->getPointerTo();
Dtor = llvm::Constant::getNullValue(DtorTy);
}
if (!CGF) {
auto InitFunctionTy =
llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
InitFunctionTy, ".__omp_threadprivate_init_.",
CGM.getTypes().arrangeNullaryFunction());
CodeGenFunction InitCGF(CGM);
FunctionArgList ArgList;
InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
CGM.getTypes().arrangeNullaryFunction(), ArgList,
Loc);
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
InitCGF.FinishFunction();
return InitFunction;
}
emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
}
return nullptr;
}
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
QualType VarType,
StringRef Name) {
llvm::Twine VarName(Name, ".artificial.");
llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, SourceLocation()),
getThreadID(CGF, SourceLocation()),
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
/*IsSigned=*/false),
getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
return Address(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitRuntimeCall(
createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
VarLVType->getPointerTo(/*AddrSpace=*/0)),
CGM.getPointerAlign());
}
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
/// ThenGen();
/// } else {
/// ElseGen();
/// }
void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
const RegionCodeGenTy &ThenGen,
const RegionCodeGenTy &ElseGen) {
CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
bool CondConstant;
if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
if (CondConstant)
ThenGen(CGF);
else
ElseGen(CGF);
return;
}
// Otherwise, the condition did not fold, or we couldn't elide it. Just
// emit the conditional branch.
auto ThenBlock = CGF.createBasicBlock("omp_if.then");
auto ElseBlock = CGF.createBasicBlock("omp_if.else");
auto ContBlock = CGF.createBasicBlock("omp_if.end");
CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
// Emit the 'then' code.
CGF.EmitBlock(ThenBlock);
ThenGen(CGF);
CGF.EmitBranch(ContBlock);
// Emit the 'else' code if present.
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(ElseBlock);
ElseGen(CGF);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBranch(ContBlock);
// Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Value *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) {
if (!CGF.HaveInsertPoint())
return;
auto *RTLoc = emitUpdateLocation(CGF, Loc);
auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
PrePostActionTy &) {
// Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
auto &RT = CGF.CGM.getOpenMPRuntime();
llvm::Value *Args[] = {
RTLoc,
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
llvm::SmallVector<llvm::Value *, 16> RealArgs;
RealArgs.append(std::begin(Args), std::end(Args));
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
CGF.EmitRuntimeCall(RTLFn, RealArgs);
};
auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
auto ThreadID = RT.getThreadID(CGF, Loc);
// Build calls:
// __kmpc_serialized_parallel(&Loc, GTid);
llvm::Value *Args[] = {RTLoc, ThreadID};
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
// OutlinedFn(&GTid, &zero, CapturedStruct);
auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
Address ZeroAddr =
CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
/*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
EndArgs);
};
if (IfCond)
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
else {
RegionCodeGenTy ThenRCG(ThenGen);
ThenRCG(CGF);
}
}
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
SourceLocation Loc) {
if (auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
if (OMPRegionInfo->getThreadIDVariable())
return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
auto ThreadID = getThreadID(CGF, Loc);
auto Int32Ty =
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
CGF.EmitStoreOfScalar(ThreadID,
CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
return ThreadIDTemp;
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
const llvm::Twine &Name) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << Name;
auto RuntimeName = Out.str();
auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
if (Elem.second) {
assert(Elem.second->getType()->getPointerElementType() == Ty &&
"OMP internal variable has different type than requested");
return &*Elem.second;
}
return Elem.second = new llvm::GlobalVariable(
CGM.getModule(), Ty, /*IsConstant*/ false,
llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
Elem.first());
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
llvm::Twine Name(".gomp_critical_user_", CriticalName);
return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
llvm::Value *EnterCallee;
ArrayRef<llvm::Value *> EnterArgs;
llvm::Value *ExitCallee;
ArrayRef<llvm::Value *> ExitArgs;
bool Conditional;
llvm::BasicBlock *ContBlock = nullptr;
public:
CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
bool Conditional = false)
: EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
ExitArgs(ExitArgs), Conditional(Conditional) {}
void Enter(CodeGenFunction &CGF) override {
llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
if (Conditional) {
llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
ContBlock = CGF.createBasicBlock("omp_if.end");
// Generate the branch (If-stmt)
CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
CGF.EmitBlock(ThenBlock);
}
}