| /////////////////////////////////////////////////////////////////////////////// |
| // // |
| // DxilExpandTrigIntrinsics.cpp // |
| // Copyright (C) Microsoft Corporation. All rights reserved. // |
| // This file is distributed under the University of Illinois Open Source // |
| // License. See LICENSE.TXT for details. // |
| // // |
| // Expand trigonmetric intrinsics to a sequence of dxil instructions. // |
| // ========================================================================= // |
| // |
| // We provide expansions to approximate several trigonmetric functions that |
| // typically do not have native instructions in hardware. The details of each |
| // expansion is given below, but typically the exansion occurs in three steps |
| // |
| // 1. Perform range reduction (if necessary) to reduce input range |
| // to a value that works with the approximation. |
| // 2. Compute an approximation to the function (typically by evaluating |
| // a polynomial). |
| // 3. Perform range expansion (if necessary) to map the result back to |
| // the original range. |
| // |
| // For example, say we are expanding f(x) using an approximation to f, call it |
| // f*(x). And assume that f* only works for positive inputs, but we know that |
| // f(-x) = -f(x).Then the expansion would be |
| // |
| // 1. a = abs(x) |
| // 2. v = f*(a) |
| // 3. e = x < 0 ? -v : v |
| // |
| // where e contains the final expanded result. |
| // |
| // References |
| // --------------------------------------------------------------------------- |
| // [HMF] Handbook of Mathematical Formulas by Abramowitz and Stegun, 1964 |
| // [ADC] Approximations for Digital Computers by Hastings, 1955 |
| // [WIK] Wikipedia, 2017 |
| // |
| // The approximation functions mostly come from [ADC]. The approximations |
| // are also referenced in [HMF], but they give original credit to [ADC]. |
| // |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| #include "dxc/DXIL/DxilInstructions.h" |
| #include "dxc/DXIL/DxilModule.h" |
| #include "dxc/DXIL/DxilOperations.h" |
| #include "dxc/DXIL/DxilSignatureElement.h" |
| #include "dxc/HLSL/DxilGenerationPass.h" |
| #include "dxc/Support/Global.h" |
| |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/Pass.h" |
| |
| #include <cmath> |
| #include <utility> |
| |
| using namespace llvm; |
| using namespace hlsl; |
| |
| namespace { |
| class DxilExpandTrigIntrinsics : public FunctionPass { |
| private: |
| public: |
| static char ID; // Pass identification, replacement for typeid |
| explicit DxilExpandTrigIntrinsics() : FunctionPass(ID) {} |
| |
| StringRef getPassName() const override { |
| return "DXIL expand trig intrinsics"; |
| } |
| |
| bool runOnFunction(Function &F) override; |
| |
| private: |
| typedef std::vector<CallInst *> IntrinsicList; |
| IntrinsicList findTrigFunctionsToExpand(Function &F); |
| CallInst *isExpandableTrigIntrinsicCall(Instruction *I); |
| bool expandTrigIntrinsics(DxilModule &DM, const IntrinsicList &worklist); |
| FastMathFlags getFastMathFlagsForIntrinsic(CallInst *intrinsic); |
| void prepareBuilderToExpandIntrinsic(IRBuilder<> &builder, |
| CallInst *intrinsic); |
| |
| // Expansion implementations. |
| Value *expandACos(IRBuilder<> &builder, DxilInst_Acos acos, DxilModule &DM); |
| Value *expandASin(IRBuilder<> &builder, DxilInst_Asin asin, DxilModule &DM); |
| Value *expandATan(IRBuilder<> &builder, DxilInst_Atan atan, DxilModule &DM); |
| Value *expandHCos(IRBuilder<> &builder, DxilInst_Hcos hcos, DxilModule &DM); |
| Value *expandHSin(IRBuilder<> &builder, DxilInst_Hsin hsin, DxilModule &DM); |
| Value *expandHTan(IRBuilder<> &builder, DxilInst_Htan htan, DxilModule &DM); |
| Value *expandTan(IRBuilder<> &builder, DxilInst_Tan tan, DxilModule &DM); |
| }; |
| |
| // Math constants. |
| // Values taken from https://msdn.microsoft.com/en-us/library/4hwaceh6.aspx. |
| // Replicated here because they are not part of standard C++. |
| namespace math { |
| constexpr double PI = 3.14159265358979323846; |
| constexpr double PI_2 = 1.57079632679489661923; |
| constexpr double LOG2E = 1.44269504088896340736; |
| } // namespace math |
| |
| } // namespace |
| |
| bool DxilExpandTrigIntrinsics::runOnFunction(Function &F) { |
| DxilModule &DM = F.getParent()->GetOrCreateDxilModule(); |
| IntrinsicList intrinsics = findTrigFunctionsToExpand(F); |
| const bool changed = expandTrigIntrinsics(DM, intrinsics); |
| return changed; |
| } |
| |
| CallInst * |
| DxilExpandTrigIntrinsics::isExpandableTrigIntrinsicCall(Instruction *I) { |
| if (OP::IsDxilOpFuncCallInst(I)) { |
| switch (OP::GetDxilOpFuncCallInst(I)) { |
| case OP::OpCode::Acos: |
| case OP::OpCode::Asin: |
| case OP::OpCode::Atan: |
| case OP::OpCode::Hcos: |
| case OP::OpCode::Hsin: |
| case OP::OpCode::Htan: |
| case OP::OpCode::Tan: |
| return cast<CallInst>(I); |
| default: |
| break; |
| } |
| } |
| return nullptr; |
| } |
| |
| DxilExpandTrigIntrinsics::IntrinsicList |
| DxilExpandTrigIntrinsics::findTrigFunctionsToExpand(Function &F) { |
| IntrinsicList worklist; |
| for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) |
| if (CallInst *call = isExpandableTrigIntrinsicCall(&*I)) |
| worklist.push_back(call); |
| |
| return worklist; |
| } |
| |
| static bool isPreciseBuilder(IRBuilder<> &builder) { |
| return !builder.getFastMathFlags().any(); |
| } |
| |
| static void setPreciseBuilder(IRBuilder<> &builder, bool precise) { |
| FastMathFlags flags; |
| if (precise) |
| flags.clear(); |
| else |
| flags.setUnsafeAlgebra(); |
| builder.SetFastMathFlags(flags); |
| } |
| |
| void DxilExpandTrigIntrinsics::prepareBuilderToExpandIntrinsic( |
| IRBuilder<> &builder, CallInst *intrinsic) { |
| DxilModule &DM = intrinsic->getModule()->GetOrCreateDxilModule(); |
| builder.SetInsertPoint(intrinsic); |
| setPreciseBuilder(builder, DM.IsPrecise(intrinsic)); |
| } |
| |
| bool DxilExpandTrigIntrinsics::expandTrigIntrinsics( |
| DxilModule &DM, const IntrinsicList &worklist) { |
| IRBuilder<> builder(DM.GetCtx()); |
| for (CallInst *intrinsic : worklist) { |
| Value *expansion = nullptr; |
| prepareBuilderToExpandIntrinsic(builder, intrinsic); |
| |
| OP::OpCode opcode = OP::GetDxilOpFuncCallInst(intrinsic); |
| switch (opcode) { |
| case OP::OpCode::Acos: |
| expansion = expandACos(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Asin: |
| expansion = expandASin(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Atan: |
| expansion = expandATan(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Hcos: |
| expansion = expandHCos(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Hsin: |
| expansion = expandHSin(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Htan: |
| expansion = expandHTan(builder, intrinsic, DM); |
| break; |
| case OP::OpCode::Tan: |
| expansion = expandTan(builder, intrinsic, DM); |
| break; |
| default: |
| assert(false && "unexpected intrinsic"); |
| break; |
| } |
| |
| assert(expansion); |
| intrinsic->replaceAllUsesWith(expansion); |
| intrinsic->eraseFromParent(); |
| } |
| |
| return !worklist.empty(); |
| } |
| |
| // Helper |
| // return dx.op.UnaryFloat(X) |
| // |
| static Value *emitUnaryFloat(IRBuilder<> &builder, Value *X, OP *dxOp, |
| OP::OpCode opcode, StringRef name) { |
| Function *F = dxOp->GetOpFunc(opcode, X->getType()); |
| Value *Args[] = {dxOp->GetI32Const(static_cast<int>(opcode)), X}; |
| CallInst *Call = builder.CreateCall(F, Args, name); |
| |
| if (isPreciseBuilder(builder)) |
| DxilMDHelper::MarkPrecise(Call); |
| return Call; |
| } |
| |
| // Helper |
| // return dx.op.Fabs(X) |
| // |
| static Value *emitFAbs(IRBuilder<> &builder, Value *X, OP *dxOp, |
| StringRef name) { |
| return emitUnaryFloat(builder, X, dxOp, OP::OpCode::FAbs, name); |
| } |
| |
| // Helper |
| // return dx.op.Sqrt(X) |
| // |
| static Value *emitSqrt(IRBuilder<> &builder, Value *X, OP *dxOp, |
| StringRef name) { |
| return emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sqrt, name); |
| } |
| |
| // Helper |
| // return sqrt(1 - X) * psi*(X) |
| // |
| // We compute the polynomial using Horners method to evaluate it efficently. |
| // |
| // psi*(X) = a0 + a1x + a2x^2 + a3x^3 |
| // = a0 + x(a1 + a2x + a3x^2) |
| // = a0 + x(a1 + x(a2 + a3x)) |
| // |
| static Value *emitSqrt1mXtimesPsiX(IRBuilder<> &builder, Value *X, OP *dxOp, |
| StringRef name) { |
| Value *One = ConstantFP::get(X->getType(), 1.0); |
| Value *a0 = ConstantFP::get(X->getType(), 1.5707288); |
| Value *a1 = ConstantFP::get(X->getType(), -0.2121144); |
| Value *a2 = ConstantFP::get(X->getType(), 0.0742610); |
| Value *a3 = ConstantFP::get(X->getType(), -0.0187293); |
| |
| // sqrt(1-x) |
| Value *r1 = builder.CreateFSub(One, X, name); |
| Value *r2 = emitSqrt(builder, r1, dxOp, name); |
| |
| // psi*(x) |
| Value *r3 = builder.CreateFMul(X, a3, name); |
| r3 = builder.CreateFAdd(r3, a2, name); |
| r3 = builder.CreateFMul(X, r3, name); |
| r3 = builder.CreateFAdd(r3, a1, name); |
| r3 = builder.CreateFMul(X, r3, name); |
| r3 = builder.CreateFAdd(r3, a0, name); |
| |
| // sqrt(1-x) * psi*(x) |
| Value *r4 = builder.CreateFMul(r2, r3, name); |
| return r4; |
| } |
| |
| // Helper |
| // return e^x, e^-x |
| // |
| // We can use the dxil Exp function to compute the exponential. The only slight |
| // wrinkle is that in dxil Exp(x) = 2^x and we need e^x. Luckily we can easily |
| // change the base of the exponent using the following identity [HFM(p69)] |
| // |
| // e^x = 2^{x * log_2(e)} |
| // |
| static std::pair<Value *, Value *> emitExEmx(IRBuilder<> &builder, Value *X, |
| OP *dxOp, StringRef name) { |
| Value *Zero = ConstantFP::get(X->getType(), 0.0); |
| Value *Log2e = ConstantFP::get(X->getType(), math::LOG2E); |
| |
| Value *r0 = builder.CreateFMul(X, Log2e, name); |
| Value *r1 = emitUnaryFloat(builder, r0, dxOp, OP::OpCode::Exp, name); |
| Value *r2 = builder.CreateFSub(Zero, r0, name); |
| Value *r3 = emitUnaryFloat(builder, r2, dxOp, OP::OpCode::Exp, name); |
| |
| return std::make_pair(r1, r3); |
| } |
| |
| // Asin |
| // ---------------------------------------------------------------------------- |
| // Function |
| // arcsin X = pi/2 - sqrt(1 - X) * psi(X) |
| // |
| // Range |
| // 0 <= X <= 1 |
| // |
| // Approximation |
| // Psi*(X) = a0 + a1x + a2x^2 + a3x^3 |
| // a0 = 1.5707288 |
| // a1 = -0.2121144 |
| // a2 = 0.0742610 |
| // a3 = -0.0187293 |
| // |
| // The domain of the approximation is 0 <=x <= 1, but the domain of asin is |
| // -1 <= x <= 1. So we need to perform a range reduction to [0,1] before |
| // computing the approximation. |
| // |
| // We use the following identity from [HMF(p80),WIK] for range reduction |
| // |
| // asin(-x) = -asin(x) |
| // |
| // We take the absolute value of x, compute asin(x) using the approximation |
| // and then negate the value if x < 0. |
| // |
| // In [HMF] the authors claim an error, e, of |e| <= 5e-5, but the error graph |
| // in [ADC] looks like the error can be larger that that for some inputs. |
| // |
| Value *DxilExpandTrigIntrinsics::expandASin(IRBuilder<> &builder, |
| DxilInst_Asin asin, |
| DxilModule &DM) { |
| assert(asin); |
| StringRef name = "asin.x"; |
| Value *X = asin.get_value(); |
| Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2); |
| Value *Zero = ConstantFP::get(X->getType(), 0.0); |
| |
| // Range reduction to [0, 1] |
| Value *absX = emitFAbs(builder, X, DM.GetOP(), name); |
| |
| // Approximation |
| Value *psiX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name); |
| Value *asinX = builder.CreateFSub(PI_2, psiX, name); |
| Value *asinmX = builder.CreateFSub(Zero, asinX, name); |
| |
| // Range expansion to [-1, 1] |
| Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name); |
| Value *r = builder.CreateSelect(lt0, asinmX, asinX, name); |
| |
| return r; |
| } |
| |
| // Acos |
| // ---------------------------------------------------------------------------- |
| // The acos expansion uses the following identity [WIK]. So that we can use the |
| // same approximation psi*(x) that we use for asin. |
| // |
| // acos(x) = pi/2 - asin(x) |
| // |
| // Substituting the equation for asin(x) we get |
| // |
| // acos(x) = pi/2 - asin(x) |
| // = pi/2 - (pi/2 - sqrt(1-x)*psi(x)) |
| // = sqrt(1-x)*psi(x) |
| // |
| // We use the following identity from [HMF(p80),WIK] for range reduction |
| // |
| // acos(-x) = pi - acos(x) |
| // = pi - sqrt(1-x)*psi(x) |
| // |
| // We take the absolute value of x, compute acos(x) using the approximation |
| // and then subtract from pi if x < 0. |
| // |
| Value *DxilExpandTrigIntrinsics::expandACos(IRBuilder<> &builder, |
| DxilInst_Acos acos, |
| DxilModule &DM) { |
| assert(acos); |
| StringRef name = "acos.x"; |
| Value *X = acos.get_value(); |
| Value *PI = ConstantFP::get(X->getType(), math::PI); |
| Value *Zero = ConstantFP::get(X->getType(), 0.0); |
| |
| // Range reduction to [0, 1] |
| Value *absX = emitFAbs(builder, X, DM.GetOP(), name); |
| |
| // Approximation |
| Value *acosX = emitSqrt1mXtimesPsiX(builder, absX, DM.GetOP(), name); |
| Value *acosmX = builder.CreateFSub(PI, acosX, name); |
| |
| // Range expansion to [-1, 1] |
| Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name); |
| Value *r = builder.CreateSelect(lt0, acosmX, acosX, name); |
| |
| return r; |
| } |
| |
| // Atan |
| // ---------------------------------------------------------------------------- |
| // Function |
| // arctan X |
| // |
| // Range |
| // -1 <= X <= 1 |
| // |
| // Approximation |
| // arctan*(x) = c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9 |
| // c1 = 0.9998660 |
| // c3 = -0.3302995 |
| // c5 = 0.1801410 |
| // c7 = -0.0851330 |
| // c9 = 0.0208351 |
| // |
| // The polynomial is evaluated using Horner's method to efficiently compute the |
| // value |
| // |
| // c1x + c3x^3 + c5x^5 + c7x^7 + c9x^9 |
| // = x(c1 + c3x^2 + c5x^4 + c7x^6 + c9x^8) |
| // = x(c1 + x^2(c3 + c5x^2 + c7x^4 + c9x^6)) |
| // = x(c1 + x^2(c3 + x^2(c5 + c7x^2 + c9x^4))) |
| // = x(c1 + x^2(c3 + x^2(c5 + x^2(c7 + c9x^2)))) |
| // |
| // The range reduction is a little more compilicated for atan because the |
| // domain of atan is [-inf, inf], but the domain of the approximation is only |
| // [-1, 1]. We use the following identities for range reduction from |
| // [HMF(p80),WIK] |
| // |
| // arctan(-x) = -arctan(x) |
| // arctan(x) = pi/2 - arctan(1/x) if x > 0 |
| // |
| // The first identity allows us to only work with positive numbers. The second |
| // identity allows us to reduce the range to [0,1]. We first convert the value |
| // to positive by taking abs(x). Then if x > 1 we compute arctan(1/x). |
| // |
| // To expand the range we check if x > 1 then subtracted the computed value from |
| // pi/2 and if x is negative then negate the final value. |
| // |
| Value *DxilExpandTrigIntrinsics::expandATan(IRBuilder<> &builder, |
| DxilInst_Atan atan, |
| DxilModule &DM) { |
| assert(atan); |
| StringRef name = "atan.x"; |
| Value *X = atan.get_value(); |
| Value *PI_2 = ConstantFP::get(X->getType(), math::PI_2); |
| Value *One = ConstantFP::get(X->getType(), 1.0); |
| Value *Zero = ConstantFP::get(X->getType(), 0.0); |
| Value *c1 = ConstantFP::get(X->getType(), 0.9998660); |
| Value *c3 = ConstantFP::get(X->getType(), -0.3302995); |
| Value *c5 = ConstantFP::get(X->getType(), 0.1801410); |
| Value *c7 = ConstantFP::get(X->getType(), -0.0851330); |
| Value *c9 = ConstantFP::get(X->getType(), 0.0208351); |
| |
| // Range reduction to [0, inf] |
| Value *absX = emitFAbs(builder, X, DM.GetOP(), name); |
| |
| // Range reduction to [0, 1] |
| Value *gt1 = builder.CreateFCmp(CmpInst::FCMP_UGT, absX, One, name); |
| Value *r1 = builder.CreateFDiv(One, absX, name); |
| Value *r2 = builder.CreateSelect(gt1, r1, absX, name); |
| |
| // Approximate |
| Value *r3 = builder.CreateFMul(r2, r2, name); |
| Value *r4 = builder.CreateFMul(r3, c9, name); |
| r4 = builder.CreateFAdd(r4, c7, name); |
| r4 = builder.CreateFMul(r4, r3, name); |
| r4 = builder.CreateFAdd(r4, c5, name); |
| r4 = builder.CreateFMul(r4, r3, name); |
| r4 = builder.CreateFAdd(r4, c3, name); |
| r4 = builder.CreateFMul(r4, r3, name); |
| r4 = builder.CreateFAdd(r4, c1, name); |
| r4 = builder.CreateFMul(r2, r4, name); |
| |
| // Range Expansion to [0, inf] |
| Value *r5 = builder.CreateFSub(PI_2, r4, name); |
| Value *r6 = builder.CreateSelect(gt1, r5, r4, name); |
| |
| // Range Expansion to [-inf, inf] |
| Value *r7 = builder.CreateFSub(Zero, r6, name); |
| Value *lt0 = builder.CreateFCmp(CmpInst::FCMP_ULT, X, Zero, name); |
| Value *r = builder.CreateSelect(lt0, r7, r6, name); |
| |
| return r; |
| } |
| |
| // Hcos |
| // ---------------------------------------------------------------------------- |
| // We use the following identity for computing hcos(x) from [HMF(p83)] |
| // |
| // cosh(x) = (e^x + e^-x) / 2 |
| // |
| // No range reduction is needed. |
| // |
| Value *DxilExpandTrigIntrinsics::expandHCos(IRBuilder<> &builder, |
| DxilInst_Hcos hcos, |
| DxilModule &DM) { |
| assert(hcos); |
| StringRef name = "hcos.x"; |
| Value *eX, *emX; |
| Value *X = hcos.get_value(); |
| Value *Two = ConstantFP::get(X->getType(), 2.0); |
| |
| std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name); |
| Value *r4 = builder.CreateFAdd(eX, emX, name); |
| Value *r = builder.CreateFDiv(r4, Two, name); |
| |
| return r; |
| } |
| |
| // Hsin |
| // ---------------------------------------------------------------------------- |
| // We use the following identity for computing hsin(x) from[HMF(p83)] |
| // |
| // sinh(x) = (e^x - e^-x) / 2 |
| // |
| // No range reduction is needed. |
| // |
| Value *DxilExpandTrigIntrinsics::expandHSin(IRBuilder<> &builder, |
| DxilInst_Hsin hsin, |
| DxilModule &DM) { |
| assert(hsin); |
| StringRef name = "hsin.x"; |
| Value *eX, *emX; |
| Value *X = hsin.get_value(); |
| Value *Two = ConstantFP::get(X->getType(), 2.0); |
| |
| std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name); |
| Value *r4 = builder.CreateFSub(eX, emX, name); |
| Value *r = builder.CreateFDiv(r4, Two, name); |
| |
| return r; |
| } |
| |
| // Htan |
| // ---------------------------------------------------------------------------- |
| // We use the following identity for computing hsin(x) from[HMF(p83)] |
| // |
| // tanh(x) = (e^x - e^-x) / (e^x + e^-x) |
| // |
| // No range reduction is needed. |
| // |
| Value *DxilExpandTrigIntrinsics::expandHTan(IRBuilder<> &builder, |
| DxilInst_Htan htan, |
| DxilModule &DM) { |
| assert(htan); |
| StringRef name = "htan.x"; |
| Value *eX, *emX; |
| Value *X = htan.get_value(); |
| |
| std::tie(eX, emX) = emitExEmx(builder, X, DM.GetOP(), name); |
| Value *r4 = builder.CreateFSub(eX, emX, name); |
| Value *r5 = builder.CreateFAdd(eX, emX, name); |
| Value *r = builder.CreateFDiv(r4, r5, name); |
| |
| return r; |
| } |
| |
| // Tan |
| // ---------------------------------------------------------------------------- |
| // We use the following identity for computing tan(x) |
| // |
| // tan(x) = sin(x) / cos(x) |
| // |
| // No range reduction is needed. |
| // |
| Value *DxilExpandTrigIntrinsics::expandTan(IRBuilder<> &builder, |
| DxilInst_Tan tan, DxilModule &DM) { |
| assert(tan); |
| StringRef name = "tan.x"; |
| Value *X = tan.get_value(); |
| OP *dxOp = DM.GetOP(); |
| Value *sin = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Sin, name); |
| Value *cos = emitUnaryFloat(builder, X, dxOp, OP::OpCode::Cos, name); |
| Value *r = builder.CreateFDiv(sin, cos, name); |
| |
| return r; |
| } |
| |
| char DxilExpandTrigIntrinsics::ID = 0; |
| |
| FunctionPass *llvm::createDxilExpandTrigIntrinsicsPass() { |
| return new DxilExpandTrigIntrinsics(); |
| } |
| |
| INITIALIZE_PASS(DxilExpandTrigIntrinsics, "hlsl-dxil-expand-trig-intrinsics", |
| "DXIL expand trig intrinsics", false, false) |