| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "Utility.h" |
| #include "FunctionList.h" |
| |
| #if defined(__PPC__) |
// Global variable used to hold the FPU control register state. The FPSCR register cannot
// be used because not all Power implementations retain or observe the NI (non-IEEE
// mode) bit.
| __thread fpu_control_t fpu_control = 0; |
| #endif |
| |
// Multiply two doubles and return the product as a (high, low) pair.
//   rhi  receives the ordinary rounded product u*v.
//   rlo  receives a correction term rebuilt from the split halves of u and v,
//        so that rhi + rlo carries more of the product than rhi alone.
// Each operand is split with the constant 1+2^27 into a leading part and a
// trailing remainder; the partial products are then accumulated in a fixed
// order. The order of the floating-point operations below is significant.
void MulD(double *rhi, double *rlo, double u, double v)
{
    const double splitter = 134217729.0; // 1+2^27

    // Split u into leading part uHead and remainder uTail.
    const double uScaled = u * splitter;
    const double uHead = (u - uScaled) + uScaled;
    const double uTail = u - uHead;

    // Split v the same way.
    const double vScaled = v * splitter;
    const double vHead = (v - vScaled) + vScaled;
    const double vTail = v - vHead;

    const double product = u * v;
    const double correction =
        (((uHead * vHead - product) + (uHead * vTail)) + (uTail * vHead)) + (uTail * vTail);

    *rhi = product;
    *rlo = correction;
}
| |
// Add two doubles and return the sum as a (high, low) pair.
//   rhi  receives the ordinary rounded sum a + b.
//   rlo  receives the part of the smaller-magnitude operand that did not make
//        it into rhi, computed as small - (sum - large).
// When |a| is not strictly greater than |b| (including ties), b is treated as
// the larger operand.
void AddD(double *rhi, double *rlo, double a, double b)
{
    const double sum = a + b;

    // Pick which operand plays the "large" role in the correction step.
    const int aDominates = fabs(a) > fabs(b);
    const double large = aDominates ? a : b;
    const double small = aDominates ? b : a;

    const double absorbed = sum - large;

    *rhi = sum;
    *rlo = small - absorbed;
}
| |
// Multiply two (high, low) double pairs: (xh, xl) * (yh, yl).
// The high parts are multiplied with the same split-and-correct scheme used
// for a single product (splitting constant 1+2^27); the cross terms xh*yl and
// xl*yh are then folded into the correction. The xl*yl term is not included.
// Finally the pair is renormalised so that rhi holds the rounded total and
// rlo the remainder. The order of the floating-point operations is significant.
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double splitter = 134217729.0;

    // Split xh into leading part and remainder.
    const double xScaled = xh * splitter;
    const double xHead = (xh - xScaled) + xScaled;
    const double xTail = xh - xHead;

    // Split yh the same way.
    const double yScaled = yh * splitter;
    const double yHead = (yh - yScaled) + yScaled;
    const double yTail = yh - yHead;

    const double prodHi = xh * yh;
    double prodLo =
        (((xHead * yHead - prodHi) + (xHead * yTail)) + (xTail * yHead)) + (xTail * yTail);
    prodLo += xh * yl + xl * yh;

    // Renormalise into the output pair.
    const double total = prodHi + prodLo;
    *rhi = total;
    *rlo = (prodHi - total) + prodLo;
}
| |
// Add two (high, low) double pairs: (xh, xl) + (yh, yl).
// The high parts are summed first; the rounding error of that sum is then
// recovered relative to the larger-magnitude high part and combined with both
// low parts. The result is renormalised so that rhi holds the rounded total
// and rlo the remainder. When |xh| is not strictly greater than |yh|, yh is
// treated as the larger operand.
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double headSum = xh + yh;
    double tail;

    // Left-to-right association below is intentional; do not regroup.
    if (fabs(xh) > fabs(yh)) {
        tail = xh - headSum + yh + yl + xl;
    } else {
        tail = yh - headSum + xh + xl + yl;
    }

    const double total = headSum + tail;
    *rhi = total;
    *rlo = (headSum - total) + tail;
}
| |
// Divide a by b and return the quotient as a (high, low) pair.
//   chi  receives the ordinary rounded quotient a / b.
//   clo  receives a correction: the residual a - chi*b (formed with MulD and
//        AddDD) divided by b. Only the high part of the residual is used.
void DivideDD(double *chi, double *clo, double a, double b)
{
    double residualHi, residualLo;

    const double quotient = a / b;
    *chi = quotient;

    // residual = quotient * b, as a (high, low) pair ...
    MulD(&residualHi, &residualLo, quotient, b);
    // ... then residual = a - residual.
    AddDD(&residualHi, &residualLo, -residualHi, -residualLo, a, 0.0);

    *clo = residualHi / b;
}
| |
// These functions compare two floats/doubles. Since some platforms may choose to
// flush denormals to zero before comparison, a comparison like a < b may give the wrong
// result in certain cases where we do need a correct comparison result when the operands
// are denormals. These functions compare floats/doubles using their signed integer/long int
// representation. In other cases, when flushing to zero is fine, these should not be used.
// Also, these functions do not check for NaNs; they assume NaNs are handled separately as a
// special edge case by the caller.
// They return 0 if both are equal, 1 if x > y and -1 if x < y.
| |
| inline |
| int compareFloats(float x, float y) |
| { |
| int32f_t a, b; |
| |
| a.f = x; |
| b.f = y; |
| |
| if( a.i & 0x80000000 ) |
| a.i = 0x80000000 - a.i; |
| if( b.i & 0x80000000 ) |
| b.i = 0x80000000 - b.i; |
| |
| if( a.i == b.i ) |
| return 0; |
| |
| return a.i < b.i ? -1 : 1; |
| } |
| |
| inline |
| int compareDoubles(double x, double y) |
| { |
| int64d_t a, b; |
| |
| a.d = x; |
| b.d = y; |
| |
| if( a.l & 0x8000000000000000LL ) |
| a.l = 0x8000000000000000LL - a.l; |
| if( b.l & 0x8000000000000000LL ) |
| b.l = 0x8000000000000000LL - b.l; |
| |
| if( a.l == b.l ) |
| return 0; |
| |
| return a.l < b.l ? -1 : 1; |
| } |
| |
| void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed) |
| { |
| char const *fpSizeStr = NULL; |
| char const *fpFastRelaxedStr = ""; |
| switch (float_size) { |
| case sizeof(cl_double): |
| fpSizeStr = "fp64"; |
| break; |
| case sizeof(cl_float): |
| fpSizeStr = "fp32"; |
| break; |
| case sizeof(cl_half): |
| fpSizeStr = "fp16"; |
| break; |
| } |
| if (isFastRelaxed) { |
| fpFastRelaxedStr = "rlx"; |
| } |
| vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr); |
| } |
| |
| float getAllowedUlpError(const Func *f, const bool relaxed) |
| { |
| float ulp; |
| |
| if (relaxed) |
| { |
| if (gIsEmbedded) |
| { |
| ulp = f->relaxed_embedded_error; |
| } |
| else |
| { |
| ulp = f->relaxed_error; |
| } |
| } |
| else |
| { |
| if (gIsEmbedded) |
| { |
| ulp = f->float_embedded_ulps; |
| } |
| else |
| { |
| ulp = f->float_ulps; |
| } |
| } |
| |
| return ulp; |
| } |