| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #ifndef UTILITY_H |
| #define UTILITY_H |
| |
| #include "harness/compat.h" |
| |
| #ifdef __APPLE__ |
| #include <OpenCL/opencl.h> |
| #else |
| #include <CL/opencl.h> |
| #endif |
| #include <stdio.h> |
| #include "harness/rounding_mode.h" |
| #include "harness/fpcontrol.h" |
| #include "harness/testHarness.h" |
| #include "harness/ThreadPool.h" |
| #include "harness/conversions.h" |
| |
| #define BUFFER_SIZE (1024 * 1024 * 2) /* 2 * 1024 * 1024; getTestStep() divides it by an element size when gIsEmbedded is set */ |
| #define EMBEDDED_REDUCTION_FACTOR (64) /* value returned by getTestScale() and multiplier in getTestStep() on the gIsEmbedded branch */ |
| |
| #if defined(__GNUC__) |
| #define UNUSED __attribute__((unused)) /* compilers defining __GNUC__: tag a declaration as possibly unused */ |
| #else |
| #define UNUSED /* every other compiler: expands to nothing */ |
| #endif |
| |
| struct Func; /* forward declaration; this header only uses the type through a pointer */ |
| |
| extern int gWimpyBufferSize; |
| extern int gWimpyReductionFactor; /* multiplier used by getTestScale()/getTestStep() when gWimpyMode is nonzero */ |
| |
| #define VECTOR_SIZE_COUNT 6 /* element count of sizeNames, sizeValues, gOut, gOut2, gOutBuffer and gOutBuffer2 */ |
| extern const char *sizeNames[VECTOR_SIZE_COUNT]; |
| extern const int sizeValues[VECTOR_SIZE_COUNT]; |
| |
| extern cl_device_id gDevice; /* this and the following globals are only declared here; their definitions live in another translation unit */ |
| extern cl_context gContext; |
| extern cl_command_queue gQueue; |
| extern void *gIn; |
| extern void *gIn2; |
| extern void *gIn3; |
| extern void *gOut_Ref; |
| extern void *gOut_Ref2; |
| extern void *gOut[VECTOR_SIZE_COUNT]; |
| extern void *gOut2[VECTOR_SIZE_COUNT]; |
| extern cl_mem gInBuffer; |
| extern cl_mem gInBuffer2; |
| extern cl_mem gInBuffer3; |
| extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT]; |
| extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; |
| extern uint32_t gComputeDevices; |
| extern uint32_t gSimdSize; |
| extern int gSkipCorrectnessTesting; |
| extern int gMeasureTimes; |
| extern int gReportAverageTimes; |
| extern int gForceFTZ; |
| extern int gFastRelaxedDerived; |
| extern int gWimpyMode; /* nonzero: getTestScale()/getTestStep() take their gWimpyReductionFactor branch */ |
| extern int gHasDouble; |
| extern int gIsInRTZMode; |
| extern int gInfNanSupport; |
| extern int gIsEmbedded; /* nonzero while gWimpyMode is zero: getTestScale()/getTestStep() take their EMBEDDED_REDUCTION_FACTOR branch */ |
| extern int gVerboseBruteForce; |
| extern uint32_t gMaxVectorSizeIndex; |
| extern uint32_t gMinVectorSizeIndex; |
| extern uint32_t gDeviceFrequency; |
| extern cl_device_fp_config gFloatCapabilities; |
| extern cl_device_fp_config gDoubleCapabilities; |
| |
| #define LOWER_IS_BETTER 0 /* NOTE(review): unused in this header; presumably a reporting-direction flag, confirm at the call sites */ |
| #define HIGHER_IS_BETTER 1 /* NOTE(review): counterpart of LOWER_IS_BETTER, confirm at the call sites */ |
| |
| #include "harness/errorHelpers.h" |
| |
| #if defined(_MSC_VER) |
| // Deal with missing scalbn on windows |
| #define scalbnf(_a, _i) ldexpf(_a, _i) /* float variant, forwarded to ldexpf */ |
| #define scalbn(_a, _i) ldexp(_a, _i) /* double variant, forwarded to ldexp */ |
| #define scalbnl(_a, _i) ldexpl(_a, _i) /* long double variant, forwarded to ldexpl */ |
| #endif |
| |
| float Abs_Error(float test, double reference); /* NOTE(review): presumably the absolute error of test against reference; confirm in the definition */ |
| float Ulp_Error(float test, double reference); /* NOTE(review): presumably the error of test against reference expressed in ULPs; confirm in the definition */ |
| float Bruteforce_Ulp_Error_Double(double test, long double reference); /* double-typed test value checked against a long double reference */ |
| |
| uint64_t GetTime(void); /* NOTE(review): the time unit is not visible in this header */ |
| double SubtractTime(uint64_t endTime, uint64_t startTime); /* both arguments have GetTime()'s return type; NOTE(review): result unit not visible here */ |
| int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k, |
| cl_program *p, bool relaxedMode); /* NOTE(review): presumably builds the count source strings in c into *p and creates kernel name in *k; confirm */ |
| int MakeKernels(const char **c, cl_uint count, const char *name, |
| cl_uint kernel_count, cl_kernel *k, cl_program *p, |
| bool relaxedMode); /* same parameters as MakeKernel plus kernel_count; NOTE(review): k presumably addresses kernel_count entries, confirm */ |
| |
// Expand a 32-bit bucket of bits into the bit pattern of a double, used as a
// search pattern through the double range.
//
// The top 20 bits of `bits` become the top 20 bits of the result and the low
// 12 bits stay in place, e.g. 0x89abcdef first becomes 0x89abc00000000def.
// Bit 11 of `bits` then decides the 32 bits in between: when it is set,
// 0x1000 is subtracted, and the borrow turns the zero middle region into all
// ones (taking one from the top 20-bit field on the way).
//
// bits     pattern to expand
// returns  the double whose object representation is the expanded pattern
static inline double DoubleFromUInt32(uint32_t bits)
{
    const uint64_t lowField = bits & 0xfffU;
    const uint64_t highField = (uint64_t)(bits & ~0xfffU) << 32;
    // 0x1000 when bit 11 of the input is set, otherwise 0
    const uint64_t borrow = (bits & 0x800U) << 1;

    union {
        uint64_t raw;
        double value;
    } pun;

    pun.raw = (highField | lowField) - borrow;
    return pun.value;
}
| |
| void _LogBuildError(cl_program p, int line, const char *file); /* normally reached through LogBuildError(), which supplies line and file */ |
| #define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__) /* forwards the call site's __LINE__ and __FILE__ */ |
| |
| #define PERF_LOOP_COUNT 100 /* NOTE(review): unused in this header; presumably the iteration count of timing loops, confirm at the call sites */ |
| |
// Decide whether a single-precision reference result counts as subnormal for
// flush-to-zero purposes.
//
// The spec is fairly clear that a hard cutoff could be enforced to prevent
// premature flushing to zero. To avoid conflict for 1.0, results up to
// ulp_limit above the smallest normal value are still allowed to flush, so
// the ULP allowance is taken off the magnitude before the comparison.
//
// x        reference result
// ulps     permitted error, in units of 2^-149
// returns  nonzero when |x| - ulps * 2^-149 < 2^-126, zero otherwise
static inline int IsFloatResultSubnormal(double x, float ulps)
{
    const double allowance =
        MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
    const double reduced = fabs(x) - allowance;

    return reduced < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
| |
// Absolute-error counterpart of IsFloatResultSubnormal: reports whether the
// magnitude of a reference result, reduced by the permitted absolute error,
// lies below 2^-126.
//
// x        reference result
// abs_err  permitted absolute error
// returns  nonzero when |x| - abs_err < 2^-126, zero otherwise
static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
{
    // Work on the magnitude, exactly as IsFloatResultSubnormal does. Comparing
    // the signed value made every negative reference (e.g. -0.5) satisfy the
    // test, no matter how far it is from the subnormal range.
    x = fabs(x) - abs_err;
    return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
| |
// Double-precision version of the subnormal check: the ULP allowance is
// measured in units of 2^-1074 and the threshold is 2^-1022.
//
// x        reference result
// ulps     permitted error, in units of 2^-1074
// returns  nonzero when |x| - ulps * 2^-1074 < 2^-1022, zero otherwise
static inline int IsDoubleResultSubnormal(long double x, float ulps)
{
    const long double allowance =
        MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
    const long double reduced = fabsl(x) - allowance;

    return reduced < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
}
| |
| static inline int IsFloatInfinity(double x) |
| { |
| union { |
| cl_float d; |
| cl_uint u; |
| } u; |
| u.d = (cl_float)x; |
| return ((u.u & 0x7fffffffU) == 0x7F800000U); |
| } |
| |
| static inline int IsFloatMaxFloat(double x) |
| { |
| union { |
| cl_float d; |
| cl_uint u; |
| } u; |
| u.d = (cl_float)x; |
| return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); |
| } |
| |
| static inline int IsFloatNaN(double x) |
| { |
| union { |
| cl_float d; |
| cl_uint u; |
| } u; |
| u.d = (cl_float)x; |
| return ((u.u & 0x7fffffffU) > 0x7F800000U); |
| } |
| |
| extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x); /* defined elsewhere; NOTE(review): result for an x that is already a power of two is not visible in this header */ |
| |
// Set the x87 precision-control field to 64-bit (double extended) precision
// on the Windows toolchains handled below; does nothing on any other target.
//
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
static inline void Force64BitFPUPrecision(void)
{
#if defined(__MINGW32__)
    // The usual method is to use _controlfp as follows:
    //     #include <float.h>
    //     _controlfp(_PC_64, _MCW_PC);
    //
    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
    // divergent code just use inline assembly which works for both.
    unsigned short int orig_cw = 0;
    unsigned short int new_cw = 0;
    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
    // Set both precision-control bits (bits 8 and 9) to select 64-bit
    // precision; the cast makes the narrowing from int explicit.
    new_cw = (unsigned short int)(orig_cw | 0x0300);
    __asm__ __volatile__("fldcw %0" ::"m"(new_cw));
#elif defined(_WIN32) && defined(__INTEL_COMPILER)
    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
    // work on win.x64: > On the x64 architecture, changing the floating point
    // precision is not supported. (Taken from
    // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
    //
    // The control word is 16 bits wide, so fnstcw fills only the low half of
    // cw; start from zero so the upper half is never read uninitialized.
    int cw = 0;
    __asm { fnstcw cw }
    ; // Get current value of FPU control word.
    // Clear the precision-control field, then set it to Double Extended
    // Precision.
    cw = (cw & 0xfffffcff) | (3 << 8);
    __asm { fldcw cw }
    ; // Set new value of FPU control word.
#else
    /* Implement for other platforms if needed */
#endif
}
| |
| extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); /* NOTE(review): presumably fills bytes bytes at dest with the 4-byte pattern at src_pattern; confirm in the definition */ |
| |
| typedef union { /* overlays an int32_t and a float in the same storage */ |
| int32_t i; |
| float f; |
| } int32f_t; |
| |
| typedef union { /* overlays an int64_t and a double in the same storage */ |
| int64_t l; |
| double d; |
| } int64d_t; |
| |
| void MulD(double *rhi, double *rlo, double u, double v); /* NOTE(review): rhi/rlo look like the high and low halves of a two-double result for u * v; confirm */ |
| void AddD(double *rhi, double *rlo, double a, double b); /* NOTE(review): same output convention, presumably for a + b; confirm */ |
| void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, |
| double yl); /* NOTE(review): operands are themselves split into (xh, xl) and (yh, yl) pairs; presumably their product, confirm */ |
| void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, |
| double yl); /* NOTE(review): pair-valued operands as in MulDD; presumably their sum, confirm */ |
| void DivideDD(double *chi, double *clo, double a, double b); /* NOTE(review): presumably a / b written to the (chi, clo) pair; confirm */ |
| int compareFloats(float x, float y); /* NOTE(review): meaning of the int result is not visible in this header */ |
| int compareDoubles(double x, double y); /* NOTE(review): meaning of the int result is not visible in this header */ |
| |
| void logFunctionInfo(const char *fname, unsigned int float_size, |
| unsigned int isFastRelaxed); |
| |
| float getAllowedUlpError(const Func *f, const bool relaxed); /* Func appears without the struct keyword, so this line relies on C++ name lookup or on a typedef supplied by another header */ |
| |
| static inline cl_uint getTestScale(size_t typeSize) |
| { |
| if (gWimpyMode) |
| { |
| return (cl_uint)typeSize * 2 * gWimpyReductionFactor; |
| } |
| else if (gIsEmbedded) |
| { |
| return EMBEDDED_REDUCTION_FACTOR; |
| } |
| else |
| { |
| return 1; |
| } |
| } |
| |
| static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) |
| { |
| if (gWimpyMode) |
| { |
| return (1ULL << 32) * gWimpyReductionFactor / (512); |
| } |
| else if (gIsEmbedded) |
| { |
| return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR; |
| } |
| else |
| { |
| return bufferSize / typeSize; |
| } |
| } |
| |
| #endif /* UTILITY_H */ |