| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "compat.h" |
| |
| #if defined(_MSC_VER) |
| |
| #include <limits.h> |
| #include <stdlib.h> |
| |
| #include <CL/cl.h> |
| |
| #include <windows.h> |
| |
| #if _MSC_VER < 1900 && !defined(__INTEL_COMPILER) |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // rint, rintf |
| // |
| /////////////////////////////////////////////////////////////////// |
| |
| float copysignf(float x, float y) |
| { |
| union { |
| cl_uint u; |
| float f; |
| } ux, uy; |
| |
| ux.f = x; |
| uy.f = y; |
| |
| ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U); |
| |
| return ux.f; |
| } |
| |
| double copysign(double x, double y) |
| { |
| union { |
| cl_ulong u; |
| double f; |
| } ux, uy; |
| |
| ux.f = x; |
| uy.f = y; |
| |
| ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL); |
| |
| return ux.f; |
| } |
| |
| long double copysignl(long double x, long double y) |
| { |
| union { |
| long double f; |
| struct |
| { |
| cl_ulong m; |
| cl_ushort sexp; |
| } u; |
| } ux, uy; |
| |
| ux.f = x; |
| uy.f = y; |
| |
| ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000); |
| |
| return ux.f; |
| } |
| |
| float rintf(float x) |
| { |
| float absx = fabsf(x); |
| |
| if (absx < 8388608.0f /* 0x1.0p23f */) |
| { |
| float magic = copysignf(8388608.0f /* 0x1.0p23f */, x); |
| float rounded = x + magic; |
| rounded -= magic; |
| x = copysignf(rounded, x); |
| } |
| |
| return x; |
| } |
| |
| double rint(double x) |
| { |
| double absx = fabs(x); |
| |
| if (absx < 4503599627370496.0 /* 0x1.0p52f */) |
| { |
| double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x); |
| double rounded = x + magic; |
| rounded -= magic; |
| x = copysign(rounded, x); |
| } |
| |
| return x; |
| } |
| |
| long double rintl(long double x) |
| { |
| double absx = fabs(x); |
| |
| if (absx < 9223372036854775808.0L /* 0x1.0p64f */) |
| { |
| long double magic = |
| copysignl(9223372036854775808.0L /* 0x1.0p63L */, x); |
| long double rounded = x + magic; |
| rounded -= magic; |
| x = copysignl(rounded, x); |
| } |
| |
| return x; |
| } |
| |
| #if _MSC_VER < 1800 |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // ilogb, ilogbf, ilogbl |
| // |
| /////////////////////////////////////////////////////////////////// |
| #ifndef FP_ILOGB0 |
| #define FP_ILOGB0 INT_MIN |
| #endif |
| |
| #ifndef FP_ILOGBNAN |
| #define FP_ILOGBNAN INT_MIN |
| #endif |
| |
| int ilogb(double x) |
| { |
| union { |
| double f; |
| cl_ulong u; |
| } u; |
| u.f = x; |
| |
| cl_ulong absx = u.u & CL_LONG_MAX; |
| if (absx - 0x0001000000000000ULL |
| >= 0x7ff0000000000000ULL - 0x0001000000000000ULL) |
| { |
| switch (absx) |
| { |
| case 0: return FP_ILOGB0; |
| case 0x7ff0000000000000ULL: return INT_MAX; |
| default: |
| if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN; |
| |
| // subnormal |
| u.u = absx | 0x3ff0000000000000ULL; |
| u.f -= 1.0; |
| return (u.u >> 52) - (1023 + 1022); |
| } |
| } |
| |
| return (absx >> 52) - 1023; |
| } |
| |
| |
| int ilogbf(float x) |
| { |
| union { |
| float f; |
| cl_uint u; |
| } u; |
| u.f = x; |
| |
| cl_uint absx = u.u & 0x7fffffff; |
| if (absx - 0x00800000U >= 0x7f800000U - 0x00800000U) |
| { |
| switch (absx) |
| { |
| case 0: return FP_ILOGB0; |
| case 0x7f800000U: return INT_MAX; |
| default: |
| if (absx > 0x7f800000) return FP_ILOGBNAN; |
| |
| // subnormal |
| u.u = absx | 0x3f800000U; |
| u.f -= 1.0f; |
| return (u.u >> 23) - (127 + 126); |
| } |
| } |
| |
| return (absx >> 23) - 127; |
| } |
| |
| int ilogbl(long double x) |
| { |
| union { |
| long double f; |
| struct |
| { |
| cl_ulong m; |
| cl_ushort sexp; |
| } u; |
| } u; |
| u.f = x; |
| |
| int exp = u.u.sexp & 0x7fff; |
| if (0 == exp) |
| { |
| if (0 == u.u.m) return FP_ILOGB0; |
| |
| // subnormal |
| u.u.sexp = 0x3fff; |
| u.f -= 1.0f; |
| exp = u.u.sexp & 0x7fff; |
| |
| return exp - (0x3fff + 0x3ffe); |
| } |
| else if (0x7fff == exp) |
| { |
| if (u.u.m & CL_LONG_MAX) return FP_ILOGBNAN; |
| |
| return INT_MAX; |
| } |
| |
| return exp - 0x3fff; |
| } |
| |
| #endif // _MSC_VER < 1800 |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // fmax, fmin, fmaxf, fminf |
| // |
| /////////////////////////////////////////////////////////////////// |
| |
| static void GET_BITS_SP32(float fx, unsigned int* ux) |
| { |
| volatile union { |
| float f; |
| unsigned int u; |
| } _bitsy; |
| _bitsy.f = (fx); |
| *ux = _bitsy.u; |
| } |
| /* static void GET_BITS_SP32(float fx, unsigned int* ux) */ |
| /* { */ |
| /* volatile union {float f; unsigned int i;} _bitsy; */ |
| /* _bitsy.f = (fx); */ |
| /* *ux = _bitsy.i; */ |
| /* } */ |
| static void PUT_BITS_SP32(unsigned int ux, float* fx) |
| { |
| volatile union { |
| float f; |
| unsigned int u; |
| } _bitsy; |
| _bitsy.u = (ux); |
| *fx = _bitsy.f; |
| } |
| /* static void PUT_BITS_SP32(unsigned int ux, float* fx) */ |
| /* { */ |
| /* volatile union {float f; unsigned int i;} _bitsy; */ |
| /* _bitsy.i = (ux); */ |
| /* *fx = _bitsy.f; */ |
| /* } */ |
| static void GET_BITS_DP64(double dx, unsigned __int64* lx) |
| { |
| volatile union { |
| double d; |
| unsigned __int64 l; |
| } _bitsy; |
| _bitsy.d = (dx); |
| *lx = _bitsy.l; |
| } |
| static void PUT_BITS_DP64(unsigned __int64 lx, double* dx) |
| { |
| volatile union { |
| double d; |
| unsigned __int64 l; |
| } _bitsy; |
| _bitsy.l = (lx); |
| *dx = _bitsy.d; |
| } |
| |
| #if 0 |
| int SIGNBIT_DP64(double x ) |
| { |
| int hx; |
| _GET_HIGH_WORD(hx,x); |
| return((hx>>31)); |
| } |
| #endif |
| |
| #if _MSC_VER < 1900 |
| |
| /* fmax(x, y) returns the larger (more positive) of x and y. |
| NaNs are treated as missing values: if one argument is NaN, |
| the other argument is returned. If both arguments are NaN, |
| the first argument is returned. */ |
| |
| /* This works so long as the compiler knows that (x != x) means |
| that x is NaN; gcc does. */ |
| double fmax(double x, double y) |
| { |
| if (isnan(y)) return x; |
| |
| return x >= y ? x : y; |
| } |
| |
| |
| /* fmin(x, y) returns the smaller (more negative) of x and y. |
| NaNs are treated as missing values: if one argument is NaN, |
| the other argument is returned. If both arguments are NaN, |
| the first argument is returned. */ |
| |
| double fmin(double x, double y) |
| { |
| if (isnan(y)) return x; |
| |
| return x <= y ? x : y; |
| } |
| |
| |
| float fmaxf(float x, float y) |
| { |
| if (isnan(y)) return x; |
| |
| return x >= y ? x : y; |
| } |
| |
| /* fminf(x, y) returns the smaller (more negative) of x and y. |
| NaNs are treated as missing values: if one argument is NaN, |
| the other argument is returned. If both arguments are NaN, |
| the first argument is returned. */ |
| |
| float fminf(float x, float y) |
| { |
| if (isnan(y)) return x; |
| |
| return x <= y ? x : y; |
| } |
| |
| long double scalblnl(long double x, long n) |
| { |
| union { |
| long double d; |
| struct |
| { |
| cl_ulong m; |
| cl_ushort sexp; |
| } u; |
| } u; |
| u.u.m = CL_LONG_MIN; |
| |
| if (x == 0.0L || n < -2200) return copysignl(0.0L, x); |
| |
| if (n > 2200) return INFINITY; |
| |
| if (n < 0) |
| { |
| u.u.sexp = 0x3fff - 1022; |
| while (n <= -1022) |
| { |
| x *= u.d; |
| n += 1022; |
| } |
| u.u.sexp = 0x3fff + n; |
| x *= u.d; |
| return x; |
| } |
| |
| if (n > 0) |
| { |
| u.u.sexp = 0x3fff + 1023; |
| while (n >= 1023) |
| { |
| x *= u.d; |
| n -= 1023; |
| } |
| u.u.sexp = 0x3fff + n; |
| x *= u.d; |
| return x; |
| } |
| |
| return x; |
| } |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // log2 |
| // |
| /////////////////////////////////////////////////////////////////// |
| const static cl_double log_e_base2 = 1.4426950408889634074; |
| const static cl_double log_10_base2 = 3.3219280948873623478; |
| |
| // double log10(double x); |
| |
| double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); } |
| |
| long double log2l(long double x) |
| { |
| return 1.44269504088896340735992468100189214L * log(x); |
| } |
| |
| double trunc(double x) |
| { |
| double absx = fabs(x); |
| |
| if (absx < 4503599627370496.0 /* 0x1.0p52f */) |
| { |
| cl_long rounded = x; |
| x = copysign((double)rounded, x); |
| } |
| |
| return x; |
| } |
| |
| float truncf(float x) |
| { |
| float absx = fabsf(x); |
| |
| if (absx < 8388608.0f /* 0x1.0p23f */) |
| { |
| cl_int rounded = x; |
| x = copysignf((float)rounded, x); |
| } |
| |
| return x; |
| } |
| |
| long lround(double x) |
| { |
| double absx = fabs(x); |
| |
| if (absx < 0.5) return 0; |
| |
| if (absx < 4503599627370496.0 /* 0x1.0p52 */) |
| { |
| absx += 0.5; |
| cl_long rounded = absx; |
| absx = rounded; |
| x = copysign(absx, x); |
| } |
| |
| if (x >= (double)LONG_MAX) return LONG_MAX; |
| |
| return (long)x; |
| } |
| |
| long lroundf(float x) |
| { |
| float absx = fabsf(x); |
| |
| if (absx < 0.5f) return 0; |
| |
| if (absx < 8388608.0f) |
| { |
| absx += 0.5f; |
| cl_int rounded = absx; |
| absx = rounded; |
| x = copysignf(absx, x); |
| } |
| |
| if (x >= (float)LONG_MAX) return LONG_MAX; |
| |
| return (long)x; |
| } |
| |
| double round(double x) |
| { |
| double absx = fabs(x); |
| |
| if (absx < 0.5) return copysign(0.0, x); |
| |
| if (absx < 4503599627370496.0 /* 0x1.0p52 */) |
| { |
| absx += 0.5; |
| cl_long rounded = absx; |
| absx = rounded; |
| x = copysign(absx, x); |
| } |
| |
| return x; |
| } |
| |
| float roundf(float x) |
| { |
| float absx = fabsf(x); |
| |
| if (absx < 0.5f) return copysignf(0.0f, x); |
| |
| if (absx < 8388608.0f) |
| { |
| absx += 0.5f; |
| cl_int rounded = absx; |
| absx = rounded; |
| x = copysignf(absx, x); |
| } |
| |
| return x; |
| } |
| |
| long double roundl(long double x) |
| { |
| long double absx = fabsl(x); |
| |
| if (absx < 0.5L) return copysignl(0.0L, x); |
| |
| if (absx < 9223372036854775808.0L /*0x1.0p63L*/) |
| { |
| absx += 0.5L; |
| cl_ulong rounded = absx; |
| absx = rounded; |
| x = copysignl(absx, x); |
| } |
| |
| return x; |
| } |
| |
| float cbrtf(float x) |
| { |
| float z = pow(fabs((double)x), 1.0 / 3.0); |
| return copysignf(z, x); |
| } |
| |
| double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); } |
| |
| long int lrint(double x) |
| { |
| double absx = fabs(x); |
| |
| if (x >= (double)LONG_MAX) return LONG_MAX; |
| |
| if (absx < 4503599627370496.0 /* 0x1.0p52 */) |
| { |
| double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x); |
| double rounded = x + magic; |
| rounded -= magic; |
| return (long int)rounded; |
| } |
| |
| return (long int)x; |
| } |
| |
| long int lrintf(float x) |
| { |
| float absx = fabsf(x); |
| |
| if (x >= (float)LONG_MAX) return LONG_MAX; |
| |
| if (absx < 8388608.0f /* 0x1.0p23f */) |
| { |
| float magic = copysignf(8388608.0f /* 0x1.0p23f */, x); |
| float rounded = x + magic; |
| rounded -= magic; |
| return (long int)rounded; |
| } |
| |
| return (long int)x; |
| } |
| |
| #endif // _MSC_VER < 1900 |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // fenv functions |
| // |
| /////////////////////////////////////////////////////////////////// |
| |
| #if _MSC_VER < 1800 |
| int fetestexcept(int excepts) |
| { |
| unsigned int status = _statusfp(); |
| return excepts |
| & (((status & _SW_INEXACT) ? FE_INEXACT : 0) |
| | ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
| | ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
| | ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
| | ((status & _SW_INVALID) ? FE_INVALID : 0)); |
| } |
| |
| int feclearexcept(int excepts) |
| { |
| _clearfp(); |
| return 0; |
| } |
| #endif |
| |
| #endif // __INTEL_COMPILER |
| |
| #if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300) |
| |
| float nanf(const char* str) |
| { |
| cl_uint u = atoi(str); |
| u |= 0x7fc00000U; |
| return *(float*)(&u); |
| } |
| |
| |
| double nan(const char* str) |
| { |
| cl_ulong u = atoi(str); |
| u |= 0x7ff8000000000000ULL; |
| return *(double*)(&u); |
| } |
| |
| // double check this implementatation |
| long double nanl(const char* str) |
| { |
| union { |
| long double f; |
| struct |
| { |
| cl_ulong m; |
| cl_ushort sexp; |
| } u; |
| } u; |
| u.u.sexp = 0x7fff; |
| u.u.m = 0x8000000000000000ULL | atoi(str); |
| |
| return u.f; |
| } |
| |
| #endif |
| |
| /////////////////////////////////////////////////////////////////// |
| // |
| // misc functions |
| // |
| /////////////////////////////////////////////////////////////////// |
| |
| /* |
| // This function is commented out because the Windows implementation should |
| never call munmap. |
| // If it is calling it, we have a bug. Please file a bugzilla. |
| int munmap(void *addr, size_t len) |
| { |
| // FIXME: this is not correct. munmap is like free() |
| // http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html |
| |
| return (int)VirtualAlloc( (LPVOID)addr, len, |
| MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); |
| } |
| */ |
| |
| uint64_t ReadTime(void) |
| { |
| LARGE_INTEGER current; |
| QueryPerformanceCounter(¤t); |
| return (uint64_t)current.QuadPart; |
| } |
| |
| double SubtractTime(uint64_t endTime, uint64_t startTime) |
| { |
| static double PerformanceFrequency = 0.0; |
| |
| if (PerformanceFrequency == 0.0) |
| { |
| LARGE_INTEGER frequency; |
| QueryPerformanceFrequency(&frequency); |
| PerformanceFrequency = (double)frequency.QuadPart; |
| } |
| |
| return (double)(endTime - startTime) / PerformanceFrequency * 1e9; |
| } |
| |
| int cf_signbit(double x) |
| { |
| union { |
| double f; |
| cl_ulong u; |
| } u; |
| u.f = x; |
| return u.u >> 63; |
| } |
| |
| int cf_signbitf(float x) |
| { |
| union { |
| float f; |
| cl_uint u; |
| } u; |
| u.f = x; |
| return u.u >> 31; |
| } |
| |
| float int2float(int32_t ix) |
| { |
| union { |
| float f; |
| int32_t i; |
| } u; |
| u.i = ix; |
| return u.f; |
| } |
| |
| int32_t float2int(float fx) |
| { |
| union { |
| float f; |
| int32_t i; |
| } u; |
| u.f = fx; |
| return u.i; |
| } |
| |
| #if !defined(_WIN64) |
| /** Returns the number of leading 0-bits in x, |
| starting at the most significant bit position. |
| If x is 0, the result is undefined. |
| */ |
| int __builtin_clz(unsigned int pattern) |
| { |
| #if 0 |
| int res; |
| __asm { |
| mov eax, pattern |
| bsr eax, eax |
| mov res, eax |
| } |
| return 31 - res; |
| #endif |
| unsigned long index; |
| unsigned char res = _BitScanReverse(&index, pattern); |
| if (res) |
| { |
| return 8 * sizeof(int) - 1 - index; |
| } |
| else |
| { |
| return 8 * sizeof(int); |
| } |
| } |
| #else |
| int __builtin_clz(unsigned int pattern) |
| { |
| int count; |
| if (pattern == 0u) |
| { |
| return 32; |
| } |
| count = 31; |
| if (pattern >= 1u << 16) |
| { |
| pattern >>= 16; |
| count -= 16; |
| } |
| if (pattern >= 1u << 8) |
| { |
| pattern >>= 8; |
| count -= 8; |
| } |
| if (pattern >= 1u << 4) |
| { |
| pattern >>= 4; |
| count -= 4; |
| } |
| if (pattern >= 1u << 2) |
| { |
| pattern >>= 2; |
| count -= 2; |
| } |
| if (pattern >= 1u << 1) |
| { |
| count -= 1; |
| } |
| return count; |
| } |
| |
| #endif // !defined(_WIN64) |
| |
| #include <intrin.h> |
| #include <emmintrin.h> |
| |
| int usleep(int usec) |
| { |
| Sleep((usec + 999) / 1000); |
| return 0; |
| } |
| |
| unsigned int sleep(unsigned int sec) |
| { |
| Sleep(sec * 1000); |
| return 0; |
| } |
| |
| #endif // defined( _MSC_VER ) |