| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "harness/compat.h" |
| |
| #include <string.h> |
| #include <stdio.h> |
| |
| #if !defined(_WIN32) |
| #include <libgen.h> |
| #include <sys/param.h> |
| #endif |
| |
| #include "mingw_compat.h" |
| #if defined (__MINGW32__) |
| #include <sys/param.h> |
| #endif |
| |
| #include <time.h> |
| #include "errorHelpers.h" |
| #include "harness/compat.h" |
| #include "harness/mt19937.h" |
| #include "harness/kernelHelpers.h" |
| #include "harness/rounding_mode.h" |
| #include "harness/fpcontrol.h" |
| #include "harness/testHarness.h" |
| #include "harness/parseParameters.h" |
| #if defined( __APPLE__ ) |
| #include <sys/sysctl.h> |
| #endif |
| #if defined( __linux__ ) |
| #include <unistd.h> |
| #include <sys/syscall.h> |
| #include <linux/sysctl.h> |
| #endif |
| |
| #if defined (_WIN32) |
| #include <string.h> |
| #endif |
| |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| #include <emmintrin.h> |
| #endif |
| |
| #if defined(__PPC__) |
| // Global varaiable used to hold the FPU control register state. The FPSCR register can not |
| // be used because not all Power implementations retain or observed the NI (non-IEEE |
| // mode) bit. |
| __thread fpu_control_t fpu_control = 0; |
| #endif |
| |
| #ifndef MAXPATHLEN |
| #define MAXPATHLEN 2048 |
| #endif |
| |
| char appName[ MAXPATHLEN ] = ""; |
| cl_context gContext = NULL; |
| cl_command_queue gQueue = NULL; |
| cl_program gProgram[5] = { NULL, NULL, NULL, NULL, NULL }; |
| cl_program gProgram_double[5] = { NULL, NULL, NULL, NULL, NULL }; |
| int gForceFTZ = 0; |
| int gSeed = 0; |
| int gSeedSpecified = 0; |
| int gHasDouble = 0; |
| MTdata gMTdata = NULL; |
| int gSkipNanInf = 0; |
| int gIgnoreZeroSign = 0; |
| |
| cl_mem bufA = NULL; |
| cl_mem bufB = NULL; |
| cl_mem bufC = NULL; |
| cl_mem bufD = NULL; |
| cl_mem bufE = NULL; |
| cl_mem bufC_double = NULL; |
| cl_mem bufD_double = NULL; |
| float *buf1, *buf2, *buf3, *buf4, *buf5, *buf6; |
| float *correct[8]; |
| int *skipTest[8]; |
| |
| double *buf3_double, *buf4_double, *buf5_double, *buf6_double; |
| double *correct_double[8]; |
| |
| static const char **gArgList; |
| static size_t gArgCount; |
| |
| #define BUFFER_SIZE (1024*1024) |
| |
| |
| static int ParseArgs( int argc, const char **argv ); |
| static void PrintUsage( void ); |
| test_status InitCL( cl_device_id device ); |
| static void ReleaseCL( void ); |
| static int RunTest( int testNumber ); |
| static int RunTest_Double( int testNumber ); |
| |
| #if defined(__ANDROID__) |
| #define nanf( X ) strtof( "NAN", ( char ** ) NULL ) |
| #define nan( X ) strtod( "NAN", ( char ** ) NULL ) |
| #endif |
| |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| // defeat x87 on MSVC |
| float sse_add(float x, float y) |
| { |
| volatile float a = x; |
| volatile float b = y; |
| |
| // defeat x87 |
| __m128 va = _mm_set_ss( (float) a ); |
| __m128 vb = _mm_set_ss( (float) b ); |
| va = _mm_add_ss( va, vb ); |
| _mm_store_ss( (float*) &a, va ); |
| return a; |
| } |
| |
| double sse_add_sd(double x, double y) |
| { |
| volatile double a = x; |
| volatile double b = y; |
| |
| // defeat x87 |
| __m128d va = _mm_set_sd( (double) a ); |
| __m128d vb = _mm_set_sd( (double) b ); |
| va = _mm_add_sd( va, vb ); |
| _mm_store_sd( (double*) &a, va ); |
| return a; |
| } |
| |
| float sse_sub(float x, float y) |
| { |
| volatile float a = x; |
| volatile float b = y; |
| |
| // defeat x87 |
| __m128 va = _mm_set_ss( (float) a ); |
| __m128 vb = _mm_set_ss( (float) b ); |
| va = _mm_sub_ss( va, vb ); |
| _mm_store_ss( (float*) &a, va ); |
| return a; |
| } |
| |
| double sse_sub_sd(double x, double y) |
| { |
| volatile double a = x; |
| volatile double b = y; |
| |
| // defeat x87 |
| __m128d va = _mm_set_sd( (double) a ); |
| __m128d vb = _mm_set_sd( (double) b ); |
| va = _mm_sub_sd( va, vb ); |
| _mm_store_sd( (double*) &a, va ); |
| return a; |
| } |
| |
| float sse_mul(float x, float y) |
| { |
| volatile float a = x; |
| volatile float b = y; |
| |
| // defeat x87 |
| __m128 va = _mm_set_ss( (float) a ); |
| __m128 vb = _mm_set_ss( (float) b ); |
| va = _mm_mul_ss( va, vb ); |
| _mm_store_ss( (float*) &a, va ); |
| return a; |
| } |
| |
| double sse_mul_sd(double x, double y) |
| { |
| volatile double a = x; |
| volatile double b = y; |
| |
| // defeat x87 |
| __m128d va = _mm_set_sd( (double) a ); |
| __m128d vb = _mm_set_sd( (double) b ); |
| va = _mm_mul_sd( va, vb ); |
| _mm_store_sd( (double*) &a, va ); |
| return a; |
| } |
| #endif |
| |
| #ifdef __PPC__ |
| float ppc_mul(float a, float b) |
| { |
| float p; |
| |
| if (gForceFTZ) { |
| // Flush input a to zero if it is sub-normal |
| if (fabsf(a) < FLT_MIN) { |
| a = copysignf(0.0, a); |
| } |
| // Flush input b to zero if it is sub-normal |
| if (fabsf(b) < FLT_MIN) { |
| b = copysignf(0.0, b); |
| } |
| // Perform multiply |
| p = a * b; |
| // Flush the product if it is a sub-normal |
| if (fabs((double)a * (double)b) < FLT_MIN) { |
| p = copysignf(0.0, p); |
| } |
| } else { |
| p = a * b; |
| } |
| return p; |
| } |
| #endif |
| |
| int test_contractions_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(0); |
| } |
| |
| int test_contractions_float_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(1); |
| } |
| |
| int test_contractions_float_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(2); |
| } |
| |
| int test_contractions_float_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(3); |
| } |
| |
| int test_contractions_float_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(4); |
| } |
| |
| int test_contractions_float_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(5); |
| } |
| |
| int test_contractions_float_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(6); |
| } |
| |
| int test_contractions_float_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest(7); |
| } |
| |
| int test_contractions_double_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(0); |
| } |
| |
| int test_contractions_double_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(1); |
| } |
| |
| int test_contractions_double_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(2); |
| } |
| |
| int test_contractions_double_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(3); |
| } |
| |
| int test_contractions_double_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(4); |
| } |
| |
| int test_contractions_double_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(5); |
| } |
| |
| int test_contractions_double_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(6); |
| } |
| |
| int test_contractions_double_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return RunTest_Double(7); |
| } |
| |
| test_definition test_list[] = { |
| ADD_TEST( contractions_float_0 ), |
| ADD_TEST( contractions_float_1 ), |
| ADD_TEST( contractions_float_2 ), |
| ADD_TEST( contractions_float_3 ), |
| ADD_TEST( contractions_float_4 ), |
| ADD_TEST( contractions_float_5 ), |
| ADD_TEST( contractions_float_6 ), |
| ADD_TEST( contractions_float_7 ), |
| ADD_TEST( contractions_double_0 ), |
| ADD_TEST( contractions_double_1 ), |
| ADD_TEST( contractions_double_2 ), |
| ADD_TEST( contractions_double_3 ), |
| ADD_TEST( contractions_double_4 ), |
| ADD_TEST( contractions_double_5 ), |
| ADD_TEST( contractions_double_6 ), |
| ADD_TEST( contractions_double_7 ), |
| }; |
| |
| const int test_num = ARRAY_SIZE( test_list ); |
| |
| int main( int argc, const char **argv ) |
| { |
| argc = parseCustomParam(argc, argv); |
| if (argc == -1) |
| { |
| return -1; |
| } |
| |
| int error = ParseArgs( argc, argv ); |
| |
| if( !error ) |
| { |
| error = runTestHarnessWithCheck( gArgCount, gArgList, test_num, test_list, true, 0, InitCL ); |
| } |
| |
| if( gQueue ) |
| { |
| int flush_error = clFinish( gQueue ); |
| if( flush_error ) |
| log_error( "clFinish failed: %d\n", flush_error ); |
| } |
| |
| ReleaseCL(); |
| free( gArgList ); |
| |
| return error; |
| } |
| |
| |
| |
| static int ParseArgs( int argc, const char **argv ) |
| { |
| gArgList = (const char **)calloc( argc, sizeof( char*) ); |
| |
| if( NULL == gArgList ) |
| { |
| vlog_error( "Failed to allocate memory for argList\n" ); |
| return 1; |
| } |
| |
| gArgList[0] = argv[0]; |
| gArgCount = 1; |
| |
| int length_of_seed = 0; |
| |
| { // Extract the app name |
| strncpy( appName, argv[0], MAXPATHLEN ); |
| |
| #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__)) |
| char baseName[MAXPATHLEN]; |
| char *base = NULL; |
| strncpy( baseName, argv[0], MAXPATHLEN ); |
| base = basename( baseName ); |
| if( NULL != base ) |
| { |
| strncpy( appName, base, sizeof( appName ) ); |
| appName[ sizeof( appName ) -1 ] = '\0'; |
| } |
| #elif defined (_WIN32) |
| char fname[_MAX_FNAME + _MAX_EXT + 1]; |
| char ext[_MAX_EXT]; |
| |
| errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0, |
| fname, _MAX_FNAME, ext, _MAX_EXT ); |
| if (err == 0) { // no error |
| strcat (fname, ext); //just cat them, size of frame can keep both |
| strncpy (appName, fname, sizeof(appName)); |
| appName[ sizeof( appName ) -1 ] = '\0'; |
| } |
| #endif |
| } |
| |
| for( int i = 1; i < argc; i++ ) |
| { |
| const char *arg = argv[i]; |
| if( NULL == arg ) |
| break; |
| |
| if( arg[0] == '-' ) |
| { |
| while( arg[1] != '\0' ) |
| { |
| arg++; |
| switch( *arg ) |
| { |
| case 'h': |
| PrintUsage(); |
| return -1; |
| |
| case 's': |
| arg++; |
| gSeed = atoi( arg ); |
| while (arg[length_of_seed] >='0' && arg[length_of_seed]<='9') |
| length_of_seed++; |
| gSeedSpecified = 1; |
| arg+=length_of_seed-1; |
| break; |
| |
| case 'z': |
| gForceFTZ ^= 1; |
| break; |
| |
| default: |
| vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); |
| PrintUsage(); |
| return -1; |
| } |
| } |
| } |
| else |
| { |
| gArgList[gArgCount] = arg; |
| gArgCount++; |
| } |
| } |
| vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ ); |
| |
| PrintArch(); |
| |
| return 0; |
| } |
| |
| static void PrintUsage( void ) |
| { |
| vlog( "%s [-z]: <optional: test names>\n", appName ); |
| vlog( "\tOptions:\n" ); |
| vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" ); |
| vlog( "\t\t-sNUMBER set random seed.\n"); |
| vlog( "\n" ); |
| vlog( "\tTest names:\n" ); |
| for( int i = 0; i < test_num; i++ ) |
| { |
| vlog( "\t\t%s\n", test_list[i].name ); |
| } |
| } |
| |
| const char *sizeNames[] = { "float", "float2", "float4", "float8", "float16" }; |
| const char *sizeNames_double[] = { "double", "double2", "double4", "double8", "double16" }; |
| |
| test_status InitCL( cl_device_id device ) |
| { |
| int error; |
| uint32_t i, j; |
| int *bufSkip = NULL; |
| int isRTZ = 0; |
| RoundingMode oldRoundMode = kDefaultRoundingMode; |
| |
| cl_device_fp_config floatCapabilities = 0; |
| if( (error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL))) |
| floatCapabilities = 0; |
| if(0 == (CL_FP_DENORM & floatCapabilities) ) |
| gForceFTZ ^= 1; |
| |
| // check for cl_khr_fp64 |
| gHasDouble = is_extension_available(device, "cl_khr_fp64" ); |
| |
| if(0 == (CL_FP_INF_NAN & floatCapabilities) ) |
| gSkipNanInf = 1; |
| |
| // Embedded devices that flush to zero are allowed to have an undefined sign. |
| if (gIsEmbedded && gForceFTZ) |
| gIgnoreZeroSign = 1; |
| |
| gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); |
| if( NULL == gContext || error ) |
| { |
| vlog_error( "clCreateDeviceGroup failed. %d\n", error ); |
| return TEST_FAIL; |
| } |
| |
| gQueue = clCreateCommandQueue( gContext, device, 0, &error ); |
| if( NULL == gQueue || error ) |
| { |
| vlog_error( "clCreateContext failed. %d\n", error ); |
| return TEST_FAIL; |
| } |
| |
| // setup input buffers |
| bufA = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| bufB = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| bufC = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| bufD = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| bufE = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| |
| if( bufA == NULL || |
| bufB == NULL || |
| bufC == NULL || |
| bufD == NULL || |
| bufE == NULL ) |
| { |
| vlog_error( "clCreateArray failed for input\n" ); |
| return TEST_FAIL; |
| } |
| |
| if( gHasDouble ) |
| { |
| bufC_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| bufD_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); |
| if( bufC_double == NULL || |
| bufD_double == NULL ) |
| { |
| vlog_error( "clCreateArray failed for input DP\n" ); |
| return TEST_FAIL; |
| } |
| } |
| |
| const char *kernels[] = { |
| "", "#pragma OPENCL FP_CONTRACT OFF\n" |
| "__kernel void kernel1( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = a[i] * b[i] + c[i];\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel2( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = a[i] * b[i] - c[i];\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel3( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = c[i] + a[i] * b[i];\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel4( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = c[i] - a[i] * b[i];\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel5( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = -(a[i] * b[i] + c[i]);\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel6( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = -(a[i] * b[i] - c[i]);\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel7( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = -(c[i] + a[i] * b[i]);\n" |
| "}\n" |
| "\n" |
| "__kernel void kernel8( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" |
| "{\n" |
| " int i = get_global_id(0);\n" |
| " out[i] = -(c[i] - a[i] * b[i]);\n" |
| "}\n" |
| "\n" }; |
| |
| for (i = 0; i < sizeof(sizeNames) / sizeof(sizeNames[0]); i++) |
| { |
| size_t strCount = sizeof(kernels) / sizeof(kernels[0]); |
| kernels[0] = ""; |
| |
| for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames[i]; |
| error = create_single_kernel_helper(gContext, &gProgram[i], nullptr, |
| strCount, kernels, nullptr); |
| if (CL_SUCCESS != error || nullptr == gProgram[i]) |
| { |
| log_error("Error: Unable to create test program! (%s) (in %s:%d)\n", |
| IGetErrorString(error), __FILE__, __LINE__); |
| return TEST_FAIL; |
| } |
| } |
| |
| if (gHasDouble) |
| { |
| kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; |
| for (i = 0; i < sizeof(sizeNames_double) / sizeof(sizeNames_double[0]); |
| i++) |
| { |
| size_t strCount = sizeof(kernels) / sizeof(kernels[0]); |
| |
| for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames_double[i]; |
| error = create_single_kernel_helper(gContext, &gProgram_double[i], |
| nullptr, strCount, kernels, |
| nullptr); |
| if (CL_SUCCESS != error || nullptr == gProgram_double[i]) |
| { |
| log_error( |
| "Error: Unable to create test program! (%s) (in %s:%d)\n", |
| IGetErrorString(error), __FILE__, __LINE__); |
| return TEST_FAIL; |
| } |
| } |
| } |
| |
| if( 0 == gSeedSpecified ) |
| { |
| time_t currentTime = time( NULL ); |
| struct tm *t = localtime(¤tTime); |
| gSeed = t->tm_sec + 60 * ( t->tm_min + 60 * (t->tm_hour + 24 * (t->tm_yday + 365 * t->tm_year))); |
| gSeed = (uint32_t) (((uint64_t) gSeed * (uint64_t) gSeed ) >> 16); |
| } |
| gMTdata = init_genrand( gSeed ); |
| |
| |
| // Init bufA and bufB |
| { |
| buf1 = (float *)malloc( BUFFER_SIZE ); |
| buf2 = (float *)malloc( BUFFER_SIZE ); |
| buf3 = (float *)malloc( BUFFER_SIZE ); |
| buf4 = (float *)malloc( BUFFER_SIZE ); |
| buf5 = (float *)malloc( BUFFER_SIZE ); |
| buf6 = (float *)malloc( BUFFER_SIZE ); |
| |
| bufSkip = (int *)malloc( BUFFER_SIZE ); |
| |
| if( NULL == buf1 || NULL == buf2 || NULL == buf3 || NULL == buf4 || NULL == buf5 || NULL == buf6 || NULL == bufSkip) |
| { |
| vlog_error( "Out of memory initializing buffers\n" ); |
| return TEST_FAIL; |
| } |
| for( i = 0; i < sizeof( correct ) / sizeof( correct[0] ); i++ ) |
| { |
| correct[i] = (float *)malloc( BUFFER_SIZE ); |
| skipTest[i] = (int *)malloc( BUFFER_SIZE ); |
| if(( NULL == correct[i] ) || ( NULL == skipTest[i])) |
| { |
| vlog_error( "Out of memory initializing buffers 2\n" ); |
| return TEST_FAIL; |
| } |
| } |
| |
| for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) |
| ((uint32_t*) buf1)[i] = genrand_int32( gMTdata ); |
| |
| if( (error = clEnqueueWriteBuffer(gQueue, bufA, CL_FALSE, 0, BUFFER_SIZE, buf1, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer1\n", error ); |
| return TEST_FAIL; |
| } |
| |
| for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) |
| ((uint32_t*) buf2)[i] = genrand_int32( gMTdata ); |
| |
| if( (error = clEnqueueWriteBuffer(gQueue, bufB, CL_FALSE, 0, BUFFER_SIZE, buf2, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer2\n", error ); |
| return TEST_FAIL; |
| } |
| |
| void *ftzInfo = NULL; |
| if( gForceFTZ ) |
| ftzInfo = FlushToZero(); |
| if ((CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) && gIsEmbedded) { |
| oldRoundMode = set_round(kRoundTowardZero, kfloat); |
| isRTZ = 1; |
| } |
| float *f = (float*) buf1; |
| float *f2 = (float*) buf2; |
| float *f3 = (float*) buf3; |
| float *f4 = (float*) buf4; |
| for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) |
| { |
| float q = f[i]; |
| float q2 = f2[i]; |
| |
| feclearexcept(FE_OVERFLOW); |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| // VS2005 might use x87 for straight multiplies, and we can't |
| // turn that off |
| f3[i] = sse_mul(q, q2); |
| f4[i] = sse_mul(-q, q2); |
| #elif defined(__PPC__) |
| // None of the current generation PPC processors support HW |
| // FTZ, emulate it in sw. |
| f3[i] = ppc_mul(q, q2); |
| f4[i] = ppc_mul(-q, q2); |
| #else |
| f3[i] = q * q2; |
| f4[i] = -q * q2; |
| #endif |
| // Skip test if the device doesn't support infinities and NaN AND the result overflows |
| // or either input is an infinity of NaN |
| bufSkip[i] = (gSkipNanInf && ((FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))) || |
| (fabsf(q) == FLT_MAX) || (q != q) || |
| (fabsf(q2) == FLT_MAX) || (q2 != q2))); |
| } |
| |
| if( gForceFTZ ) |
| UnFlushToZero(ftzInfo); |
| |
| if (isRTZ) |
| (void)set_round(oldRoundMode, kfloat); |
| |
| |
| if( (error = clEnqueueWriteBuffer(gQueue, bufC, CL_FALSE, 0, BUFFER_SIZE, buf3, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error ); |
| return TEST_FAIL; |
| } |
| if( (error = clEnqueueWriteBuffer(gQueue, bufD, CL_FALSE, 0, BUFFER_SIZE, buf4, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error ); |
| return TEST_FAIL; |
| } |
| |
| // Fill the buffers with NaN |
| float *f5 = (float*) buf5; |
| float nan_val = nanf(""); |
| for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ ) |
| f5[i] = nan_val; |
| |
| // calculate reference results |
| for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ ) |
| { |
| for ( j=0; j<8; j++) |
| { |
| feclearexcept(FE_OVERFLOW); |
| switch (j) |
| { |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| // VS2005 might use x87 for straight add/sub, and we can't |
| // turn that off |
| case 0: |
| correct[0][i] = sse_add(buf3[i],buf4[i]); break; |
| case 1: |
| correct[1][i] = sse_sub(buf3[i],buf3[i]); break; |
| case 2: |
| correct[2][i] = sse_add(buf4[i],buf3[i]); break; |
| case 3: |
| correct[3][i] = sse_sub(buf3[i],buf3[i]); break; |
| case 4: |
| correct[4][i] = -sse_add(buf3[i],buf4[i]); break; |
| case 5: |
| correct[5][i] = -sse_sub(buf3[i],buf3[i]); break; |
| case 6: |
| correct[6][i] = -sse_add(buf4[i],buf3[i]); break; |
| case 7: |
| correct[7][i] = -sse_sub(buf3[i],buf3[i]); break; |
| #else |
| case 0: |
| correct[0][i] = buf3[i] + buf4[i]; break; |
| case 1: |
| correct[1][i] = buf3[i] - buf3[i]; break; |
| case 2: |
| correct[2][i] = buf4[i] + buf3[i]; break; |
| case 3: |
| correct[3][i] = buf3[i] - buf3[i]; break; |
| case 4: |
| correct[4][i] = -(buf3[i] + buf4[i]); break; |
| case 5: |
| correct[5][i] = -(buf3[i] - buf3[i]); break; |
| case 6: |
| correct[6][i] = -(buf4[i] + buf3[i]); break; |
| case 7: |
| correct[7][i] = -(buf3[i] - buf3[i]); break; |
| #endif |
| } |
| // Further skip test inputs if the device doesn support infinities AND NaNs |
| // resulting sum overflows |
| skipTest[j][i] = (bufSkip[i] || |
| (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))))); |
| |
| #if defined(__PPC__) |
| // Since the current Power processors don't emulate flush to zero in HW, |
| // it must be emulated in SW instead. |
| if (gForceFTZ) |
| { |
| if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f)) |
| correct[j][i] = copysignf(0.0f, correct[j][i]); |
| } |
| #endif |
| } |
| } |
| if( gHasDouble ) |
| { |
| // Spec requires correct non-flushed results |
| // for doubles. We disable FTZ if this is default on |
| // the platform (like ARM) for reference result computation |
| // It is no-op if platform default is not FTZ (e.g. x86) |
| FPU_mode_type oldMode; |
| DisableFTZ( &oldMode ); |
| |
| buf3_double = (double *)malloc( BUFFER_SIZE ); |
| buf4_double = (double *)malloc( BUFFER_SIZE ); |
| buf5_double = (double *)malloc( BUFFER_SIZE ); |
| buf6_double = (double *)malloc( BUFFER_SIZE ); |
| if( NULL == buf3_double || NULL == buf4_double || NULL == buf5_double || NULL == buf6_double ) |
| { |
| vlog_error( "Out of memory initializing DP buffers\n" ); |
| return TEST_FAIL; |
| } |
| for( i = 0; i < sizeof( correct_double ) / sizeof( correct_double[0] ); i++ ) |
| { |
| correct_double[i] = (double *)malloc( BUFFER_SIZE ); |
| if( NULL == correct_double[i] ) |
| { |
| vlog_error( "Out of memory initializing DP buffers 2\n" ); |
| return TEST_FAIL; |
| } |
| } |
| |
| |
| double *f = (double*) buf1; |
| double *f2 = (double*) buf2; |
| double *f3 = (double*) buf3_double; |
| double *f4 = (double*) buf4_double; |
| for( i = 0; i < BUFFER_SIZE / sizeof(double); i++ ) |
| { |
| double q = f[i]; |
| double q2 = f2[i]; |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| // VS2005 might use x87 for straight multiplies, and we can't |
| // turn that off |
| f3[i] = sse_mul_sd(q, q2); |
| f4[i] = sse_mul_sd(-q, q2); |
| #else |
| f3[i] = q * q2; |
| f4[i] = -q * q2; |
| #endif |
| } |
| |
| if( (error = clEnqueueWriteBuffer(gQueue, bufC_double, CL_FALSE, 0, BUFFER_SIZE, buf3_double, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error ); |
| return TEST_FAIL; |
| } |
| if( (error = clEnqueueWriteBuffer(gQueue, bufD_double, CL_FALSE, 0, BUFFER_SIZE, buf4_double, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error ); |
| return TEST_FAIL; |
| } |
| |
| // Fill the buffers with NaN |
| double *f5 = (double*) buf5_double; |
| double nan_val = nanf(""); |
| for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) |
| f5[i] = nan_val; |
| |
| // calculate reference results |
| for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) |
| { |
| #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| // VS2005 might use x87 for straight add/sub, and we can't |
| // turn that off |
| correct_double[0][i] = sse_add_sd(buf3_double[i],buf4_double[i]); |
| correct_double[1][i] = sse_sub_sd(buf3_double[i],buf3_double[i]); |
| correct_double[2][i] = sse_add_sd(buf4_double[i],buf3_double[i]); |
| correct_double[3][i] = sse_sub_sd(buf3_double[i],buf3_double[i]); |
| correct_double[4][i] = -sse_add_sd(buf3_double[i],buf4_double[i]); |
| correct_double[5][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]); |
| correct_double[6][i] = -sse_add_sd(buf4_double[i],buf3_double[i]); |
| correct_double[7][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]); |
| #else |
| correct_double[0][i] = buf3_double[i] + buf4_double[i]; |
| correct_double[1][i] = buf3_double[i] - buf3_double[i]; |
| correct_double[2][i] = buf4_double[i] + buf3_double[i]; |
| correct_double[3][i] = buf3_double[i] - buf3_double[i]; |
| correct_double[4][i] = -(buf3_double[i] + buf4_double[i]); |
| correct_double[5][i] = -(buf3_double[i] - buf3_double[i]); |
| correct_double[6][i] = -(buf4_double[i] + buf3_double[i]); |
| correct_double[7][i] = -(buf3_double[i] - buf3_double[i]); |
| #endif |
| } |
| |
| // Restore previous FP state since we modified it for |
| // reference result computation (see DisableFTZ call above) |
| RestoreFPState(&oldMode); |
| } |
| } |
| |
| char c[1000]; |
| static const char *no_yes[] = { "NO", "YES" }; |
| vlog( "\nCompute Device info:\n" ); |
| clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof(c), (void *)&c, NULL); |
| vlog( "\tDevice Name: %s\n", c ); |
| clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof(c), (void *)&c, NULL); |
| vlog( "\tVendor: %s\n", c ); |
| clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(c), (void *)&c, NULL); |
| vlog( "\tDevice Version: %s\n", c ); |
| clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL); |
| vlog( "\tCL C Version: %s\n", c ); |
| clGetDeviceInfo( device, CL_DRIVER_VERSION, sizeof(c), (void *)&c, NULL); |
| vlog( "\tDriver Version: %s\n", c ); |
| vlog( "\tSubnormal values supported? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] ); |
| vlog( "\tTesting with FTZ mode ON? %s\n", no_yes[0 != gForceFTZ] ); |
| vlog( "\tTesting Doubles? %s\n", no_yes[0 != gHasDouble] ); |
| vlog( "\tRandom Number seed: 0x%8.8x\n", gSeed ); |
| vlog( "\n\n" ); |
| |
| return TEST_PASS; |
| } |
| |
| static void ReleaseCL( void ) |
| { |
| clReleaseMemObject(bufA); |
| clReleaseMemObject(bufB); |
| clReleaseMemObject(bufC); |
| clReleaseMemObject(bufD); |
| clReleaseMemObject(bufE); |
| clReleaseProgram(gProgram[0]); |
| clReleaseProgram(gProgram[1]); |
| clReleaseProgram(gProgram[2]); |
| clReleaseProgram(gProgram[3]); |
| clReleaseProgram(gProgram[4]); |
| if( gHasDouble ) |
| { |
| clReleaseMemObject(bufC_double); |
| clReleaseMemObject(bufD_double); |
| clReleaseProgram(gProgram_double[0]); |
| clReleaseProgram(gProgram_double[1]); |
| clReleaseProgram(gProgram_double[2]); |
| clReleaseProgram(gProgram_double[3]); |
| clReleaseProgram(gProgram_double[4]); |
| } |
| clReleaseCommandQueue(gQueue); |
| clReleaseContext(gContext); |
| } |
| |
| |
| static int RunTest( int testNumber ) |
| { |
| size_t i; |
| int error = 0; |
| cl_mem args[4]; |
| float *c; |
| const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4", |
| "kernel5", "kernel6", "kernel7", "kernel8" }; |
| switch( testNumber ) |
| { |
| case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; // a * b + c |
| case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; |
| case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; |
| case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; |
| case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; |
| case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; |
| case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; |
| case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; |
| default: |
| vlog_error( "Unknown test case %d passed to RunTest\n", testNumber ); |
| return -1; |
| } |
| |
| |
| int vectorSize; |
| for( vectorSize = 0; vectorSize < 5; vectorSize++ ) |
| { |
| cl_kernel k = clCreateKernel( gProgram[ vectorSize ], kernelName[ testNumber ], &error ); |
| if( NULL == k || error ) |
| { |
| vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize ); |
| return -2; |
| } |
| |
| // set the kernel args |
| for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ ) |
| if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) )) |
| { |
| vlog_error( "Error %d setting kernel arg # %ld\n", error, i ); |
| return error; |
| } |
| |
| // write NaNs to the result array |
| if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf5, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber ); |
| return error; |
| } |
| |
| // execute the kernel |
| size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_float ) * (1<<vectorSize)), 0, 0 }; |
| if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) ))) |
| { |
| vlog_error( "Got Error # %d trying to execture kernel\n", error ); |
| return error; |
| } |
| |
| // read the data back |
| if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6, 0, NULL, NULL ) )) |
| { |
| vlog_error( "Failure %d at clReadArray %d\n", error, testNumber ); |
| return error; |
| } |
| |
| // verify results |
| float *test = (float*) buf6; |
| float *a = (float*) buf1; |
| float *b = (float*) buf2; |
| for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ ) |
| { |
| if( isnan(test[i]) && isnan(correct[testNumber][i] ) ) |
| continue; |
| |
| if( skipTest[testNumber][i] ) |
| continue; |
| |
| // sign of zero must be correct |
| if(( ((uint32_t*) test)[i] != ((uint32_t*) correct[testNumber])[i] ) && |
| !(gIgnoreZeroSign && (test[i] == 0.0f) && (correct[testNumber][i] == 0.0f)) ) |
| { |
| switch( testNumber ) |
| { |
| // Zeros for these should be positive |
| case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| |
| // Zeros for these should be negative |
| case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1; |
| default: |
| vlog_error( "error: Unknown test number!\n" ); |
| clReleaseKernel(k); |
| return -2; |
| } |
| } |
| } |
| |
| clReleaseKernel(k); |
| } |
| |
| return error; |
| } |
| |
| static int RunTest_Double( int testNumber ) |
| { |
| if( !gHasDouble ) |
| { |
| vlog("Double is not supported, test not run.\n"); |
| return 0; |
| } |
| |
| size_t i; |
| int error = 0; |
| cl_mem args[4]; |
| double *c; |
| const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4", |
| "kernel5", "kernel6", "kernel7", "kernel8" }; |
| |
| switch( testNumber ) |
| { |
| case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; // a * b + c |
| case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break; |
| case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; |
| case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break; |
| case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; |
| case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break; |
| case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; |
| case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break; |
| default: |
| vlog_error( "Unknown test case %d passed to RunTest\n", testNumber ); |
| return -1; |
| } |
| |
| int vectorSize; |
| for( vectorSize = 0; vectorSize < 5; vectorSize++ ) |
| { |
| cl_kernel k = clCreateKernel( gProgram_double[ vectorSize ], kernelName[ testNumber ], &error ); |
| if( NULL == k || error ) |
| { |
| vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize ); |
| return -2; |
| } |
| |
| // set the kernel args |
| for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ ) |
| if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) )) |
| { |
| vlog_error( "Error %d setting kernel arg # %ld\n", error, i ); |
| return error; |
| } |
| |
| // write NaNs to the result array |
| if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_FALSE, 0, BUFFER_SIZE, buf5_double, 0, NULL, NULL) )) |
| { |
| vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber ); |
| return error; |
| } |
| |
| // execute the kernel |
| size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_double ) * (1<<vectorSize)), 0, 0 }; |
| if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) ))) |
| { |
| vlog_error( "Got Error # %d trying to execture kernel\n", error ); |
| return error; |
| } |
| |
| // read the data back |
| if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6_double, 0, NULL, NULL ) )) |
| { |
| vlog_error( "Failure %d at clReadArray %d\n", error, testNumber ); |
| return error; |
| } |
| |
| // verify results |
| double *test = (double*) buf6_double; |
| double *a = (double*) buf1; |
| double *b = (double*) buf2; |
| for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) |
| { |
| if( isnan(test[i]) && isnan(correct_double[testNumber][i] ) ) |
| continue; |
| |
| // sign of zero must be correct |
| if( ((uint64_t*) test)[i] != ((uint64_t*) correct_double[testNumber])[i] ) |
| { |
| switch( testNumber ) |
| { |
| // Zeros for these should be positive |
| case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1; |
| case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1; |
| case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1; |
| case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1; |
| |
| // Zeros for these should be negative |
| case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1; |
| case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1; |
| case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1; |
| case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ], |
| c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1; |
| default: |
| vlog_error( "error: Unknown test number!\n" ); |
| return -2; |
| } |
| } |
| } |
| |
| clReleaseKernel(k); |
| } |
| |
| return error; |
| } |