| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "testBase.h" |
| #include "harness/conversions.h" |
| |
| #define TEST_SIZE 512 |
| |
| #ifndef MIN |
| #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b)) |
| #endif |
| #ifndef MAX |
| #define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b)) |
| #endif |
| |
| const char *singleParamIntegerKernelSourcePattern = |
| "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " %s%s tmp = vload%s( tid, destValues );\n" |
| " tmp %s= %s( vload%s( tid, sourceA ) );\n" |
| " vstore%s( tmp, tid, destValues );\n" |
| "\n" |
| "}\n"; |
| |
| const char *singleParamSingleSizeIntegerKernelSourcePattern = |
| "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " destValues[tid] %s= %s( sourceA[tid] );\n" |
| "}\n"; |
| |
| typedef bool (*singleParamIntegerVerifyFn)( void *source, void *destination, ExplicitType vecType ); |
| static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType ); |
| bool verify_integer_divideAssign( void *source, void *destination, ExplicitType vecType ); |
| bool verify_integer_moduloAssign( void *source, void *destination, ExplicitType vecType ); |
| |
| int test_single_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, |
| ExplicitType vecType, size_t vecSize, singleParamIntegerVerifyFn verifyFn, |
| MTdata d, bool useOpKernel = false ) |
| { |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper streams[2]; |
| cl_long inDataA[TEST_SIZE * 16], outData[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], expected; |
| int error, i; |
| size_t threads[1], localThreads[1]; |
| char kernelSource[10240]; |
| char *programPtr; |
| char sizeName[4]; |
| |
| if (! gHasLong && strstr(get_explicit_type_name(vecType),"long")) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecType) ); |
| return CL_SUCCESS; |
| } |
| |
| /* Create the source */ |
| if( vecSize == 1 ) |
| sizeName[ 0 ] = 0; |
| else |
| sprintf( sizeName, "%d", (int)vecSize ); |
| |
| if( vecSize == 1 ) |
| sprintf( kernelSource, singleParamSingleSizeIntegerKernelSourcePattern, |
| get_explicit_type_name( vecType ), get_explicit_type_name( vecType ), |
| useOpKernel ? fnName : "", useOpKernel ? "" : fnName ); |
| else |
| sprintf( kernelSource, singleParamIntegerKernelSourcePattern, |
| get_explicit_type_name( vecType ), get_explicit_type_name( vecType ), |
| get_explicit_type_name( vecType ), sizeName, sizeName, |
| useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName, |
| sizeName ); |
| |
| /* Create kernels */ |
| programPtr = kernelSource; |
| if (create_single_kernel_helper(context, &program, &kernel, 1, |
| (const char **)&programPtr, "sample_test")) |
| { |
| log_error("The program we attempted to compile was: \n%s\n", kernelSource); |
| return -1; |
| } |
| |
| /* Generate some streams */ |
| generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA ); |
| |
| streams[0] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inDataA, NULL); |
| if( streams[0] == NULL ) |
| { |
| log_error("ERROR: Creating input array A failed!\n"); |
| return -1; |
| } |
| |
| if( useOpKernel ) |
| { |
| // Op kernels use an r/w buffer for the second param, so we need to init it with data |
| generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB ); |
| } |
| streams[1] = clCreateBuffer( |
| context, (CL_MEM_READ_WRITE | (useOpKernel ? CL_MEM_COPY_HOST_PTR : 0)), |
| get_explicit_type_size(vecType) * vecSize * TEST_SIZE, |
| (useOpKernel) ? &inDataB : NULL, NULL); |
| if( streams[1] == NULL ) |
| { |
| log_error("ERROR: Creating output array failed!\n"); |
| return -1; |
| } |
| |
| /* Assign streams and execute */ |
| error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| |
| /* Run the kernel */ |
| threads[0] = TEST_SIZE; |
| |
| error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); |
| test_error( error, "Unable to get work group size to use" ); |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); |
| test_error( error, "Unable to execute test kernel" ); |
| |
| memset(outData, 0xFF, get_explicit_type_size( vecType ) * TEST_SIZE * vecSize ); |
| |
| /* Now get the results */ |
| error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, |
| get_explicit_type_size( vecType ) * TEST_SIZE * vecSize, |
| outData, 0, NULL, NULL ); |
| test_error( error, "Unable to read output array!" ); |
| |
| // deal with division by 0 -- any answer is allowed here |
| if( verifyFn == verify_integer_divideAssign || verifyFn == verify_integer_moduloAssign ) |
| patchup_divide_results( outData, inDataA, inDataB, TEST_SIZE * vecSize, vecType ); |
| |
| /* And verify! */ |
| char *p = (char *)outData; |
| char *in = (char *)inDataA; |
| char *in2 = (char *)inDataB; |
| for( i = 0; i < (int)TEST_SIZE; i++ ) |
| { |
| for( size_t j = 0; j < vecSize; j++ ) |
| { |
| if( useOpKernel ) |
| memcpy( &expected, in2, get_explicit_type_size( vecType ) ); |
| |
| verifyFn( in, &expected, vecType ); |
| if( memcmp( &expected, p, get_explicit_type_size( vecType ) ) != 0 ) |
| { |
| switch( get_explicit_type_size( vecType )) |
| { |
| case 1: |
| if( useOpKernel ) |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x)\n", |
| (int)i, (int)j, |
| ((cl_uchar*)&expected)[0], |
| *( (cl_uchar *)p ), |
| *( (cl_uchar *)in ), |
| *( (cl_uchar *)in2 ) ); |
| else |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x)\n", |
| (int)i, (int)j, |
| ((cl_uchar*)&expected)[0], |
| *( (cl_uchar *)p ), |
| *( (cl_uchar *)in ) ); |
| break; |
| |
| case 2: |
| if( useOpKernel ) |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x)\n", |
| (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ), |
| *( (cl_ushort *)in ), *( (cl_ushort *)in2 ) ); |
| else |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x)\n", |
| (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ), |
| *( (cl_ushort *)in ) ); |
| break; |
| |
| case 4: |
| if( useOpKernel ) |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n", |
| (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ), |
| *( (cl_uint *)in ), *( (cl_uint *)in2 ) ); |
| else |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x)\n", |
| (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ), |
| *( (cl_uint *)in ) ); |
| break; |
| |
| case 8: |
| if( useOpKernel ) |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n", |
| (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ), |
| *( (cl_ulong *)in ), *( (cl_ulong *)in2 ) ); |
| else |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx)\n", |
| (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ), |
| *( (cl_ulong *)in ) ); |
| break; |
| } |
| return -1; |
| } |
| p += get_explicit_type_size( vecType ); |
| in += get_explicit_type_size( vecType ); |
| in2 += get_explicit_type_size( vecType ); |
| } |
| } |
| |
| return 0; |
| } |
| |
| int test_single_param_integer_fn( cl_command_queue queue, cl_context context, const char *fnName, singleParamIntegerVerifyFn verifyFn, bool useOpKernel = false ) |
| { |
| ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; |
| unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO 3 not tested |
| unsigned int index, typeIndex; |
| int retVal = 0; |
| RandomSeed seed(gRandomSeed ); |
| |
| for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) |
| { |
| if ((types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong) |
| continue; |
| |
| for( index = 0; vecSizes[ index ] != 0; index++ ) |
| { |
| if( test_single_param_integer_kernel(queue, context, fnName, types[ typeIndex ], vecSizes[ index ], verifyFn, seed, useOpKernel ) != 0 ) |
| { |
| log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] ); |
| retVal = -1; |
| } |
| } |
| } |
| |
| return retVal; |
| } |
| |
| bool verify_integer_clz( void *source, void *destination, ExplicitType vecType ) |
| { |
| cl_long testValue; |
| int count; |
| int typeBits; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| testValue = *( (cl_char *)source ); |
| typeBits = 8 * sizeof( cl_char ); |
| break; |
| case kUChar: |
| testValue = *( (cl_uchar *)source ); |
| typeBits = 8 * sizeof( cl_uchar ); |
| break; |
| case kShort: |
| testValue = *( (cl_short *)source ); |
| typeBits = 8 * sizeof( cl_short ); |
| break; |
| case kUShort: |
| testValue = *( (cl_ushort *)source ); |
| typeBits = 8 * sizeof( cl_ushort ); |
| break; |
| case kInt: |
| testValue = *( (cl_int *)source ); |
| typeBits = 8 * sizeof( cl_int ); |
| break; |
| case kUInt: |
| testValue = *( (cl_uint *)source ); |
| typeBits = 8 * sizeof( cl_uint ); |
| break; |
| case kLong: |
| testValue = *( (cl_long *)source ); |
| typeBits = 8 * sizeof( cl_long ); |
| break; |
| case kULong: |
| // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting |
| testValue = *( (cl_ulong *)source ); |
| typeBits = 8 * sizeof( cl_ulong ); |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| |
| count = typeBits; |
| if( testValue ) |
| { |
| testValue <<= 8 * sizeof( testValue ) - typeBits; |
| for( count = 0; 0 == (testValue & CL_LONG_MIN); count++ ) |
| testValue <<= 1; |
| } |
| |
| switch( vecType ) |
| { |
| case kChar: |
| *( (cl_char *)destination ) = count; |
| break; |
| case kUChar: |
| *( (cl_uchar *)destination ) = count; |
| break; |
| case kShort: |
| *( (cl_short *)destination ) = count; |
| break; |
| case kUShort: |
| *( (cl_ushort *)destination ) = count; |
| break; |
| case kInt: |
| *( (cl_int *)destination ) = count; |
| break; |
| case kUInt: |
| *( (cl_uint *)destination ) = count; |
| break; |
| case kLong: |
| *( (cl_long *)destination ) = count; |
| break; |
| case kULong: |
| *( (cl_ulong *)destination ) = count; |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_single_param_integer_fn( queue, context, "clz", verify_integer_clz ); |
| } |
| |
| |
| bool verify_integer_ctz( void *source, void *destination, ExplicitType vecType ) |
| { |
| cl_long testValue; |
| int count; |
| int typeBits; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| testValue = *( (cl_char *)source ); |
| typeBits = 8 * sizeof( cl_char ); |
| break; |
| case kUChar: |
| testValue = *( (cl_uchar *)source ); |
| typeBits = 8 * sizeof( cl_uchar ); |
| break; |
| case kShort: |
| testValue = *( (cl_short *)source ); |
| typeBits = 8 * sizeof( cl_short ); |
| break; |
| case kUShort: |
| testValue = *( (cl_ushort *)source ); |
| typeBits = 8 * sizeof( cl_ushort ); |
| break; |
| case kInt: |
| testValue = *( (cl_int *)source ); |
| typeBits = 8 * sizeof( cl_int ); |
| break; |
| case kUInt: |
| testValue = *( (cl_uint *)source ); |
| typeBits = 8 * sizeof( cl_uint ); |
| break; |
| case kLong: |
| testValue = *( (cl_long *)source ); |
| typeBits = 8 * sizeof( cl_long ); |
| break; |
| case kULong: |
| // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting |
| testValue = *( (cl_ulong *)source ); |
| typeBits = 8 * sizeof( cl_ulong ); |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| |
| if ( testValue == 0 ) |
| count = typeBits; |
| else |
| { |
| for( count = 0; (0 == (testValue & 0x1)); count++ ) |
| testValue >>= 1; |
| } |
| |
| switch( vecType ) |
| { |
| case kChar: |
| *( (cl_char *)destination ) = count; |
| break; |
| case kUChar: |
| *( (cl_uchar *)destination ) = count; |
| break; |
| case kShort: |
| *( (cl_short *)destination ) = count; |
| break; |
| case kUShort: |
| *( (cl_ushort *)destination ) = count; |
| break; |
| case kInt: |
| *( (cl_int *)destination ) = count; |
| break; |
| case kUInt: |
| *( (cl_uint *)destination ) = count; |
| break; |
| case kLong: |
| *( (cl_long *)destination ) = count; |
| break; |
| case kULong: |
| *( (cl_ulong *)destination ) = count; |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| |
| int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_single_param_integer_fn( queue, context, "ctz", verify_integer_ctz ); |
| } |
| |
| #define OP_CASE( op, sizeName, size ) \ |
| case sizeName: \ |
| { \ |
| cl_##size *d = (cl_##size *)destination; \ |
| *d op##= *( (cl_##size *)source ); \ |
| break; \ |
| } |
| |
| #define OP_CASES( op ) \ |
| switch( vecType ) \ |
| { \ |
| OP_CASE( op, kChar, char ) \ |
| OP_CASE( op, kUChar, uchar ) \ |
| OP_CASE( op, kShort, short ) \ |
| OP_CASE( op, kUShort, ushort ) \ |
| OP_CASE( op, kInt, int ) \ |
| OP_CASE( op, kUInt, uint ) \ |
| OP_CASE( op, kLong, long ) \ |
| OP_CASE( op, kULong, ulong ) \ |
| default: \ |
| break; \ |
| } |
| |
| #define OP_TEST( op, opName ) \ |
| bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \ |
| { \ |
| OP_CASES( op ) \ |
| return true; \ |
| } \ |
| int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \ |
| { \ |
| return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \ |
| } |
| |
| OP_TEST( +, add ) |
| OP_TEST( -, subtract ) |
| OP_TEST( *, multiply ) |
| OP_TEST( ^, exclusiveOr ) |
| OP_TEST( |, or ) |
| OP_TEST( &, and ) |
| |
| #define OP_CASE_GUARD( op, sizeName, size ) \ |
| case sizeName: \ |
| { \ |
| cl_##size *d = (cl_##size *)destination; \ |
| cl_##size *s = (cl_##size *)source; \ |
| if( *s == 0 ) \ |
| *d = -1; \ |
| else \ |
| *d op##= *s; \ |
| break; \ |
| } |
| |
| #define OP_CASE_GUARD_SIGNED( op, sizeName, size, MIN_VAL ) \ |
| case sizeName: \ |
| { \ |
| cl_##size *d = (cl_##size *)destination; \ |
| cl_##size *s = (cl_##size *)source; \ |
| if( *s == 0 || (*d == MIN_VAL && *s == -1)) \ |
| *d = -1 - MIN_VAL; \ |
| else \ |
| *d op##= *s; \ |
| break; \ |
| } |
| |
| #define OP_CASES_GUARD( op ) \ |
| switch( vecType ) \ |
| { \ |
| OP_CASE_GUARD_SIGNED( op, kChar, char, CL_CHAR_MIN ) \ |
| OP_CASE_GUARD( op, kUChar, uchar ) \ |
| OP_CASE_GUARD_SIGNED( op, kShort, short, CL_SHRT_MIN ) \ |
| OP_CASE_GUARD( op, kUShort, ushort ) \ |
| OP_CASE_GUARD_SIGNED( op, kInt, int, CL_INT_MIN ) \ |
| OP_CASE_GUARD( op, kUInt, uint ) \ |
| OP_CASE_GUARD_SIGNED( op, kLong, long, CL_LONG_MIN ) \ |
| OP_CASE_GUARD( op, kULong, ulong ) \ |
| default: \ |
| break; \ |
| } |
| |
| #define OP_TEST_GUARD( op, opName ) \ |
| bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \ |
| { \ |
| OP_CASES_GUARD( op ) \ |
| return true; \ |
| } \ |
| int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \ |
| { \ |
| return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \ |
| } |
| |
| OP_TEST_GUARD( /, divide ) |
| OP_TEST_GUARD( %, modulo ) |
| |
| #define PATCH_CASE( _out, _src, _dest, _count, _cl_type ) \ |
| { \ |
| const _cl_type *denom = (const _cl_type* ) _src; \ |
| _cl_type *result = (_cl_type* ) _out; \ |
| for( size_t i = 0; i < _count; i++ ) \ |
| if( denom[i] == 0 ) \ |
| result[i] = (_cl_type) -1; \ |
| } |
| |
| #define PATCH_CASE_SIGNED( _out, _src, _dest, _count, _cl_type, _MIN_VAL ) \ |
| { \ |
| const _cl_type *num = (const _cl_type* ) _dest; \ |
| const _cl_type *denom = (const _cl_type* ) _src; \ |
| _cl_type *result = (_cl_type* ) _out; \ |
| for( size_t i = 0; i < _count; i++ ) \ |
| if( denom[i] == 0 || ( num[i] == _MIN_VAL && denom[i] == -1)) \ |
| result[i] = -1 - _MIN_VAL; \ |
| } |
| |
| static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType ) |
| { |
| switch( vecType ) |
| { |
| case kChar: |
| PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_char, CL_CHAR_MIN ) |
| break; |
| case kUChar: |
| PATCH_CASE( outData, inDataA, inDataB, count, cl_uchar ) |
| break; |
| case kShort: |
| PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_short, CL_SHRT_MIN ) |
| break; |
| case kUShort: |
| PATCH_CASE( outData, inDataA, inDataB, count, cl_ushort ) |
| break; |
| case kInt: |
| PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_int, CL_INT_MIN ) |
| break; |
| case kUInt: |
| PATCH_CASE( outData, inDataA, inDataB, count, cl_uint ) |
| break; |
| case kLong: |
| PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_long, CL_LONG_MIN ) |
| break; |
| case kULong: |
| PATCH_CASE( outData, inDataA, inDataB, count, cl_ulong ) |
| break; |
| default: |
| log_error( "ERROR: internal test error -- unknown data type %d\n", vecType ); |
| break; |
| } |
| } |
| |
| const char *twoParamIntegerKernelSourcePattern = |
| "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *destValues)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " %s%s sA = %s;\n" |
| " %s%s sB = %s;\n" |
| " %s%s dst = %s( sA, sB );\n" |
| " %s;\n" |
| "\n" |
| "}\n"; |
| |
| typedef bool (*twoParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ); |
| |
| static char * build_load_statement( char *outString, size_t vecSize, const char *name ) |
| { |
| if( vecSize != 3 ) |
| sprintf( outString, "%s[ tid ]", name ); |
| else |
| sprintf( outString, "vload3( tid, %s )", name ); |
| return outString; |
| } |
| |
| static char * build_store_statement( char *outString, size_t vecSize, const char *name, const char *srcName ) |
| { |
| if( vecSize != 3 ) |
| sprintf( outString, "%s[ tid ] = %s", name, srcName ); |
| else |
| sprintf( outString, "vstore3( %s, tid, %s )", srcName, name ); |
| return outString; |
| } |
| |
| int test_two_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, |
| ExplicitType vecAType, ExplicitType vecBType, unsigned int vecSize, twoParamIntegerVerifyFn verifyFn, MTdata d ) |
| { |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper streams[3]; |
| cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected; |
| int error, i; |
| size_t threads[1], localThreads[1]; |
| char kernelSource[10240]; |
| char *programPtr; |
| char sizeName[4], paramSizeName[4]; |
| |
| // embedded profiles don't support long/ulong datatypes |
| if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long")) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) ); |
| return CL_SUCCESS; |
| } |
| |
| /* Create the source */ |
| if( vecSize == 1 ) |
| sizeName[ 0 ] = 0; |
| else |
| sprintf( sizeName, "%d", vecSize ); |
| if( ( vecSize == 1 ) || ( vecSize == 3 ) ) |
| paramSizeName[ 0 ] = 0; |
| else |
| sprintf( paramSizeName, "%d", vecSize ); |
| |
| char sourceALoad[ 128 ], sourceBLoad[ 128 ], destStore[ 128 ]; |
| |
| sprintf( kernelSource, twoParamIntegerKernelSourcePattern, |
| get_explicit_type_name( vecAType ), paramSizeName, |
| get_explicit_type_name( vecBType ), paramSizeName, |
| get_explicit_type_name( vecAType ), paramSizeName, |
| get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ), |
| get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ), |
| get_explicit_type_name( vecAType ), sizeName, |
| fnName, |
| build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" ) |
| ); |
| |
| /* Create kernels */ |
| programPtr = kernelSource; |
| if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) |
| { |
| log_error("The program we attempted to compile was: \n%s\n", kernelSource); |
| return -1; |
| } |
| |
| /* Generate some streams */ |
| generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA ); |
| generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB ); |
| |
| streams[0] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL); |
| if( streams[0] == NULL ) |
| { |
| log_error("ERROR: Creating input array A failed!\n"); |
| return -1; |
| } |
| streams[1] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL); |
| if( streams[1] == NULL ) |
| { |
| log_error("ERROR: Creating input array B failed!\n"); |
| return -1; |
| } |
| streams[2] = clCreateBuffer( |
| context, CL_MEM_READ_WRITE, |
| get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, NULL, NULL); |
| if( streams[2] == NULL ) |
| { |
| log_error("ERROR: Creating output array failed!\n"); |
| return -1; |
| } |
| |
| /* Assign streams and execute */ |
| error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| |
| /* Run the kernel */ |
| threads[0] = TEST_SIZE; |
| |
| error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); |
| test_error( error, "Unable to get work group size to use" ); |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); |
| test_error( error, "Unable to execute test kernel" ); |
| |
| memset(outData, 0xFF, get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize); |
| |
| /* Now get the results */ |
| error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, |
| get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize, outData, 0, |
| NULL, NULL ); |
| test_error( error, "Unable to read output array!" ); |
| |
| /* And verify! */ |
| char *inA = (char *)inDataA; |
| char *inB = (char *)inDataB; |
| char *out = (char *)outData; |
| for( i = 0; i < (int)TEST_SIZE; i++ ) |
| { |
| for( size_t j = 0; j < vecSize; j++ ) |
| { |
| bool test = verifyFn( inA, inB, &expected, vecAType ); |
| if( test && ( memcmp( &expected, out, get_explicit_type_size( vecAType ) ) != 0 ) ) |
| { |
| switch( get_explicit_type_size( vecAType )) |
| { |
| case 1: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x), TEST_SIZE %d\n", |
| (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ), |
| *( (cl_uchar *)inA ), |
| *( (cl_uchar *)inB ) , |
| TEST_SIZE); |
| break; |
| |
| case 2: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x), TEST_SIZE %d\n", |
| (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ), |
| *( (cl_ushort *)inA ), |
| *( (cl_ushort *)inB ), |
| TEST_SIZE); |
| break; |
| |
| case 4: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n", |
| (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ), |
| *( (cl_uint *)inA ), |
| *( (cl_uint *)inB ) ); |
| break; |
| |
| case 8: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n", |
| (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ), |
| *( (cl_ulong *)inA ), |
| *( (cl_ulong *)inB ) ); |
| break; |
| } |
| return -1; |
| } |
| inA += get_explicit_type_size( vecAType ); |
| inB += get_explicit_type_size( vecBType ); |
| out += get_explicit_type_size( vecAType ); |
| } |
| } |
| |
| return 0; |
| } |
| |
| int test_two_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn) |
| { |
| ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; |
| unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO : 3 not tested |
| unsigned int index, typeIndex; |
| int retVal = 0; |
| RandomSeed seed(gRandomSeed ); |
| |
| for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) |
| { |
| if (( types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong) |
| continue; |
| |
| for( index = 0; vecSizes[ index ] != 0; index++ ) |
| { |
| if( test_two_param_integer_kernel(queue, context, fnName, types[ typeIndex ], types[ typeIndex ], vecSizes[ index ], verifyFn, seed ) != 0 ) |
| { |
| log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] ); |
| retVal = -1; |
| } |
| } |
| } |
| |
| return retVal; |
| } |
| |
| int test_two_param_unmatched_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn) |
| { |
| ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; |
| unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; |
| unsigned int index, typeAIndex, typeBIndex; |
| int retVal = 0; |
| RandomSeed seed( gRandomSeed ); |
| |
| for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ ) |
| { |
| if (( types[ typeAIndex ] == kLong || types[ typeAIndex ] == kULong) && !gHasLong) |
| continue; |
| |
| for( typeBIndex = 0; types[ typeBIndex ] != kNumExplicitTypes; typeBIndex++ ) |
| { |
| if (( types[ typeBIndex ] == kLong || types[ typeBIndex ] == kULong) && !gHasLong) |
| continue; |
| |
| for( index = 0; vecSizes[ index ] != 0; index++ ) |
| { |
| if( test_two_param_integer_kernel( queue, context, fnName, types[ typeAIndex ], types[ typeBIndex ], vecSizes[ index ], verifyFn, seed ) != 0 ) |
| { |
| log_error( " Vector %s%d / %s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], get_explicit_type_name( types[ typeBIndex ] ), vecSizes[ index ] ); |
| retVal = -1; |
| } |
| } |
| } |
| } |
| |
| return retVal; |
| } |
| |
| bool verify_integer_hadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| cl_long testValueA, testValueB, overflow; |
| cl_ulong uValueA, uValueB, uOverflow; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| testValueA = *( (cl_char *)sourceA ); |
| testValueB = *( (cl_char *)sourceB ); |
| *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kUChar: |
| testValueA = *( (cl_uchar *)sourceA ); |
| testValueB = *( (cl_uchar *)sourceB ); |
| *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kShort: |
| testValueA = *( (cl_short *)sourceA ); |
| testValueB = *( (cl_short *)sourceB ); |
| *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kUShort: |
| testValueA = *( (cl_ushort *)sourceA ); |
| testValueB = *( (cl_ushort *)sourceB ); |
| *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kInt: |
| testValueA = *( (cl_int *)sourceA ); |
| testValueB = *( (cl_int *)sourceB ); |
| *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kUInt: |
| testValueA = *( (cl_uint *)sourceA ); |
| testValueB = *( (cl_uint *)sourceB ); |
| *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB ) >> 1 ); |
| break; |
| case kLong: |
| // The long way to avoid dropping bits |
| testValueA = *( (cl_long *)sourceA ); |
| testValueB = *( (cl_long *)sourceB ); |
| overflow = ( testValueA & 0x1 ) + ( testValueB & 0x1 ); |
| *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + ( overflow >> 1 ); |
| break; |
| case kULong: |
| // The long way to avoid dropping bits |
| uValueA = *( (cl_ulong *)sourceA ); |
| uValueB = *( (cl_ulong *)sourceB ); |
| uOverflow = ( uValueA & 0x1 ) + ( uValueB & 0x1 ); |
| *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + ( uOverflow >> 1 ); |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "hadd", verify_integer_hadd ); |
| } |
| |
| bool verify_integer_rhadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| cl_long testValueA, testValueB, overflow; |
| cl_ulong uValueA, uValueB, uOverflow; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| testValueA = *( (cl_char *)sourceA ); |
| testValueB = *( (cl_char *)sourceB ); |
| *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kUChar: |
| testValueA = *( (cl_uchar *)sourceA ); |
| testValueB = *( (cl_uchar *)sourceB ); |
| *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kShort: |
| testValueA = *( (cl_short *)sourceA ); |
| testValueB = *( (cl_short *)sourceB ); |
| *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kUShort: |
| testValueA = *( (cl_ushort *)sourceA ); |
| testValueB = *( (cl_ushort *)sourceB ); |
| *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kInt: |
| testValueA = *( (cl_int *)sourceA ); |
| testValueB = *( (cl_int *)sourceB ); |
| *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kUInt: |
| testValueA = *( (cl_uint *)sourceA ); |
| testValueB = *( (cl_uint *)sourceB ); |
| *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB + 1 ) >> 1 ); |
| break; |
| case kLong: |
| // The long way to avoid dropping bits |
| testValueA = *( (cl_long *)sourceA ); |
| testValueB = *( (cl_long *)sourceB ); |
| overflow = ( testValueA | testValueB ) & 0x1; |
| *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + overflow; |
| break; |
| case kULong: |
| // The long way to avoid dropping bits |
| uValueA = *( (cl_ulong *)sourceA ); |
| uValueB = *( (cl_ulong *)sourceB ); |
| uOverflow = ( uValueA | uValueB ) & 0x1; |
| *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + uOverflow; |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "rhadd", verify_integer_rhadd ); |
| } |
| |
| #define MIN_CASE( type, const ) \ |
| case const : \ |
| { \ |
| cl_##type valueA = *( (cl_##type *)sourceA ); \ |
| cl_##type valueB = *( (cl_##type *)sourceB ); \ |
| *( (cl_##type *)destination ) = (cl_##type)( valueB < valueA ? valueB : valueA ); \ |
| break; \ |
| } |
| |
| bool verify_integer_min( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| switch( vecType ) |
| { |
| MIN_CASE( char, kChar ) |
| MIN_CASE( uchar, kUChar ) |
| MIN_CASE( short, kShort ) |
| MIN_CASE( ushort, kUShort ) |
| MIN_CASE( int, kInt ) |
| MIN_CASE( uint, kUInt ) |
| MIN_CASE( long, kLong ) |
| MIN_CASE( ulong, kULong ) |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "min", verify_integer_min); |
| } |
| |
| #define MAX_CASE( type, const ) \ |
| case const : \ |
| { \ |
| cl_##type valueA = *( (cl_##type *)sourceA ); \ |
| cl_##type valueB = *( (cl_##type *)sourceB ); \ |
| *( (cl_##type *)destination ) = (cl_##type)( valueA < valueB ? valueB : valueA ); \ |
| break; \ |
| } |
| |
| bool verify_integer_max( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| switch( vecType ) |
| { |
| MAX_CASE( char, kChar ) |
| MAX_CASE( uchar, kUChar ) |
| MAX_CASE( short, kShort ) |
| MAX_CASE( ushort, kUShort ) |
| MAX_CASE( int, kInt ) |
| MAX_CASE( uint, kUInt ) |
| MAX_CASE( long, kLong ) |
| MAX_CASE( ulong, kULong ) |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "max", verify_integer_max ); |
| } |
| |
| |
| void multiply_unsigned_64_by_64( cl_ulong sourceA, cl_ulong sourceB, cl_ulong &destLow, cl_ulong &destHi ) |
| { |
| cl_ulong lowA, lowB; |
| cl_ulong highA, highB; |
| |
| // Split up the values |
| lowA = sourceA & 0xffffffff; |
| highA = sourceA >> 32; |
| lowB = sourceB & 0xffffffff; |
| highB = sourceB >> 32; |
| |
| // Note that, with this split, our multiplication becomes: |
| // ( a * b ) |
| // = ( ( aHI << 32 + aLO ) * ( bHI << 32 + bLO ) ) >> 64 |
| // = ( ( aHI << 32 * bHI << 32 ) + ( aHI << 32 * bLO ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64 |
| // = ( ( aHI * bHI << 64 ) + ( aHI * bLO << 32 ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64 |
| // = ( aHI * bHI ) + ( aHI * bLO >> 32 ) + ( aLO * bHI >> 32 ) + ( aLO * bLO >> 64 ) |
| |
| // Now, since each value is 32 bits, the max size of any multiplication is: |
| // ( 2 ^ 32 - 1 ) * ( 2 ^ 32 - 1 ) = 2^64 - 4^32 + 1 = 2^64 - 2^33 + 1, which fits within 64 bits |
| // Which means we can do each component within a 64-bit integer as necessary (each component above marked as AB1 - AB4) |
| cl_ulong aHibHi = highA * highB; |
| cl_ulong aHibLo = highA * lowB; |
| cl_ulong aLobHi = lowA * highB; |
| cl_ulong aLobLo = lowA * lowB; |
| |
| // Assemble terms. |
| // We note that in certain cases, sums of products cannot overflow: |
| // |
| // The maximum product of two N-bit unsigned numbers is |
| // |
| // (2**N-1)^2 = 2**2N - 2**(N+1) + 1 |
| // |
| // We note that we can add the maximum N-bit number to the 2N-bit product twice without overflow: |
| // |
| // (2**N-1)^2 + 2*(2**N-1) = 2**2N - 2**(N+1) + 1 + 2**(N+1) - 2 = 2**2N - 1 |
| // |
| // If we breakdown the product of two numbers a,b into high and low halves of partial products as follows: |
| // |
| // a.hi a.lo |
| // x b.hi b.lo |
| //=============================================================================== |
| // (b.hi*a.hi).hi (b.hi*a.hi).lo |
| // (b.lo*a.hi).hi (b.lo*a.hi).lo |
| // (b.hi*a.lo).hi (b.hi*a.lo).lo |
| // + (b.lo*a.lo).hi (b.lo*a.lo).lo |
| //=============================================================================== |
| // |
| // The (b.lo*a.lo).lo term cannot cause a carry, so we can ignore them for now. We also know from above, that we can add (b.lo*a.lo).hi |
| // and (b.hi*a.lo).lo to the 2N bit term [(b.lo*a.hi).hi + (b.lo*a.hi).lo] without overflow. That takes care of all of the terms |
| // on the right half that might carry. Do that now. |
| // |
| cl_ulong aLobLoHi = aLobLo >> 32; |
| cl_ulong aLobHiLo = aLobHi & 0xFFFFFFFFULL; |
| aHibLo += aLobLoHi + aLobHiLo; |
| |
| // That leaves us with these terms: |
| // |
| // a.hi a.lo |
| // x b.hi b.lo |
| //=============================================================================== |
| // (b.hi*a.hi).hi (b.hi*a.hi).lo |
| // (b.hi*a.lo).hi |
| // [ (b.lo*a.hi).hi + (b.lo*a.hi).lo + other ] |
| // + (b.lo*a.lo).lo |
| //=============================================================================== |
| |
| // All of the overflow potential from the right half has now been accumulated into the [ (b.lo*a.hi).hi + (b.lo*a.hi).lo ] 2N bit term. |
| // We can safely separate into high and low parts. Per our rule above, we know we can accumulate the high part of that and (b.hi*a.lo).hi |
| // into the 2N bit term (b.lo*a.hi) without carry. The low part can be pieced together with (b.lo*a.lo).lo, to give the final low result |
| |
| destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32); // Cant overflow |
| destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL ); |
| } |
| |
| void multiply_signed_64_by_64( cl_long sourceA, cl_long sourceB, cl_ulong &destLow, cl_long &destHi ) |
| { |
| // Find sign of result |
| cl_long aSign = sourceA >> 63; |
| cl_long bSign = sourceB >> 63; |
| cl_long resultSign = aSign ^ bSign; |
| |
| // take absolute values of the argument |
| sourceA = (sourceA ^ aSign) - aSign; |
| sourceB = (sourceB ^ bSign) - bSign; |
| |
| cl_ulong hi; |
| multiply_unsigned_64_by_64( (cl_ulong) sourceA, (cl_ulong) sourceB, destLow, hi ); |
| |
| // Fix the sign |
| if( resultSign ) |
| { |
| destLow ^= resultSign; |
| hi ^= resultSign; |
| destLow -= resultSign; |
| |
| //carry if necessary |
| if( 0 == destLow ) |
| hi -= resultSign; |
| } |
| |
| destHi = (cl_long) hi; |
| } |
| |
| bool verify_integer_mul_hi( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| cl_long testValueA, testValueB, highSigned; |
| cl_ulong highUnsigned, lowHalf; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| testValueA = *( (cl_char *)sourceA ); |
| testValueB = *( (cl_char *)sourceB ); |
| *( (cl_char *)destination ) = (cl_char)( ( testValueA * testValueB ) >> 8 ); |
| break; |
| case kUChar: |
| testValueA = *( (cl_uchar *)sourceA ); |
| testValueB = *( (cl_uchar *)sourceB ); |
| *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA * testValueB ) >> 8 ); |
| break; |
| case kShort: |
| testValueA = *( (cl_short *)sourceA ); |
| testValueB = *( (cl_short *)sourceB ); |
| *( (cl_short *)destination ) = (cl_short)( ( testValueA * testValueB ) >> 16 ); |
| break; |
| case kUShort: |
| testValueA = *( (cl_ushort *)sourceA ); |
| testValueB = *( (cl_ushort *)sourceB ); |
| *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA * testValueB ) >> 16 ); |
| break; |
| case kInt: |
| testValueA = *( (cl_int *)sourceA ); |
| testValueB = *( (cl_int *)sourceB ); |
| *( (cl_int *)destination ) = (cl_int)( ( testValueA * testValueB ) >> 32 ); |
| break; |
| case kUInt: |
| testValueA = *( (cl_uint *)sourceA ); |
| testValueB = *( (cl_uint *)sourceB ); |
| *( (cl_uint *)destination ) = (cl_uint)( ( testValueA * testValueB ) >> 32 ); |
| break; |
| case kLong: |
| testValueA = *( (cl_long *)sourceA ); |
| testValueB = *( (cl_long *)sourceB ); |
| |
| multiply_signed_64_by_64( testValueA, testValueB, lowHalf, highSigned ); |
| *( (cl_long *)destination ) = highSigned; |
| break; |
| case kULong: |
| testValueA = *( (cl_ulong *)sourceA ); |
| testValueB = *( (cl_ulong *)sourceB ); |
| |
| multiply_unsigned_64_by_64( testValueA, testValueB, lowHalf, highUnsigned ); |
| *( (cl_ulong *)destination ) = highUnsigned; |
| break; |
| default: |
| // Should never happen |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "mul_hi", verify_integer_mul_hi ); |
| } |
| |
| bool verify_integer_rotate( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) |
| { |
| cl_ulong testValueA; |
| char numBits; |
| |
| switch( vecType ) |
| { |
| case kChar: |
| case kUChar: |
| testValueA = *( (cl_uchar *)sourceA ); |
| numBits = *( (cl_uchar *)sourceB ); |
| numBits &= 7; |
| if ( numBits == 0 ) |
| *( (cl_uchar *)destination ) = (cl_uchar)testValueA; |
| else |
| *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA << numBits ) | ( testValueA >> ( 8 - numBits ) ) ); |
| break; |
| case kShort: |
| case kUShort: |
| testValueA = *( (cl_ushort *)sourceA ); |
| numBits = *( (cl_ushort *)sourceB ); |
| numBits &= 15; |
| if ( numBits == 0 ) |
| *( (cl_ushort *)destination ) = (cl_ushort)testValueA; |
| else |
| *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA << numBits ) | ( testValueA >> ( 16 - numBits ) ) ); |
| break; |
| case kInt: |
| case kUInt: |
| testValueA = *( (cl_uint *)sourceA ); |
| numBits = *( (cl_uint *)sourceB ); |
| numBits &= 31; |
| if ( numBits == 0 ) |
| *( (cl_uint *)destination ) = (cl_uint) testValueA; |
| else |
| *( (cl_uint *)destination ) = (cl_uint)( ( testValueA << numBits ) | ( testValueA >> ( 32 - numBits ) ) ); |
| break; |
| case kLong: |
| case kULong: |
| testValueA = *( (cl_ulong *)sourceA ); |
| numBits = *( (cl_ulong *)sourceB ); |
| numBits &= 63; |
| if ( numBits == 0 ) |
| *( (cl_ulong *)destination ) = (cl_ulong)testValueA; |
| else |
| *( (cl_ulong *)destination ) = (cl_ulong)( ( testValueA << numBits ) | ( testValueA >> ( 64 - numBits ) ) ); |
| break; |
| default: |
| // Should never happen |
| log_error( "Unknown type encountered in verify_integer_rotate. Test failed. Aborting...\n" ); |
| abort(); |
| return false; |
| } |
| return true; |
| } |
| |
| int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_two_param_integer_fn( queue, context, "rotate", verify_integer_rotate ); |
| } |
| |
| const char *threeParamIntegerKernelSourcePattern = |
| "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " %s%s sA = %s;\n" |
| " %s%s sB = %s;\n" |
| " %s%s sC = %s;\n" |
| " %s%s dst = %s( sA, sB, sC );\n" |
| " %s;\n" |
| "\n" |
| "}\n"; |
| |
| typedef bool (*threeParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *sourceC, void *destination, |
| ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ); |
| |
| int test_three_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, |
| ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType, |
| unsigned int vecSize, threeParamIntegerVerifyFn verifyFn, MTdata d ) |
| { |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper streams[4]; |
| cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], inDataC[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected; |
| int error, i; |
| size_t threads[1], localThreads[1]; |
| char kernelSource[10240]; |
| char *programPtr; |
| char sizeName[4], paramSizeName[4]; |
| |
| if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long")) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) ); |
| return CL_SUCCESS; |
| } |
| |
| |
| /* Create the source */ |
| if( vecSize == 1 ) |
| sizeName[ 0 ] = 0; |
| else |
| sprintf( sizeName, "%d", vecSize ); |
| if( ( vecSize == 1 ) || ( vecSize == 3 ) ) |
| paramSizeName[ 0 ] = 0; |
| else |
| sprintf( paramSizeName, "%d", vecSize ); |
| |
| char sourceALoad[ 128 ], sourceBLoad[ 128 ], sourceCLoad[ 128 ], destStore[ 128 ]; |
| |
| sprintf( kernelSource, threeParamIntegerKernelSourcePattern, |
| get_explicit_type_name( vecAType ), paramSizeName, |
| get_explicit_type_name( vecBType ), paramSizeName, |
| get_explicit_type_name( vecCType ), paramSizeName, |
| get_explicit_type_name( destType ), paramSizeName, |
| get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ), |
| get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ), |
| get_explicit_type_name( vecCType ), sizeName, build_load_statement( sourceCLoad, (size_t)vecSize, "sourceC" ), |
| get_explicit_type_name( destType ), sizeName, |
| fnName, |
| build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" ) |
| ); |
| |
| /* Create kernels */ |
| programPtr = kernelSource; |
| if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) |
| { |
| log_error("The program we attempted to compile was: \n%s\n", kernelSource); |
| return -1; |
| } |
| |
| /* Generate some streams */ |
| generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA ); |
| generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB ); |
| generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC ); |
| |
| streams[0] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL); |
| if( streams[0] == NULL ) |
| { |
| log_error("ERROR: Creating input array A failed!\n"); |
| return -1; |
| } |
| streams[1] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL); |
| if( streams[1] == NULL ) |
| { |
| log_error("ERROR: Creating input array B failed!\n"); |
| return -1; |
| } |
| streams[2] = clCreateBuffer( |
| context, CL_MEM_COPY_HOST_PTR, |
| get_explicit_type_size(vecCType) * vecSize * TEST_SIZE, &inDataC, NULL); |
| if( streams[2] == NULL ) |
| { |
| log_error("ERROR: Creating input array C failed!\n"); |
| return -1; |
| } |
| streams[3] = clCreateBuffer( |
| context, CL_MEM_READ_WRITE, |
| get_explicit_type_size(destType) * vecSize * TEST_SIZE, NULL, NULL); |
| if( streams[3] == NULL ) |
| { |
| log_error("ERROR: Creating output array failed!\n"); |
| return -1; |
| } |
| |
| /* Assign streams and execute */ |
| error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] ); |
| test_error( error, "Unable to set indexed kernel arguments" ); |
| |
| /* Run the kernel */ |
| threads[0] = TEST_SIZE; |
| |
| error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); |
| test_error( error, "Unable to get work group size to use" ); |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); |
| test_error( error, "Unable to execute test kernel" ); |
| |
| memset(outData, 0xFF, get_explicit_type_size( destType ) * TEST_SIZE * vecSize); |
| |
| /* Now get the results */ |
| error = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, get_explicit_type_size( destType ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); |
| test_error( error, "Unable to read output array!" ); |
| |
| /* And verify! */ |
| char *inA = (char *)inDataA; |
| char *inB = (char *)inDataB; |
| char *inC = (char *)inDataC; |
| char *out = (char *)outData; |
| for( i = 0; i < (int)TEST_SIZE; i++ ) |
| { |
| for( size_t j = 0; j < vecSize; j++ ) |
| { |
| bool test = verifyFn( inA, inB, inC, &expected, vecAType, vecBType, vecCType, destType ); |
| if( test && ( memcmp( &expected, out, get_explicit_type_size( destType ) ) != 0 ) ) |
| { |
| switch( get_explicit_type_size( vecAType )) |
| { |
| case 1: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x, 0x%2.2x)\n", |
| (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ), |
| *( (cl_uchar *)inA ), |
| *( (cl_uchar *)inB ), |
| *( (cl_uchar *)inC ) ); |
| break; |
| |
| case 2: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x, 0x%4.4x)\n", |
| (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ), |
| *( (cl_ushort *)inA ), |
| *( (cl_ushort *)inB ), |
| *( (cl_ushort *)inC ) ); |
| break; |
| |
| case 4: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x, 0x%8.8x)\n", |
| (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ), |
| *( (cl_uint *)inA ), |
| *( (cl_uint *)inB ), |
| *( (cl_uint *)inC ) ); |
| break; |
| |
| case 8: |
| log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx, 0x%16.16llx)\n", |
| (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ), |
| *( (cl_ulong *)inA ), |
| *( (cl_ulong *)inB ), |
| *( (cl_ulong *)inC ) ); |
| break; |
| } |
| return -1; |
| } |
| inA += get_explicit_type_size( vecAType ); |
| inB += get_explicit_type_size( vecBType ); |
| inC += get_explicit_type_size( vecCType ); |
| out += get_explicit_type_size( destType ); |
| } |
| } |
| |
| return 0; |
| } |
| |
| int test_three_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, threeParamIntegerVerifyFn verifyFn) |
| { |
| ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; |
| unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; |
| unsigned int index, typeAIndex; |
| int retVal = 0; |
| RandomSeed seed(gRandomSeed); |
| |
| for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ ) |
| { |
| if ((types[ typeAIndex ] == kLong || types[ typeAIndex] == kULong) && !gHasLong) |
| continue; |
| |
| for( index = 0; vecSizes[ index ] != 0; index++ ) |
| { |
| if( test_three_param_integer_kernel(queue, context, fnName, types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], vecSizes[ index ], verifyFn, seed ) != 0 ) |
| { |
| log_error( " Vector %s%d,%s%d,%s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], |
| get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] , |
| get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] ); |
| retVal = -1; |
| } |
| } |
| } |
| |
| return retVal; |
| } |
| |
| bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *destination, |
| ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) |
| { |
| if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) |
| { |
| cl_ulong valueA, valueB, valueC; |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| valueA = ((cl_ulong*) sourceA)[0]; |
| valueB = ((cl_ulong*) sourceB)[0]; |
| valueC = ((cl_ulong*) sourceC)[0]; |
| break; |
| case kUInt: |
| valueA = ((cl_uint*) sourceA)[0]; |
| valueB = ((cl_uint*) sourceB)[0]; |
| valueC = ((cl_uint*) sourceC)[0]; |
| break; |
| case kUShort: |
| valueA = ((cl_ushort*) sourceA)[0]; |
| valueB = ((cl_ushort*) sourceB)[0]; |
| valueC = ((cl_ushort*) sourceC)[0]; |
| break; |
| case kUChar: |
| valueA = ((cl_uchar*) sourceA)[0]; |
| valueB = ((cl_uchar*) sourceB)[0]; |
| valueC = ((cl_uchar*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| |
| if(valueB > valueC) { |
| return false; // results are undefined : let expected alone. |
| } |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB); |
| break; |
| case kUInt: |
| ((cl_uint*) destination)[0] = (cl_uint) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| case kUShort: |
| ((cl_ushort*) destination)[0] = (cl_ushort) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| case kUChar: |
| ((cl_uchar*) destination)[0] = (cl_uchar) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| |
| |
| |
| } |
| else |
| { |
| cl_long valueA, valueB, valueC; |
| |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| valueA = ((cl_long*) sourceA)[0]; |
| valueB = ((cl_long*) sourceB)[0]; |
| valueC = ((cl_long*) sourceC)[0]; |
| break; |
| case kInt: |
| valueA = ((cl_int*) sourceA)[0]; |
| valueB = ((cl_int*) sourceB)[0]; |
| valueC = ((cl_int*) sourceC)[0]; |
| break; |
| case kShort: |
| valueA = ((cl_short*) sourceA)[0]; |
| valueB = ((cl_short*) sourceB)[0]; |
| valueC = ((cl_short*) sourceC)[0]; |
| break; |
| case kChar: |
| valueA = ((cl_char*) sourceA)[0]; |
| valueB = ((cl_char*) sourceB)[0]; |
| valueC = ((cl_char*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| if(valueB > valueC) { |
| return false; // undefined behavior : leave "expected" alone |
| } |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| ((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB); |
| break; |
| case kInt: |
| ((cl_int*) destination)[0] = (cl_int) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| case kShort: |
| ((cl_short*) destination)[0] = (cl_short) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| case kChar: |
| ((cl_char*) destination)[0] = (cl_char) |
| (MAX(MIN(valueA, valueC), valueB)); |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| } |
| return true; |
| } |
| |
| int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_three_param_integer_fn( queue, context, "clamp", verify_integer_clamp ); |
| } |
| |
| bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *destination, |
| ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) |
| { |
| if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) |
| { |
| cl_ulong valueA, valueB, valueC; |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| valueA = ((cl_ulong*) sourceA)[0]; |
| valueB = ((cl_ulong*) sourceB)[0]; |
| valueC = ((cl_ulong*) sourceC)[0]; |
| break; |
| case kUInt: |
| valueA = ((cl_uint*) sourceA)[0]; |
| valueB = ((cl_uint*) sourceB)[0]; |
| valueC = ((cl_uint*) sourceC)[0]; |
| break; |
| case kUShort: |
| valueA = ((cl_ushort*) sourceA)[0]; |
| valueB = ((cl_ushort*) sourceB)[0]; |
| valueC = ((cl_ushort*) sourceC)[0]; |
| break; |
| case kUChar: |
| valueA = ((cl_uchar*) sourceA)[0]; |
| valueB = ((cl_uchar*) sourceB)[0]; |
| valueC = ((cl_uchar*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| cl_ulong multHi, multLo; |
| multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi ); |
| |
| multLo += valueC; |
| multHi += multLo < valueC; // carry if overflow |
| if( multHi ) |
| multLo = 0xFFFFFFFFFFFFFFFFULL; |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| ((cl_ulong*) destination)[0] = multLo; |
| break; |
| case kUInt: |
| ((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX ); |
| break; |
| case kUShort: |
| ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX ); |
| break; |
| case kUChar: |
| ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX ); |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| } |
| else |
| { |
| cl_long valueA, valueB, valueC; |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| valueA = ((cl_long*) sourceA)[0]; |
| valueB = ((cl_long*) sourceB)[0]; |
| valueC = ((cl_long*) sourceC)[0]; |
| break; |
| case kInt: |
| valueA = ((cl_int*) sourceA)[0]; |
| valueB = ((cl_int*) sourceB)[0]; |
| valueC = ((cl_int*) sourceC)[0]; |
| break; |
| case kShort: |
| valueA = ((cl_short*) sourceA)[0]; |
| valueB = ((cl_short*) sourceB)[0]; |
| valueC = ((cl_short*) sourceC)[0]; |
| break; |
| case kChar: |
| valueA = ((cl_char*) sourceA)[0]; |
| valueB = ((cl_char*) sourceB)[0]; |
| valueC = ((cl_char*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| cl_long multHi; |
| cl_ulong multLo; |
| multiply_signed_64_by_64( valueA, valueB, multLo, multHi ); |
| |
| cl_ulong sum = multLo + valueC; |
| // carry if overflow |
| if( valueC >= 0 ) |
| { |
| if( multLo > sum ) |
| { |
| multHi++; |
| if( CL_LONG_MIN == multHi ) |
| { |
| multHi = CL_LONG_MAX; |
| sum = CL_ULONG_MAX; |
| } |
| } |
| } |
| else |
| { |
| if( multLo < sum ) |
| { |
| multHi--; |
| if( CL_LONG_MAX == multHi ) |
| { |
| multHi = CL_LONG_MIN; |
| sum = 0; |
| } |
| } |
| } |
| |
| // saturate |
| if( multHi > 0 ) |
| sum = CL_LONG_MAX; |
| else if( multHi < -1 ) |
| sum = CL_LONG_MIN; |
| cl_long result = (cl_long) sum; |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| ((cl_long*) destination)[0] = result; |
| break; |
| case kInt: |
| result = MIN( result, (cl_long) CL_INT_MAX ); |
| result = MAX( result, (cl_long) CL_INT_MIN ); |
| ((cl_int*) destination)[0] = (cl_int) result; |
| break; |
| case kShort: |
| result = MIN( result, (cl_long) CL_SHRT_MAX ); |
| result = MAX( result, (cl_long) CL_SHRT_MIN ); |
| ((cl_short*) destination)[0] = (cl_short) result; |
| break; |
| case kChar: |
| result = MIN( result, (cl_long) CL_CHAR_MAX ); |
| result = MAX( result, (cl_long) CL_CHAR_MIN ); |
| ((cl_char*) destination)[0] = (cl_char) result; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| } |
| return true; |
| } |
| |
| int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_three_param_integer_fn( queue, context, "mad_sat", verify_integer_mad_sat ); |
| } |
| |
| bool verify_integer_mad_hi( void *sourceA, void *sourceB, void *sourceC, void *destination, |
| ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) |
| { |
| if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) |
| { |
| cl_ulong valueA, valueB, valueC; |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| valueA = ((cl_ulong*) sourceA)[0]; |
| valueB = ((cl_ulong*) sourceB)[0]; |
| valueC = ((cl_ulong*) sourceC)[0]; |
| break; |
| case kUInt: |
| valueA = ((cl_uint*) sourceA)[0]; |
| valueB = ((cl_uint*) sourceB)[0]; |
| valueC = ((cl_uint*) sourceC)[0]; |
| break; |
| case kUShort: |
| valueA = ((cl_ushort*) sourceA)[0]; |
| valueB = ((cl_ushort*) sourceB)[0]; |
| valueC = ((cl_ushort*) sourceC)[0]; |
| break; |
| case kUChar: |
| valueA = ((cl_uchar*) sourceA)[0]; |
| valueB = ((cl_uchar*) sourceB)[0]; |
| valueC = ((cl_uchar*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| cl_ulong multHi, multLo; |
| multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi ); |
| |
| switch( vecAType ) |
| { |
| case kULong: |
| ((cl_ulong*) destination)[0] = multHi + valueC; |
| break; |
| case kUInt: |
| ((cl_uint*) destination)[0] = (cl_uint) (( multLo >> 32) + valueC ); |
| break; |
| case kUShort: |
| ((cl_ushort*) destination)[0] = (cl_ushort) (( multLo >> 16) + valueC ); |
| break; |
| case kUChar: |
| ((cl_uchar*) destination)[0] = (cl_uchar) (( multLo >> 8) + valueC ); |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| } |
| else |
| { |
| cl_long valueA, valueB, valueC; |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| valueA = ((cl_long*) sourceA)[0]; |
| valueB = ((cl_long*) sourceB)[0]; |
| valueC = ((cl_long*) sourceC)[0]; |
| break; |
| case kInt: |
| valueA = ((cl_int*) sourceA)[0]; |
| valueB = ((cl_int*) sourceB)[0]; |
| valueC = ((cl_int*) sourceC)[0]; |
| break; |
| case kShort: |
| valueA = ((cl_short*) sourceA)[0]; |
| valueB = ((cl_short*) sourceB)[0]; |
| valueC = ((cl_short*) sourceC)[0]; |
| break; |
| case kChar: |
| valueA = ((cl_char*) sourceA)[0]; |
| valueB = ((cl_char*) sourceB)[0]; |
| valueC = ((cl_char*) sourceC)[0]; |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| |
| cl_long multHi; |
| cl_ulong multLo; |
| multiply_signed_64_by_64( valueA, valueB, multLo, multHi ); |
| |
| switch( vecAType ) |
| { |
| case kLong: |
| ((cl_long*) destination)[0] = multHi + valueC; |
| break; |
| case kInt: |
| ((cl_int*) destination)[0] = (cl_int) ((multLo >> 32) + valueC); |
| break; |
| case kShort: |
| ((cl_short*) destination)[0] = (cl_int) ((multLo >> 16) + valueC); |
| break; |
| case kChar: |
| ((cl_char*) destination)[0] = (cl_char) (cl_int) ((multLo >> 8) + valueC); |
| break; |
| default: |
| //error -- should never get here |
| abort(); |
| break; |
| } |
| } |
| return true; |
| } |
| |
| int test_integer_mad_hi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| return test_three_param_integer_fn( queue, context, "mad_hi", verify_integer_mad_hi ); |
| } |
| |
| |