blob: 519e5bee4821c879d302e4de44eb7dcd01be0a5c [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/ThreadPool.h"
#define NUM_TESTS 23
#define LONG_MATH_SHIFT_SIZE 26
#define QUICK_MATH_SHIFT_SIZE 16
static const char *kernel_code =
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] %s srcB[tid];\n"
"}\n";
static const char *kernel_code_V3 =
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n"
"}\n";
static const char *kernel_code_V3_scalar_vector =
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n"
"}\n";
static const char *kernel_code_V3_vector_scalar =
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n"
"}\n";
// Separate kernel here because it does not fit the pattern
static const char *not_kernel_code =
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = %ssrcA[tid];\n"
"}\n";
static const char *not_kernel_code_V3 =
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( %s vload3( tid, srcA ), tid, dst );\n"
"}\n";
static const char *kernel_code_scalar_shift =
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] %s srcB[tid]%s;\n"
"}\n";
static const char *kernel_code_scalar_shift_V3 =
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n"
"}\n";
static const char *kernel_code_question_colon =
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n"
"}\n";
static const char *kernel_code_question_colon_V3 =
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n"
"}\n";
// External verification and data generation functions
extern const char *tests[];
extern const char *test_names[];
extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n);
extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ;
extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n);
extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ;
extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n);
extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ;
extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n);
extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ;
extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n);
extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ;
extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n);
extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ;
extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n);
extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ;
extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n);
extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ;
// Supported type list
const ExplicitType types[] = {
kChar,
kUChar,
kShort,
kUShort,
kInt,
kUInt,
kLong,
kULong,
};
enum TestStyle
{
kDontCare=0,
kBothVectors,
kInputAScalar,
kInputBScalar,
kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar.
kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar
};
typedef struct _perThreadData
{
cl_mem m_streams[3];
cl_int *m_input_ptr[2], *m_output_ptr;
size_t m_type_size;
cl_program m_program[NUM_TESTS];
cl_kernel m_kernel[NUM_TESTS];
} perThreadData;
perThreadData * perThreadDataNew()
{
perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData));
memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS);
memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS);
pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL;
pThis->m_output_ptr = NULL;
return pThis;
}
void perThreadDataDestroy(perThreadData * pThis)
{
int i;
// cleanup
clReleaseMemObject(pThis->m_streams[0]);
clReleaseMemObject(pThis->m_streams[1]);
clReleaseMemObject(pThis->m_streams[2]);
for (i=0; i<NUM_TESTS; i++)
{
if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]);
if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]);
}
free(pThis->m_input_ptr[0]);
free(pThis->m_input_ptr[1]);
free(pThis->m_output_ptr);
free(pThis);
}
cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type,
int num_elements, int vectorSize,
int inputAVecSize, int inputBVecSize,
cl_context context, int start_test_ID,
int end_test_ID, int testID)
{
int i;
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
const char *type_name = get_explicit_type_name(type);
pThis->m_type_size = get_explicit_type_size(type);
int err;
// Used for the && and || tests where the vector case returns a signed value
const char *signed_type_name;
switch (type) {
case kChar:
case kUChar:
signed_type_name = get_explicit_type_name(kChar);
break;
case kShort:
case kUShort:
signed_type_name = get_explicit_type_name(kShort);
break;
case kInt:
case kUInt:
signed_type_name = get_explicit_type_name(kInt);
break;
case kLong:
case kULong:
signed_type_name = get_explicit_type_name(kLong);
break;
default:
log_error("Invalid type.\n");
return -1;
break;
}
pThis->m_input_ptr[0] =
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
pThis->m_input_ptr[1] =
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
pThis->m_output_ptr =
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
pThis->m_streams[0] = clCreateBuffer(
context, CL_MEM_READ_WRITE,
pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
test_error(err, "clCreateBuffer failed");
pThis->m_streams[1] = clCreateBuffer(
context, CL_MEM_READ_WRITE,
pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
test_error(err, "clCreateBuffer failed");
pThis->m_streams[2] = clCreateBuffer(
context, CL_MEM_READ_WRITE,
pThis->m_type_size * num_elements * vectorSize, NULL, &err);
test_error(err, "clCreateBuffer failed");
const char *vectorString = sizeNames[ vectorSize ];
const char *inputAVectorString = sizeNames[ inputAVecSize ];
const char *inputBVectorString = sizeNames[ inputBVecSize ];
if (testID == -1)
{
log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize));
}
char programString[4096];
const char *ptr;
const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3;
for (i=start_test_ID; i<end_test_ID; i++) {
switch (i) {
case 10:
case 11:
sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString,
type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0"));
break;
case 12:
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
type_name, vectorString, tests[i]);
break;
case 13:
sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon,
type_name, inputAVectorString, type_name, inputBVectorString,
type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ;
break;
case 14:
case 15:
case 16:
case 17:
case 18:
case 19:
case 20:
case 21:
// Need an unsigned result here for vector sizes > 1
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
break;
case 22:
// Need an unsigned result here for vector sizes > 1
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
break;
default:
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
type_name, vectorString, tests[i]);
break;
}
//printf("kernel: %s\n", programString);
ptr = programString;
err = create_single_kernel_helper( context,
&(pThis->m_program[ i ]),
&(pThis->m_kernel[ i ]), 1,
&ptr, "test" );
test_error( err, "Unable to create test kernel" );
err = clSetKernelArg(pThis->m_kernel[i], 0,
sizeof pThis->m_streams[0],
&(pThis->m_streams[0]) );
err |= clSetKernelArg(pThis->m_kernel[i], 1,
sizeof pThis->m_streams[1],
&(pThis->m_streams[1]) );
err |= clSetKernelArg(pThis->m_kernel[i], 2,
sizeof pThis->m_streams[2],
&(pThis->m_streams[2]) );
test_error(err, "clSetKernelArgs failed");
}
return CL_SUCCESS;
}
typedef struct _globalThreadData
{
cl_device_id m_deviceID;
cl_context m_context;
// cl_command_queue m_queue;
int m_num_elements;
int m_threadcount;
int m_vectorSize;
int m_num_runs_shift;
TestStyle m_style;
ExplicitType m_type;
MTdata * m_pRandData;
uint64_t m_offset;
int m_testID;
perThreadData **m_arrPerThreadData;
} globalThreadData;
globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
int vectorSize, TestStyle style, int num_runs_shift,
ExplicitType type, int testID,
int threadcount)
{
int i;
globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData));
pThis->m_deviceID = deviceID;
pThis->m_context = context;
// pThis->m_queue = queue;
pThis->m_num_elements = num_elements;
pThis->m_num_runs_shift = num_runs_shift;
pThis->m_vectorSize = vectorSize;
pThis->m_style = style;
pThis->m_type = type;
pThis->m_offset = (uint64_t)0;
pThis->m_testID = testID;
pThis->m_arrPerThreadData = NULL;
pThis->m_threadcount = threadcount;
pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata));
pThis->m_arrPerThreadData = (perThreadData **)
malloc(threadcount*sizeof(perThreadData *));
for(i=0; i < threadcount; ++i)
{
pThis->m_pRandData[i] = init_genrand(i+1);
pThis->m_arrPerThreadData[i] = NULL;
}
return pThis;
}
void globalThreadDataDestroy(globalThreadData * pThis)
{
int i;
for(i=0; i < pThis->m_threadcount; ++i)
{
free_mtdata(pThis->m_pRandData[i]);
if(pThis->m_arrPerThreadData[i] != NULL)
{
perThreadDataDestroy(pThis->m_arrPerThreadData[i]);
}
}
free(pThis->m_arrPerThreadData);
free(pThis->m_pRandData);
free(pThis);
}
int
test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx,
perThreadData ** ppThreadData);
cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo )
{
cl_int error; cl_int result;
globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo;
cl_command_queue queue;
#if THREAD_DEBUG
log_error("Thread %x (job %x) about to create command queue\n",
thread_id, job_id);
#endif
queue = clCreateCommandQueue (threadInfoGlobal->m_context,
threadInfoGlobal->m_deviceID,0,
&error);
if(error != CL_SUCCESS)
{
log_error("Thread %x (job %x) could not create command queue\n",
thread_id, job_id);
return error; // should we clean up the queue too?
}
#if THREAD_DEBUG
log_error("Thread %x (job %x) created command queue\n",
thread_id, job_id);
#endif
result = test_integer_ops( threadInfoGlobal->m_deviceID,
threadInfoGlobal->m_context,
queue,
threadInfoGlobal->m_num_elements,
threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style,
threadInfoGlobal->m_num_runs_shift,
threadInfoGlobal->m_type, threadInfoGlobal->m_testID,
threadInfoGlobal->m_pRandData[thread_id],
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id,
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1),
&(threadInfoGlobal->m_arrPerThreadData[thread_id])
);
if(result != 0)
{
log_error("Thread %x (job %x) failed test_integer_ops with result %x\n",
thread_id, job_id, result);
// return error;
}
error = clReleaseCommandQueue(queue);
if(error != CL_SUCCESS)
{
log_error("Thread %x (job %x) could not release command queue\n",
thread_id, job_id);
return error;
}
return result;
}
int
test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID)
{
globalThreadData * pThreadInfo = NULL;
cl_int result=0;
cl_uint threadcount = GetThreadCount();
// Check to see if we are using single threaded mode on other than a 1.0 device
if (getenv( "CL_TEST_SINGLE_THREADED" )) {
char device_version[1024] = { 0 };
result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
if(result != CL_SUCCESS)
{
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
return result;
}
if (strcmp("OpenCL 1.0 ",device_version)) {
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
}
}
// This test will run threadcount threads concurrently; each thread will execute test_integer_ops()
// which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize.
// We need to make sure that the total device memory allocated by all threads does not exceed the maximum
// memory on the device. If it does, we decrease num_elements until all threads combined will not
// over-subscribe device memory.
cl_ulong maxDeviceGlobalMem;
result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
if(result != CL_SUCCESS)
{
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
return result;
}
if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
}
// Let's not take all device memory - reduce by 75%
maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
// Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory.
size_t type_size = get_explicit_type_size(type);
while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem)
{
num_elements >>= 1;
}
uint64_t startIndx = (uint64_t)0;
uint64_t endIndx = (1ULL<<num_runs_shift);
uint64_t jobcount = (endIndx-startIndx)/num_elements;
if(jobcount==0)
{
jobcount = 1;
}
pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements,
vectorSize, style, num_runs_shift,
type, testID, threadcount);
pThreadInfo->m_offset = startIndx;
#if THREAD_DEBUG
log_error("Launching %llx jobs\n",
jobcount);
#endif
result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo);
if(result != 0)
{
// cleanup ??
log_error("ThreadPool_Do return non-success value %d\n", result);
}
globalThreadDataDestroy(pThreadInfo);
return result;
}
int
test_integer_ops(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
int vectorSize, TestStyle style, int num_runs_shift,
ExplicitType type, int testID, MTdata randDataIn,
uint64_t startIndx, uint64_t endIndx,
perThreadData ** ppThreadData)
{
size_t threads[1];
int err;
int i;
int inputAVecSize, inputBVecSize;
inputAVecSize = inputBVecSize = vectorSize;
if( style == kInputAScalar )
inputAVecSize = 1;
else if( style == kInputBScalar )
inputBVecSize = 1;
/*
if( inputAVecSize != inputBVecSize )
log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n",
test_names[testID],
get_explicit_type_name(type), vectorSize,
( inputAVecSize == 1 ) ? "scalar" : "vector",
( inputBVecSize == 1 ) ? "scalar" : "vector",
startIndx, endIndx, (1ULL<<num_runs_shift) );
else
log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n",
test_names[testID],
get_explicit_type_name(type), vectorSize,
startIndx, endIndx, (1ULL<<num_runs_shift));
*/
// Figure out which sub-test to run, or all of them
int start_test_ID = 0;
int end_test_ID = NUM_TESTS;
if (testID != -1) {
start_test_ID = testID;
end_test_ID = testID+1;
}
if (testID > NUM_TESTS) {
log_error("Invalid test ID: %d\n", testID);
return -1;
}
if(*ppThreadData == NULL)
{
*ppThreadData = perThreadDataNew();
err = perThreadDataInit(*ppThreadData,
type, num_elements, vectorSize,
inputAVecSize, inputBVecSize,
context, start_test_ID,
end_test_ID, testID);
test_error(err, "failed to init per thread data\n");
}
perThreadData * pThreadData = *ppThreadData;
threads[0] = (size_t)num_elements;
int error_count = 0;
for (i=start_test_ID; i<end_test_ID; i++)
{
uint64_t indx;
if(startIndx >= endIndx)
{
startIndx = (uint64_t)0;
endIndx = (1ULL<<num_runs_shift);
}
for (indx=startIndx; indx < endIndx; indx+=num_elements)
{
switch (type) {
case kChar:
init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn);
break;
case kUChar:
init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn);
break;
case kShort:
init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn);
break;
case kUShort:
init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn);
break;
case kInt:
init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn);
break;
case kUInt:
init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn);
break;
case kLong:
init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn);
break;
case kULong:
init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn);
break;
default:
err = 1;
log_error("Invalid type.\n");
break;
}
err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL );
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
// log_info("Performing verification\n");
// If one of the inputs are scalar, we need to extend the input values to vectors
// to accommodate the verify functions
if( vectorSize > 1 )
{
char * p = NULL;
if( style == kInputAScalar )
p = (char *)pThreadData->m_input_ptr[ 0 ];
else if( style == kInputBScalar )
p = (char *)pThreadData->m_input_ptr[ 1 ];
if( p != NULL )
{
for( int element = num_elements - 1; element >= 0; element-- )
{
for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ )
memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size );
}
}
}
switch (type) {
case kChar:
err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kUChar:
err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kShort:
err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kUShort:
err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kInt:
err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kUInt:
err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kLong:
err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
case kULong:
err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize);
break;
default:
err = 1;
log_error("Invalid type.\n");
break;
}
if (err) {
#if 0
log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements );
cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0];
log_error( "from 18:\n" );
for( int q = 18; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
inP = (cl_char *)pThreadData->m_input_ptr[1];
for( int q = 18; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
inP = (cl_char *)pThreadData->m_output_ptr;
for( int q = 18; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
log_error( "from 36:\n" );
inP = (cl_char *)pThreadData->m_input_ptr[0];
for( int q = 36; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
inP = (cl_char *)pThreadData->m_input_ptr[1];
for( int q = 36; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
inP = (cl_char *)pThreadData->m_output_ptr;
for( int q = 36; q < 64; q++ )
{
log_error( "%02x ", inP[ q ] );
}
log_error( "\n" );
#endif
error_count++;
break;
}
}
/*
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
if (err) {
log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n",
get_explicit_type_name(type), sizeNames[vectorSize],
test_names[i],
startIndx, endIndx,
(1ULL<<num_runs_shift));
} else {
log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n",
get_explicit_type_name(type), sizeNames[vectorSize],
test_names[i],
startIndx, endIndx,
(1ULL<<num_runs_shift));
}
*/
}
return error_count;
}
// Run all the vector sizes for a given test
int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) {
int errors = 0;
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID);
return errors;
}
// Run multiple tests for a given type
int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
int errors = 0;
if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
log_info("Detected CL_WIMPY_MODE env\n");
log_info("Skipping long test\n");
return 0;
}
int i;
for (i=0; i<total_tests; i++)
{
int localErrors;
log_info("Testing \"%s\" ", test_names[tests[i]]); fflush( stdout );
localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]);
if( localErrors )
log_info( "FAILED\n" );
else
log_info( "passed\n" );
errors += localErrors;
}
return errors;
}
// Run all the math tests for a given type
int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
int tests[] = {0, 1, 2, 3, 4};
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
}
// Run all the logic tests for a given type
int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
int tests[] = {5, 6, 7, 12, 14, 15, 22};
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
}
// Run all the shifting tests for a given type
int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
int tests[] = {8, 9, 10, 11};
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
}
// Run all the comparison tests for a given type
int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
int tests[] = {13, 16, 17, 18, 19, 20, 21};
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
}
// Run all tests for a given type
int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
int errors = 0;
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1);
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1);
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1);
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1);
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1);
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1);
return errors;
}
// -----------------
// Long tests
// -----------------
int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_math(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
}
int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_logic(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
}
int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_shift(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
}
int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_compare(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
}
int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_math(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_logic(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_shift(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_compare(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// ULong tests
// -----------------
int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_math(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
}
int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_logic(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
}
int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_shift(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
}
int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_compare(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
}
int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_math(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_logic(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_shift(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test_compare(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// Int tests
// -----------------
int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
}
int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
}
int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
}
int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
}
int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// UInt tests
// -----------------
int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
}
int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
}
int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
}
int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
}
int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// Short tests
// -----------------
int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
}
int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
}
int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
}
int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
}
int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// UShort tests
// -----------------
int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
}
int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
}
int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
}
int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
}
int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// Char tests
// -----------------
int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
}
int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
}
int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
}
int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
}
int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
}
// -----------------
// UChar tests
// -----------------
int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
}
int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
}
int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
}
int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
}
int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_math(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_logic(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_shift(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
}
int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test_compare(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
}
// These are kept for debugging if you want to run all the tests together.
int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
}
int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
}
int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
}
int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
if (!gHasLong)
{
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
return CL_SUCCESS;
}
return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
}
int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
}
int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
}
int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
}
int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
}
int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
}
int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
}
int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
}
int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
}
int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
}
int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
}
int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
}
int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
}
// Prototype for below
int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
int vectorSize, TestStyle style, ExplicitType type );
// Run all the vector sizes for a given test in scalar-vector and vector-scalar modes
int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID)
{
int sizes[] = { 2, 3, 4, 8, 16, 0 };
int errors = 0;
for( int i = 0; sizes[ i ] != 0; i++ )
{
if( testID == 13 )
{
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type );
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type );
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type );
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kBothVectors | kInputCAlsoScalar), type );
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputAScalar | kInputCAlsoScalar), type );
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputBScalar | kInputCAlsoScalar), type );
}
else
{
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID);
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID);
}
}
return errors;
}
// Run all the tests for scalar-vector and vector-scalar for a given type
int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num )
{
int errors = 0;
size_t i;
// Shift operators:
// a) cannot take scalars as first parameter and vectors as second
// b) have the vector >> scalar case tested by tests 10 and 11
// so they get skipped entirely
int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7,
13, 14, 15, 16, 17, 18, 19, 20, 21 };
for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++)
{
errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]);
}
return errors;
}
int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int errors = 0;
int numTypes = sizeof( types ) / sizeof( types[ 0 ] );
for( int t = 0; t < numTypes; t++ )
{
if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong)
continue;
errors += run_vector_scalar_tests( deviceID, context, queue, num_elements, types[ t ], 1 );
break;
}
return errors;
}
void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize )
{
cl_uint bits = genrand_int32(d);
cl_uint bitsLeft = 32;
memset( outData, 0, outDataSize * count );
for( size_t i = 0; i < count; i++ )
{
if( 0 == bitsLeft)
{
bits = genrand_int32(d);
bitsLeft = 32;
}
// Note: we will be setting just any bit non-zero for the type, so we can easily skip past
// and just write bytes (assuming the entire output buffer is already zeroed, which we did)
*outData = ( bits & 1 ) ? 0xff : 0;
bits >>= 1; bitsLeft -= 1;
outData += outDataSize;
}
}
static const char *kernel_question_colon_full =
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" %s%s valA = %ssrcA%s"
" %s%s valB = %ssrcB%s"
" %s%s valC = %ssrcC%s"
" %s%s destVal = valC ? valA : valB;\n"
" %s"
"}\n";
static const char *kernel_qc_load_plain_prefix = "";
static const char *kernel_qc_load_plain_suffix = "[ tid ];\n";
static const char *kernel_qc_load_vec3_prefix = "vload3( tid, ";
static const char *kernel_qc_load_vec3_suffix = ");\n";
static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n";
static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n";
int test_question_colon_op(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
int vectorSize, TestStyle style, ExplicitType type )
{
cl_mem streams[4];
cl_int *input_ptr[3], *output_ptr;
cl_program program;
cl_kernel kernel;
size_t threads[1];
int err;
int inputAVecSize, inputBVecSize, inputCVecSize;
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
// Identical to sizeNames but with a blank for 3, since we use vload/store there
const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
MTdata s_randStates;
inputAVecSize = inputBVecSize = inputCVecSize = vectorSize;
if( style & kInputCAlsoScalar )
{
style = (TestStyle)( style & ~kInputCAlsoScalar );
inputCVecSize = 1;
}
if( style == kInputAScalar )
inputAVecSize = 1;
else if( style == kInputBScalar )
inputBVecSize = 1;
else if( style == kVectorScalarScalar )
inputAVecSize = inputBVecSize = 1;
log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n",
get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector",
( inputAVecSize == 1 ) ? "scalar" : "vector",
( inputBVecSize == 1 ) ? "scalar" : "vector" );
const char *type_name = get_explicit_type_name(type);
size_t type_size = get_explicit_type_size(type);
// Create and initialize I/O buffers
input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize);
input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize);
input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize);
output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize);
s_randStates = init_genrand( gRandomSeed );
generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] );
generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
streams[0] = clCreateBuffer(
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
type_size * num_elements * inputAVecSize, input_ptr[0], &err);
test_error(err, "clCreateBuffer failed");
streams[1] = clCreateBuffer(
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
type_size * num_elements * inputBVecSize, input_ptr[1], &err);
test_error(err, "clCreateBuffer failed");
streams[2] = clCreateBuffer(
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
type_size * num_elements * inputCVecSize, input_ptr[2], &err);
test_error(err, "clCreateBuffer failed");
streams[3] =
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
type_size * num_elements * vectorSize, NULL, &err);
test_error(err, "clCreateBuffer failed");
const char *vectorString = sizeNames[ vectorSize ];
const char *inputAVectorString = sizeNames[ inputAVecSize ];
const char *inputBVectorString = sizeNames[ inputBVecSize ];
const char *inputCVectorString = sizeNames[ inputCVecSize ];
char programString[4096];
const char *ptr;
sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ],
type_name, paramSizeNames[ inputBVecSize ],
type_name, paramSizeNames[ inputCVecSize ],
type_name, paramSizeNames[ vectorSize ],
// Loads
type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
// Dest type
type_name, vectorString,
// Store
( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain );
ptr = programString;
err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" );
test_error( err, "Unable to create test kernel" );
err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] );
err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] );
err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] );
err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] );
test_error(err, "clSetKernelArgs failed");
// Run
threads[0] = (size_t)num_elements;
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");
// Read and verify results
err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
// log_info("Performing verification\n");
int error_count = 0;
char *inputAPtr = (char *)input_ptr[ 0 ];
char *inputBPtr = (char *)input_ptr[ 1 ];
cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ];
char *actualPtr = (char *)output_ptr;
for( int i = 0; i < num_elements; i++ )
{
for( int j = 0; j < vectorSize; j++ )
{
char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr;
if( memcmp( expectedPtr, actualPtr, type_size ) != 0 )
{
#if 0
char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ];
print_type_to_string( type, inputAPtr, inputAStr );
print_type_to_string( type, inputBPtr, inputBStr );
print_type_to_string( type, expectedPtr, expectedStr );
print_type_to_string( type, actualPtr, actualStr );
log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n",
type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" );
#endif
error_count++;
}
// Advance for each element member. Note if any of the vec sizes are 1, they don't advance here
inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size;
inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size;
inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size;
actualPtr += ( vectorSize == 1 ) ? 0 : type_size;
}
// Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct
inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0;
inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0;
inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0;
actualPtr += ( vectorSize == 1 ) ? type_size : 0;
}
// cleanup
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
clReleaseKernel(kernel);
clReleaseProgram(program);
free(input_ptr[0]);
free(input_ptr[1]);
free(input_ptr[2]);
free(output_ptr);
free_mtdata( s_randStates );
return error_count;
}