| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "harness/compat.h" |
| |
| #include <stdio.h> |
| #include <string.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| |
| #include "procs.h" |
| #include "harness/conversions.h" |
| #include "harness/ThreadPool.h" |
| |
| #define NUM_TESTS 23 |
| |
| #define LONG_MATH_SHIFT_SIZE 26 |
| #define QUICK_MATH_SHIFT_SIZE 16 |
| |
| static const char *kernel_code = |
| "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " dst[tid] = srcA[tid] %s srcB[tid];\n" |
| "}\n"; |
| |
| static const char *kernel_code_V3 = |
| "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n" |
| "}\n"; |
| |
| static const char *kernel_code_V3_scalar_vector = |
| "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n" |
| "}\n"; |
| |
| static const char *kernel_code_V3_vector_scalar = |
| "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n" |
| "}\n"; |
| |
| |
| // Separate kernel here because it does not fit the pattern |
| static const char *not_kernel_code = |
| "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " dst[tid] = %ssrcA[tid];\n" |
| "}\n"; |
| |
| static const char *not_kernel_code_V3 = |
| "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( %s vload3( tid, srcA ), tid, dst );\n" |
| "}\n"; |
| |
| static const char *kernel_code_scalar_shift = |
| "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " dst[tid] = srcA[tid] %s srcB[tid]%s;\n" |
| "}\n"; |
| |
| static const char *kernel_code_scalar_shift_V3 = |
| "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n" |
| "}\n"; |
| |
| static const char *kernel_code_question_colon = |
| "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n" |
| "}\n"; |
| |
| static const char *kernel_code_question_colon_V3 = |
| "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n" |
| "}\n"; |
| |
| |
| |
| |
| // External verification and data generation functions |
| extern const char *tests[]; |
| extern const char *test_names[]; |
| extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n); |
| extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ; |
| extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n); |
| extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ; |
| extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n); |
| extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ; |
| extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n); |
| extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ; |
| extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n); |
| extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ; |
| extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n); |
| extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ; |
| extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n); |
| extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ; |
| extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n); |
| extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ; |
| |
| // Supported type list |
| const ExplicitType types[] = { |
| kChar, |
| kUChar, |
| kShort, |
| kUShort, |
| kInt, |
| kUInt, |
| kLong, |
| kULong, |
| }; |
| |
| enum TestStyle |
| { |
| kDontCare=0, |
| kBothVectors, |
| kInputAScalar, |
| kInputBScalar, |
| kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar. |
| kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar |
| }; |
| |
| typedef struct _perThreadData |
| { |
| cl_mem m_streams[3]; |
| cl_int *m_input_ptr[2], *m_output_ptr; |
| size_t m_type_size; |
| cl_program m_program[NUM_TESTS]; |
| cl_kernel m_kernel[NUM_TESTS]; |
| } perThreadData; |
| |
| |
| perThreadData * perThreadDataNew() |
| { |
| perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData)); |
| |
| |
| memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS); |
| memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS); |
| |
| pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL; |
| pThis->m_output_ptr = NULL; |
| |
| return pThis; |
| } |
| |
| |
| void perThreadDataDestroy(perThreadData * pThis) |
| { |
| int i; |
| // cleanup |
| clReleaseMemObject(pThis->m_streams[0]); |
| clReleaseMemObject(pThis->m_streams[1]); |
| clReleaseMemObject(pThis->m_streams[2]); |
| for (i=0; i<NUM_TESTS; i++) |
| { |
| if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]); |
| if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]); |
| } |
| free(pThis->m_input_ptr[0]); |
| free(pThis->m_input_ptr[1]); |
| free(pThis->m_output_ptr); |
| |
| free(pThis); |
| } |
| |
| |
| cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type, |
| int num_elements, int vectorSize, |
| int inputAVecSize, int inputBVecSize, |
| cl_context context, int start_test_ID, |
| int end_test_ID, int testID) |
| { |
| int i; |
| const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; |
| |
| const char *type_name = get_explicit_type_name(type); |
| pThis->m_type_size = get_explicit_type_size(type); |
| int err; |
| // Used for the && and || tests where the vector case returns a signed value |
| const char *signed_type_name; |
| switch (type) { |
| case kChar: |
| case kUChar: |
| signed_type_name = get_explicit_type_name(kChar); |
| break; |
| case kShort: |
| case kUShort: |
| signed_type_name = get_explicit_type_name(kShort); |
| break; |
| case kInt: |
| case kUInt: |
| signed_type_name = get_explicit_type_name(kInt); |
| break; |
| case kLong: |
| case kULong: |
| signed_type_name = get_explicit_type_name(kLong); |
| break; |
| default: |
| log_error("Invalid type.\n"); |
| return -1; |
| break; |
| } |
| |
| pThis->m_input_ptr[0] = |
| (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); |
| pThis->m_input_ptr[1] = |
| (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); |
| pThis->m_output_ptr = |
| (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); |
| pThis->m_streams[0] = clCreateBuffer( |
| context, CL_MEM_READ_WRITE, |
| pThis->m_type_size * num_elements * inputAVecSize, NULL, &err); |
| |
| test_error(err, "clCreateBuffer failed"); |
| |
| pThis->m_streams[1] = clCreateBuffer( |
| context, CL_MEM_READ_WRITE, |
| pThis->m_type_size * num_elements * inputBVecSize, NULL, &err); |
| |
| test_error(err, "clCreateBuffer failed"); |
| |
| pThis->m_streams[2] = clCreateBuffer( |
| context, CL_MEM_READ_WRITE, |
| pThis->m_type_size * num_elements * vectorSize, NULL, &err); |
| |
| test_error(err, "clCreateBuffer failed"); |
| |
| const char *vectorString = sizeNames[ vectorSize ]; |
| const char *inputAVectorString = sizeNames[ inputAVecSize ]; |
| const char *inputBVectorString = sizeNames[ inputBVecSize ]; |
| |
| if (testID == -1) |
| { |
| log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize)); |
| } |
| |
| char programString[4096]; |
| const char *ptr; |
| |
| |
| const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3; |
| |
| for (i=start_test_ID; i<end_test_ID; i++) { |
| switch (i) { |
| case 10: |
| case 11: |
| sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString, |
| type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0")); |
| break; |
| case 12: |
| sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString, |
| type_name, vectorString, tests[i]); |
| break; |
| case 13: |
| sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon, |
| type_name, inputAVectorString, type_name, inputBVectorString, |
| type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ; |
| break; |
| case 14: |
| case 15: |
| case 16: |
| case 17: |
| case 18: |
| case 19: |
| case 20: |
| case 21: |
| // Need an unsigned result here for vector sizes > 1 |
| sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString, |
| ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]); |
| break; |
| case 22: |
| // Need an unsigned result here for vector sizes > 1 |
| sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString, |
| ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]); |
| break; |
| default: |
| sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString, |
| type_name, vectorString, tests[i]); |
| break; |
| } |
| |
| //printf("kernel: %s\n", programString); |
| ptr = programString; |
| err = create_single_kernel_helper( context, |
| &(pThis->m_program[ i ]), |
| &(pThis->m_kernel[ i ]), 1, |
| &ptr, "test" ); |
| test_error( err, "Unable to create test kernel" ); |
| err = clSetKernelArg(pThis->m_kernel[i], 0, |
| sizeof pThis->m_streams[0], |
| &(pThis->m_streams[0]) ); |
| err |= clSetKernelArg(pThis->m_kernel[i], 1, |
| sizeof pThis->m_streams[1], |
| &(pThis->m_streams[1]) ); |
| err |= clSetKernelArg(pThis->m_kernel[i], 2, |
| sizeof pThis->m_streams[2], |
| &(pThis->m_streams[2]) ); |
| test_error(err, "clSetKernelArgs failed"); |
| } |
| |
| return CL_SUCCESS; |
| } |
| |
| typedef struct _globalThreadData |
| { |
| cl_device_id m_deviceID; |
| cl_context m_context; |
| // cl_command_queue m_queue; |
| int m_num_elements; |
| int m_threadcount; |
| int m_vectorSize; |
| int m_num_runs_shift; |
| TestStyle m_style; |
| ExplicitType m_type; |
| MTdata * m_pRandData; |
| uint64_t m_offset; |
| int m_testID; |
| perThreadData **m_arrPerThreadData; |
| } globalThreadData; |
| |
| |
| |
| globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context, |
| cl_command_queue queue, int num_elements, |
| int vectorSize, TestStyle style, int num_runs_shift, |
| ExplicitType type, int testID, |
| int threadcount) |
| { |
| int i; |
| globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData)); |
| pThis->m_deviceID = deviceID; |
| pThis->m_context = context; |
| // pThis->m_queue = queue; |
| pThis->m_num_elements = num_elements; |
| pThis->m_num_runs_shift = num_runs_shift; |
| pThis->m_vectorSize = vectorSize; |
| pThis->m_style = style; |
| pThis->m_type = type; |
| pThis->m_offset = (uint64_t)0; |
| pThis->m_testID = testID; |
| pThis->m_arrPerThreadData = NULL; |
| pThis->m_threadcount = threadcount; |
| |
| pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata)); |
| pThis->m_arrPerThreadData = (perThreadData **) |
| malloc(threadcount*sizeof(perThreadData *)); |
| for(i=0; i < threadcount; ++i) |
| { |
| pThis->m_pRandData[i] = init_genrand(i+1); |
| pThis->m_arrPerThreadData[i] = NULL; |
| } |
| |
| return pThis; |
| } |
| |
| void globalThreadDataDestroy(globalThreadData * pThis) |
| { |
| int i; |
| |
| for(i=0; i < pThis->m_threadcount; ++i) |
| { |
| free_mtdata(pThis->m_pRandData[i]); |
| if(pThis->m_arrPerThreadData[i] != NULL) |
| { |
| perThreadDataDestroy(pThis->m_arrPerThreadData[i]); |
| } |
| } |
| free(pThis->m_arrPerThreadData); |
| free(pThis->m_pRandData); |
| free(pThis); |
| } |
| |
| int |
| test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx, |
| perThreadData ** ppThreadData); |
| |
| |
| cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo ) |
| { |
| cl_int error; cl_int result; |
| globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo; |
| cl_command_queue queue; |
| |
| #if THREAD_DEBUG |
| log_error("Thread %x (job %x) about to create command queue\n", |
| thread_id, job_id); |
| #endif |
| |
| queue = clCreateCommandQueue (threadInfoGlobal->m_context, |
| threadInfoGlobal->m_deviceID,0, |
| &error); |
| |
| if(error != CL_SUCCESS) |
| { |
| log_error("Thread %x (job %x) could not create command queue\n", |
| thread_id, job_id); |
| return error; // should we clean up the queue too? |
| } |
| |
| #if THREAD_DEBUG |
| log_error("Thread %x (job %x) created command queue\n", |
| thread_id, job_id); |
| #endif |
| |
| result = test_integer_ops( threadInfoGlobal->m_deviceID, |
| threadInfoGlobal->m_context, |
| queue, |
| threadInfoGlobal->m_num_elements, |
| threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style, |
| threadInfoGlobal->m_num_runs_shift, |
| threadInfoGlobal->m_type, threadInfoGlobal->m_testID, |
| threadInfoGlobal->m_pRandData[thread_id], |
| threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id, |
| threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1), |
| &(threadInfoGlobal->m_arrPerThreadData[thread_id]) |
| ); |
| |
| if(result != 0) |
| { |
| log_error("Thread %x (job %x) failed test_integer_ops with result %x\n", |
| thread_id, job_id, result); |
| // return error; |
| } |
| |
| |
| error = clReleaseCommandQueue(queue); |
| if(error != CL_SUCCESS) |
| { |
| log_error("Thread %x (job %x) could not release command queue\n", |
| thread_id, job_id); |
| return error; |
| } |
| return result; |
| } |
| |
| int |
| test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID) |
| { |
| globalThreadData * pThreadInfo = NULL; |
| cl_int result=0; |
| cl_uint threadcount = GetThreadCount(); |
| |
| // Check to see if we are using single threaded mode on other than a 1.0 device |
| if (getenv( "CL_TEST_SINGLE_THREADED" )) { |
| |
| char device_version[1024] = { 0 }; |
| result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL ); |
| if(result != CL_SUCCESS) |
| { |
| log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result); |
| return result; |
| } |
| |
| if (strcmp("OpenCL 1.0 ",device_version)) { |
| log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n"); |
| } |
| } |
| |
| // This test will run threadcount threads concurrently; each thread will execute test_integer_ops() |
| // which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize. |
| // We need to make sure that the total device memory allocated by all threads does not exceed the maximum |
| // memory on the device. If it does, we decrease num_elements until all threads combined will not |
| // over-subscribe device memory. |
| cl_ulong maxDeviceGlobalMem; |
| result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL); |
| if(result != CL_SUCCESS) |
| { |
| log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result); |
| return result; |
| } |
| |
| if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) { |
| maxDeviceGlobalMem = (cl_ulong)SIZE_MAX; |
| } |
| |
| // Let's not take all device memory - reduce by 75% |
| maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2; |
| // Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory. |
| size_t type_size = get_explicit_type_size(type); |
| while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem) |
| { |
| num_elements >>= 1; |
| } |
| |
| uint64_t startIndx = (uint64_t)0; |
| uint64_t endIndx = (1ULL<<num_runs_shift); |
| uint64_t jobcount = (endIndx-startIndx)/num_elements; |
| |
| if(jobcount==0) |
| { |
| jobcount = 1; |
| } |
| |
| pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements, |
| vectorSize, style, num_runs_shift, |
| type, testID, threadcount); |
| |
| |
| pThreadInfo->m_offset = startIndx; |
| |
| #if THREAD_DEBUG |
| log_error("Launching %llx jobs\n", |
| jobcount); |
| #endif |
| |
| result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo); |
| |
| if(result != 0) |
| { |
| // cleanup ?? |
| log_error("ThreadPool_Do return non-success value %d\n", result); |
| |
| } |
| globalThreadDataDestroy(pThreadInfo); |
| return result; |
| } |
| |
| |
| |
| int |
| test_integer_ops(cl_device_id deviceID, cl_context context, |
| cl_command_queue queue, int num_elements, |
| int vectorSize, TestStyle style, int num_runs_shift, |
| ExplicitType type, int testID, MTdata randDataIn, |
| uint64_t startIndx, uint64_t endIndx, |
| perThreadData ** ppThreadData) |
| { |
| size_t threads[1]; |
| int err; |
| int i; |
| int inputAVecSize, inputBVecSize; |
| |
| |
| |
| inputAVecSize = inputBVecSize = vectorSize; |
| if( style == kInputAScalar ) |
| inputAVecSize = 1; |
| else if( style == kInputBScalar ) |
| inputBVecSize = 1; |
| |
| /* |
| if( inputAVecSize != inputBVecSize ) |
| log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n", |
| test_names[testID], |
| get_explicit_type_name(type), vectorSize, |
| ( inputAVecSize == 1 ) ? "scalar" : "vector", |
| ( inputBVecSize == 1 ) ? "scalar" : "vector", |
| startIndx, endIndx, (1ULL<<num_runs_shift) ); |
| else |
| log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n", |
| test_names[testID], |
| get_explicit_type_name(type), vectorSize, |
| startIndx, endIndx, (1ULL<<num_runs_shift)); |
| */ |
| |
| |
| // Figure out which sub-test to run, or all of them |
| int start_test_ID = 0; |
| int end_test_ID = NUM_TESTS; |
| if (testID != -1) { |
| start_test_ID = testID; |
| end_test_ID = testID+1; |
| } |
| if (testID > NUM_TESTS) { |
| log_error("Invalid test ID: %d\n", testID); |
| return -1; |
| } |
| |
| if(*ppThreadData == NULL) |
| { |
| *ppThreadData = perThreadDataNew(); |
| err = perThreadDataInit(*ppThreadData, |
| type, num_elements, vectorSize, |
| inputAVecSize, inputBVecSize, |
| context, start_test_ID, |
| end_test_ID, testID); |
| test_error(err, "failed to init per thread data\n"); |
| } |
| |
| perThreadData * pThreadData = *ppThreadData; |
| |
| |
| |
| threads[0] = (size_t)num_elements; |
| int error_count = 0; |
| for (i=start_test_ID; i<end_test_ID; i++) |
| { |
| uint64_t indx; |
| |
| |
| if(startIndx >= endIndx) |
| { |
| startIndx = (uint64_t)0; |
| endIndx = (1ULL<<num_runs_shift); |
| } |
| for (indx=startIndx; indx < endIndx; indx+=num_elements) |
| { |
| |
| switch (type) { |
| case kChar: |
| init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kUChar: |
| init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kShort: |
| init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kUShort: |
| init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kInt: |
| init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kUInt: |
| init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kLong: |
| init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| case kULong: |
| init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn); |
| break; |
| default: |
| err = 1; |
| log_error("Invalid type.\n"); |
| break; |
| } |
| |
| |
| err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL); |
| test_error(err, "clEnqueueWriteBuffer failed"); |
| err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL ); |
| test_error(err, "clEnqueueWriteBuffer failed"); |
| |
| err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); |
| test_error(err, "clEnqueueNDRangeKernel failed"); |
| |
| err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL ); |
| test_error(err, "clEnqueueReadBuffer failed"); |
| |
| // log_info("Performing verification\n"); |
| |
| // If one of the inputs are scalar, we need to extend the input values to vectors |
| // to accommodate the verify functions |
| if( vectorSize > 1 ) |
| { |
| char * p = NULL; |
| if( style == kInputAScalar ) |
| p = (char *)pThreadData->m_input_ptr[ 0 ]; |
| else if( style == kInputBScalar ) |
| p = (char *)pThreadData->m_input_ptr[ 1 ]; |
| if( p != NULL ) |
| { |
| for( int element = num_elements - 1; element >= 0; element-- ) |
| { |
| for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ ) |
| memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size ); |
| } |
| } |
| } |
| |
| switch (type) { |
| case kChar: |
| err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kUChar: |
| err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kShort: |
| err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kUShort: |
| err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kInt: |
| err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kUInt: |
| err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kLong: |
| err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| case kULong: |
| err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize); |
| break; |
| default: |
| err = 1; |
| log_error("Invalid type.\n"); |
| break; |
| } |
| |
| if (err) { |
| #if 0 |
| log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements ); |
| cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0]; |
| log_error( "from 18:\n" ); |
| for( int q = 18; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| inP = (cl_char *)pThreadData->m_input_ptr[1]; |
| for( int q = 18; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| inP = (cl_char *)pThreadData->m_output_ptr; |
| for( int q = 18; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| log_error( "from 36:\n" ); |
| inP = (cl_char *)pThreadData->m_input_ptr[0]; |
| for( int q = 36; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| inP = (cl_char *)pThreadData->m_input_ptr[1]; |
| for( int q = 36; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| inP = (cl_char *)pThreadData->m_output_ptr; |
| for( int q = 36; q < 64; q++ ) |
| { |
| log_error( "%02x ", inP[ q ] ); |
| } |
| log_error( "\n" ); |
| #endif |
| error_count++; |
| break; |
| } |
| } |
| |
| /* |
| |
| const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; |
| |
| if (err) { |
| log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n", |
| get_explicit_type_name(type), sizeNames[vectorSize], |
| test_names[i], |
| startIndx, endIndx, |
| (1ULL<<num_runs_shift)); |
| } else { |
| log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n", |
| get_explicit_type_name(type), sizeNames[vectorSize], |
| test_names[i], |
| startIndx, endIndx, |
| (1ULL<<num_runs_shift)); |
| } |
| */ |
| } |
| |
| |
| |
| return error_count; |
| } |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| // Run all the vector sizes for a given test |
| int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) { |
| int errors = 0; |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID); |
| return errors; |
| } |
| |
| // Run multiple tests for a given type |
| int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) { |
| int errors = 0; |
| |
| if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) { |
| log_info("Detected CL_WIMPY_MODE env\n"); |
| log_info("Skipping long test\n"); |
| return 0; |
| } |
| |
| int i; |
| for (i=0; i<total_tests; i++) |
| { |
| int localErrors; |
| log_info("Testing \"%s\" ", test_names[tests[i]]); fflush( stdout ); |
| localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]); |
| if( localErrors ) |
| log_info( "FAILED\n" ); |
| else |
| log_info( "passed\n" ); |
| |
| errors += localErrors; |
| } |
| |
| return errors; |
| } |
| |
| // Run all the math tests for a given type |
| int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) { |
| int tests[] = {0, 1, 2, 3, 4}; |
| return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int))); |
| } |
| |
| // Run all the logic tests for a given type |
| int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) { |
| int tests[] = {5, 6, 7, 12, 14, 15, 22}; |
| return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int))); |
| } |
| |
| // Run all the shifting tests for a given type |
| int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) { |
| int tests[] = {8, 9, 10, 11}; |
| return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int))); |
| } |
| |
| // Run all the comparison tests for a given type |
| int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) { |
| int tests[] = {13, 16, 17, 18, 19, 20, 21}; |
| return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int))); |
| } |
| |
| // Run all tests for a given type |
| int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) { |
| int errors = 0; |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1); |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1); |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1); |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1); |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1); |
| errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1); |
| return errors; |
| } |
| |
| |
| // ----------------- |
| // Long tests |
| // ----------------- |
| int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_math(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_logic(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_shift(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_compare(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_math(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_logic(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_shift(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_compare(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // ULong tests |
| // ----------------- |
| int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_math(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_logic(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_shift(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_compare(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_math(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_logic(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_shift(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test_compare(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // Int tests |
| // ----------------- |
| int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // UInt tests |
| // ----------------- |
| int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // Short tests |
| // ----------------- |
| int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // UShort tests |
| // ----------------- |
| int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // Char tests |
| // ----------------- |
| int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| // ----------------- |
| // UChar tests |
| // ----------------- |
| int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_math(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_logic(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_shift(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test_compare(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| |
| |
| // These are kept for debugging if you want to run all the tests together. |
| |
| int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| if (!gHasLong) |
| { |
| log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" ); |
| return CL_SUCCESS; |
| } |
| return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE); |
| } |
| |
| int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { |
| return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE); |
| } |
| |
| // Prototype for below |
| int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, |
| int vectorSize, TestStyle style, ExplicitType type ); |
| |
| // Run all the vector sizes for a given test in scalar-vector and vector-scalar modes |
| int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) |
| { |
| int sizes[] = { 2, 3, 4, 8, 16, 0 }; |
| int errors = 0; |
| |
| for( int i = 0; sizes[ i ] != 0; i++ ) |
| { |
| if( testID == 13 ) |
| { |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type ); |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type ); |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type ); |
| |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kBothVectors | kInputCAlsoScalar), type ); |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputAScalar | kInputCAlsoScalar), type ); |
| errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputBScalar | kInputCAlsoScalar), type ); |
| } |
| else |
| { |
| errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID); |
| errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID); |
| } |
| } |
| return errors; |
| } |
| |
| // Run all the tests for scalar-vector and vector-scalar for a given type |
| int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num ) |
| { |
| int errors = 0; |
| size_t i; |
| |
| // Shift operators: |
| // a) cannot take scalars as first parameter and vectors as second |
| // b) have the vector >> scalar case tested by tests 10 and 11 |
| // so they get skipped entirely |
| |
| int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7, |
| 13, 14, 15, 16, 17, 18, 19, 20, 21 }; |
| for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++) |
| { |
| errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]); |
| } |
| return errors; |
| } |
| |
| int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) |
| { |
| int errors = 0; |
| int numTypes = sizeof( types ) / sizeof( types[ 0 ] ); |
| |
| for( int t = 0; t < numTypes; t++ ) |
| { |
| if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong) |
| continue; |
| |
| errors += run_vector_scalar_tests( deviceID, context, queue, num_elements, types[ t ], 1 ); |
| break; |
| } |
| |
| return errors; |
| } |
| |
| void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize ) |
| { |
| cl_uint bits = genrand_int32(d); |
| cl_uint bitsLeft = 32; |
| |
| memset( outData, 0, outDataSize * count ); |
| |
| for( size_t i = 0; i < count; i++ ) |
| { |
| if( 0 == bitsLeft) |
| { |
| bits = genrand_int32(d); |
| bitsLeft = 32; |
| } |
| |
| // Note: we will be setting just any bit non-zero for the type, so we can easily skip past |
| // and just write bytes (assuming the entire output buffer is already zeroed, which we did) |
| *outData = ( bits & 1 ) ? 0xff : 0; |
| |
| bits >>= 1; bitsLeft -= 1; |
| |
| outData += outDataSize; |
| } |
| } |
| |
| static const char *kernel_question_colon_full = |
| "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| "\n" |
| " %s%s valA = %ssrcA%s" |
| " %s%s valB = %ssrcB%s" |
| " %s%s valC = %ssrcC%s" |
| " %s%s destVal = valC ? valA : valB;\n" |
| " %s" |
| "}\n"; |
| |
| static const char *kernel_qc_load_plain_prefix = ""; |
| static const char *kernel_qc_load_plain_suffix = "[ tid ];\n"; |
| |
| static const char *kernel_qc_load_vec3_prefix = "vload3( tid, "; |
| static const char *kernel_qc_load_vec3_suffix = ");\n"; |
| |
| static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n"; |
| static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n"; |
| |
| int test_question_colon_op(cl_device_id deviceID, cl_context context, |
| cl_command_queue queue, int num_elements, |
| int vectorSize, TestStyle style, ExplicitType type ) |
| { |
| cl_mem streams[4]; |
| cl_int *input_ptr[3], *output_ptr; |
| cl_program program; |
| cl_kernel kernel; |
| size_t threads[1]; |
| int err; |
| int inputAVecSize, inputBVecSize, inputCVecSize; |
| const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; |
| // Identical to sizeNames but with a blank for 3, since we use vload/store there |
| const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; |
| MTdata s_randStates; |
| |
| inputAVecSize = inputBVecSize = inputCVecSize = vectorSize; |
| if( style & kInputCAlsoScalar ) |
| { |
| style = (TestStyle)( style & ~kInputCAlsoScalar ); |
| inputCVecSize = 1; |
| } |
| if( style == kInputAScalar ) |
| inputAVecSize = 1; |
| else if( style == kInputBScalar ) |
| inputBVecSize = 1; |
| else if( style == kVectorScalarScalar ) |
| inputAVecSize = inputBVecSize = 1; |
| |
| log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n", |
| get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector", |
| ( inputAVecSize == 1 ) ? "scalar" : "vector", |
| ( inputBVecSize == 1 ) ? "scalar" : "vector" ); |
| |
| |
| const char *type_name = get_explicit_type_name(type); |
| size_t type_size = get_explicit_type_size(type); |
| |
| // Create and initialize I/O buffers |
| |
| input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize); |
| input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize); |
| input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize); |
| output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize); |
| |
| s_randStates = init_genrand( gRandomSeed ); |
| |
| generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] ); |
| generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] ); |
| generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size ); |
| |
| streams[0] = clCreateBuffer( |
| context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, |
| type_size * num_elements * inputAVecSize, input_ptr[0], &err); |
| test_error(err, "clCreateBuffer failed"); |
| streams[1] = clCreateBuffer( |
| context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, |
| type_size * num_elements * inputBVecSize, input_ptr[1], &err); |
| test_error(err, "clCreateBuffer failed"); |
| streams[2] = clCreateBuffer( |
| context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, |
| type_size * num_elements * inputCVecSize, input_ptr[2], &err); |
| test_error(err, "clCreateBuffer failed"); |
| streams[3] = |
| clCreateBuffer(context, CL_MEM_WRITE_ONLY, |
| type_size * num_elements * vectorSize, NULL, &err); |
| test_error(err, "clCreateBuffer failed"); |
| |
| const char *vectorString = sizeNames[ vectorSize ]; |
| const char *inputAVectorString = sizeNames[ inputAVecSize ]; |
| const char *inputBVectorString = sizeNames[ inputBVecSize ]; |
| const char *inputCVectorString = sizeNames[ inputCVecSize ]; |
| |
| char programString[4096]; |
| const char *ptr; |
| |
| sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ], |
| type_name, paramSizeNames[ inputBVecSize ], |
| type_name, paramSizeNames[ inputCVecSize ], |
| type_name, paramSizeNames[ vectorSize ], |
| // Loads |
| type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, |
| type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, |
| type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, |
| // Dest type |
| type_name, vectorString, |
| // Store |
| ( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain ); |
| |
| ptr = programString; |
| err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" ); |
| test_error( err, "Unable to create test kernel" ); |
| |
| err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] ); |
| err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] ); |
| err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] ); |
| err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] ); |
| test_error(err, "clSetKernelArgs failed"); |
| |
| // Run |
| threads[0] = (size_t)num_elements; |
| |
| err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); |
| test_error(err, "clEnqueueNDRangeKernel failed"); |
| |
| // Read and verify results |
| err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL ); |
| test_error(err, "clEnqueueReadBuffer failed"); |
| |
| // log_info("Performing verification\n"); |
| int error_count = 0; |
| |
| char *inputAPtr = (char *)input_ptr[ 0 ]; |
| char *inputBPtr = (char *)input_ptr[ 1 ]; |
| cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ]; |
| char *actualPtr = (char *)output_ptr; |
| |
| for( int i = 0; i < num_elements; i++ ) |
| { |
| for( int j = 0; j < vectorSize; j++ ) |
| { |
| char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr; |
| if( memcmp( expectedPtr, actualPtr, type_size ) != 0 ) |
| { |
| #if 0 |
| char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ]; |
| print_type_to_string( type, inputAPtr, inputAStr ); |
| print_type_to_string( type, inputBPtr, inputBStr ); |
| print_type_to_string( type, expectedPtr, expectedStr ); |
| print_type_to_string( type, actualPtr, actualStr ); |
| log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n", |
| type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" ); |
| #endif |
| error_count++; |
| } |
| // Advance for each element member. Note if any of the vec sizes are 1, they don't advance here |
| inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size; |
| inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size; |
| inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size; |
| actualPtr += ( vectorSize == 1 ) ? 0 : type_size; |
| } |
| // Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct |
| inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0; |
| inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0; |
| inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0; |
| actualPtr += ( vectorSize == 1 ) ? type_size : 0; |
| } |
| |
| // cleanup |
| clReleaseMemObject(streams[0]); |
| clReleaseMemObject(streams[1]); |
| clReleaseMemObject(streams[2]); |
| clReleaseMemObject(streams[3]); |
| clReleaseKernel(kernel); |
| clReleaseProgram(program); |
| free(input_ptr[0]); |
| free(input_ptr[1]); |
| free(input_ptr[2]); |
| free(output_ptr); |
| free_mtdata( s_randStates ); |
| |
| return error_count; |
| } |