| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "structs.h" |
| |
| |
| #include "defines.h" |
| |
| #define DEBUG_MEM_ALLOC 0 |
| |
| /** typedef struct _bufferStruct |
| { |
| void * m_pIn; |
| void * m_pOut; |
| |
| cl_mem m_outBuffer; |
| cl_mem m_inBuffer; |
| |
| size_t m_bufSize; |
| } bufferStruct; |
| */ |
| |
| |
| clState *newClState(cl_device_id device, cl_context context, |
| cl_command_queue queue) |
| { |
| clState *pResult = (clState *)malloc(sizeof(clState)); |
| #if DEBUG_MEM_ALLOC |
| log_info("malloc clState * %x\n", pResult); |
| #endif |
| |
| pResult->m_device = device; |
| pResult->m_context = context; |
| pResult->m_queue = queue; |
| |
| pResult->m_kernel = NULL; |
| pResult->m_program = NULL; |
| return pResult; |
| } |
| |
| clState *destroyClState(clState *pState) |
| { |
| clStateDestroyProgramAndKernel(pState); |
| #if DEBUG_MEM_ALLOC |
| log_info("delete (free) clState * %x\n", pState); |
| #endif |
| free(pState); |
| return NULL; |
| } |
| |
| |
| int clStateMakeProgram(clState *pState, const char *prog, |
| const char *kernelName) |
| { |
| const char *srcArr[1] = { NULL }; |
| srcArr[0] = prog; |
| int err = |
| create_single_kernel_helper(pState->m_context, &(pState->m_program), |
| &(pState->m_kernel), 1, srcArr, kernelName); |
| #if DEBUG_MEM_ALLOC |
| log_info("create program and kernel\n"); |
| #endif |
| return err; |
| } |
| |
| int runKernel(clState *pState, size_t numThreads) |
| { |
| int err; |
| pState->m_numThreads = numThreads; |
| err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL, |
| &(pState->m_numThreads), NULL, 0, NULL, NULL); |
| if (err != CL_SUCCESS) |
| { |
| log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err); |
| return -1; |
| } |
| return 0; |
| } |
| |
| |
| void clStateDestroyProgramAndKernel(clState *pState) |
| { |
| #if DEBUG_MEM_ALLOC |
| log_info("destroy program and kernel\n"); |
| #endif |
| if (pState->m_kernel != NULL) |
| { |
| clReleaseKernel(pState->m_kernel); |
| pState->m_kernel = NULL; |
| } |
| if (pState->m_program != NULL) |
| { |
| clReleaseProgram(pState->m_program); |
| pState->m_program = NULL; |
| } |
| } |
| |
| bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState) |
| { |
| int error; |
| bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct)); |
| #if DEBUG_MEM_ALLOC |
| log_info("malloc bufferStruct * %x\n", pResult); |
| #endif |
| |
| pResult->m_bufSizeIn = inSize; |
| pResult->m_bufSizeOut = outSize; |
| |
| pResult->m_pIn = malloc(inSize); |
| pResult->m_pOut = malloc(outSize); |
| #if DEBUG_MEM_ALLOC |
| log_info("malloc m_pIn %x\n", pResult->m_pIn); |
| log_info("malloc m_pOut %x\n", pResult->m_pOut); |
| #endif |
| |
| pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY, |
| inSize, NULL, &error); |
| if (pResult->m_inBuffer == NULL) |
| { |
| vlog_error("clCreateArray failed for input (%d)\n", error); |
| return destroyBufferStruct(pResult, pClState); |
| } |
| #if DEBUG_MEM_ALLOC |
| log_info("clCreateBuffer %x\n", pResult->m_inBuffer); |
| #endif |
| |
| pResult->m_outBuffer = clCreateBuffer( |
| pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error); |
| if (pResult->m_outBuffer == NULL) |
| { |
| vlog_error("clCreateArray failed for output (%d)\n", error); |
| return destroyBufferStruct(pResult, pClState); |
| } |
| #if DEBUG_MEM_ALLOC |
| log_info("clCreateBuffer %x\n", pResult->m_outBuffer); |
| #endif |
| |
| pResult->m_bufferUploaded = false; |
| |
| return pResult; |
| } |
| |
| bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState) |
| { |
| if (destroyMe) |
| { |
| if (destroyMe->m_outBuffer != NULL) |
| { |
| #if DEBUG_MEM_ALLOC |
| log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); |
| #endif |
| clReleaseMemObject(destroyMe->m_outBuffer); |
| destroyMe->m_outBuffer = NULL; |
| } |
| if (destroyMe->m_inBuffer != NULL) |
| { |
| #if DEBUG_MEM_ALLOC |
| log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); |
| #endif |
| clReleaseMemObject(destroyMe->m_inBuffer); |
| destroyMe->m_inBuffer = NULL; |
| } |
| if (destroyMe->m_pIn != NULL) |
| { |
| #if DEBUG_MEM_ALLOC |
| log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn); |
| #endif |
| free(destroyMe->m_pIn); |
| destroyMe->m_pIn = NULL; |
| } |
| if (destroyMe->m_pOut != NULL) |
| { |
| #if DEBUG_MEM_ALLOC |
| log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut); |
| #endif |
| free(destroyMe->m_pOut); |
| destroyMe->m_pOut = NULL; |
| } |
| #if DEBUG_MEM_ALLOC |
| log_info("delete (free) bufferStruct * %x\n", destroyMe); |
| #endif |
| free((void *)destroyMe); |
| destroyMe = NULL; |
| } |
| return destroyMe; |
| } |
| |
| void initContents(bufferStruct *pBufferStruct, clState *pClState, |
| size_t typeSize, size_t countIn, size_t countOut) |
| { |
| size_t i; |
| |
| uint64_t start = 0; |
| |
| switch (typeSize) |
| { |
| case 1: { |
| uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn); |
| for (i = 0; i < countIn; ++i) |
| { |
| ub[i] = (uint8_t)start++; |
| } |
| break; |
| } |
| case 2: { |
| uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn); |
| for (i = 0; i < countIn; ++i) |
| { |
| us[i] = (uint16_t)start++; |
| } |
| break; |
| } |
| case 4: { |
| if (!g_wimpyMode) |
| { |
| uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn); |
| for (i = 0; i < countIn; ++i) |
| { |
| ui[i] = (uint32_t)start++; |
| } |
| } |
| else |
| { |
| // The short test doesn't iterate over the entire 32 bit space |
| // so we alternate between positive and negative values |
| int32_t *ui = (int32_t *)(pBufferStruct->m_pIn); |
| int32_t sign = 1; |
| for (i = 0; i < countIn; ++i, ++start) |
| { |
| ui[i] = (int32_t)start * sign; |
| sign = sign * -1; |
| } |
| } |
| break; |
| } |
| case 8: { |
| // We don't iterate over the entire space of 64 bit so for the |
| // selects, we want to test positive and negative values |
| int64_t *ll = (int64_t *)(pBufferStruct->m_pIn); |
| int64_t sign = 1; |
| for (i = 0; i < countIn; ++i, ++start) |
| { |
| ll[i] = start * sign; |
| sign = sign * -1; |
| } |
| break; |
| } |
| default: { |
| log_error("invalid type size %x\n", (int)typeSize); |
| } |
| } |
| // pBufferStruct->m_bufSizeIn |
| // pBufferStruct->m_bufSizeOut |
| } |
| |
| int pushArgs(bufferStruct *pBufferStruct, clState *pClState) |
| { |
| int err; |
| if (!pBufferStruct->m_bufferUploaded) |
| { |
| err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer, |
| CL_TRUE, 0, pBufferStruct->m_bufSizeIn, |
| pBufferStruct->m_pIn, 0, NULL, NULL); |
| #if DEBUG_MEM_ALLOC |
| log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer); |
| #endif |
| if (err != CL_SUCCESS) |
| { |
| log_error("clEnqueueWriteBuffer failed\n"); |
| return -1; |
| } |
| pBufferStruct->m_bufferUploaded = true; |
| } |
| |
| err = clSetKernelArg( |
| pClState->m_kernel, 0, |
| sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn, |
| &(pBufferStruct->m_inBuffer)); |
| #if DEBUG_MEM_ALLOC |
| // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer); |
| #endif |
| if (err != CL_SUCCESS) |
| { |
| log_error("clSetKernelArgs failed, first arg (0)\n"); |
| return -1; |
| } |
| |
| err = clSetKernelArg( |
| pClState->m_kernel, 1, |
| sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut, |
| &(pBufferStruct->m_outBuffer)); |
| if (err != CL_SUCCESS) |
| { |
| log_error("clSetKernelArgs failed, second arg (1)\n"); |
| return -1; |
| } |
| |
| #if DEBUG_MEM_ALLOC |
| // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer); |
| #endif |
| |
| return 0; |
| } |
| |
| int retrieveResults(bufferStruct *pBufferStruct, clState *pClState) |
| { |
| int err; |
| err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer, |
| CL_TRUE, 0, pBufferStruct->m_bufSizeOut, |
| pBufferStruct->m_pOut, 0, NULL, NULL); |
| if (err != CL_SUCCESS) |
| { |
| log_error("clEnqueueReadBuffer failed\n"); |
| return -1; |
| } |
| return 0; |
| } |
| |
| // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames |
| // and g_arrVecSizes |
| int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState, |
| size_t minAlign) |
| { |
| size_t i; |
| cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut); |
| for (i = 0; i < pClState->m_numThreads; ++i) |
| { |
| if ((targetArr[i]) % minAlign != (cl_uint)0) |
| { |
| vlog_error("Error %d (of %d). Expected a multple of %x, got %x\n", |
| i, pClState->m_numThreads, minAlign, targetArr[i]); |
| return -1; |
| } |
| } |
| |
| /* log_info("\n"); |
| for(i = 0; i < 4; ++i) { |
| log_info("%lx, ", targetArr[i]); |
| } |
| log_info("\n"); |
| fflush(stdout); */ |
| return 0; |
| } |
| |
| int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState, |
| size_t typeSize, size_t vecWidth) |
| { |
| size_t i; |
| cl_int targetSize = (cl_int)vecWidth; |
| cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut); |
| if (targetSize == 3) |
| { |
| targetSize = 4; // hack for 4-aligned vec3 types |
| } |
| for (i = 0; i < pClState->m_numThreads; ++i) |
| { |
| if (targetArr[i] != targetSize) |
| { |
| vlog_error("Error %ld (of %ld). Expected %d, got %d\n", i, |
| pClState->m_numThreads, targetSize, targetArr[i]); |
| return -1; |
| } |
| } |
| return 0; |
| } |
| |
| // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames |
| // and g_arrVecSizes |
| int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState, |
| size_t totSize, size_t beforeSize) |
| { |
| size_t i; |
| cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut); |
| for (i = 0; i < pClState->m_numThreads; ++i) |
| { |
| if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0) |
| { |
| vlog_error("Error %d (of %d). Expected %d more than a multple of " |
| "%d, got %d \n", |
| i, pClState->m_numThreads, beforeSize, totSize, |
| targetArr[i]); |
| return -1; |
| } |
| } |
| |
| /* log_info("\n"); |
| for(i = 0; i < 4; ++i) { |
| log_info("%lx, ", targetArr[i]); |
| } |
| log_info("\n"); |
| fflush(stdout); */ |
| return 0; |
| } |