| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "procs.h" |
| |
| // Design: |
| // To test sub buffers, we first create one main buffer. We then create several sub-buffers and |
| // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of |
| // what results it expects, and so we can test scaling degrees of Actions on scaling numbers of |
| // sub-buffers. |
| |
| class SubBufferWrapper : public clMemWrapper |
| { |
| public: |
| cl_mem mParentBuffer; |
| size_t mOrigin; |
| size_t mSize; |
| |
| cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size ) |
| { |
| mParentBuffer = parent; |
| mOrigin = origin; |
| mSize = size; |
| |
| cl_buffer_region region; |
| region.origin = mOrigin; |
| region.size = mSize; |
| |
| cl_int error; |
| mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); |
| return error; |
| } |
| }; |
| |
| class Action |
| { |
| public: |
| virtual ~Action() {} |
| virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0; |
| virtual const char * GetName( void ) const = 0; |
| |
| static MTdata d; |
| static MTdata GetRandSeed( void ) |
| { |
| if ( d == 0 ) |
| d = init_genrand( gRandomSeed ); |
| return d; |
| } |
| static void FreeRandSeed() { |
| if ( d != 0 ) { |
| free_mtdata(d); |
| d = 0; |
| } |
| } |
| }; |
| |
| MTdata Action::d = 0; |
| |
| class ReadWriteAction : public Action |
| { |
| public: |
| virtual ~ReadWriteAction() {} |
| virtual const char * GetName( void ) const { return "ReadWrite";} |
| |
| virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) |
| { |
| cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize); |
| if (!tempBuffer) { |
| log_error("Out of memory\n"); |
| return -1; |
| } |
| cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL ); |
| test_error( error, "Unable to enqueue buffer read" ); |
| |
| size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() ); |
| size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() ); |
| |
| for ( size_t i = start; i < end; i++ ) |
| { |
| tempBuffer[ i ] |= tag; |
| parentBufferState[ i + buffer1.mOrigin ] |= tag; |
| } |
| |
| error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL ); |
| test_error( error, "Unable to enqueue buffer write" ); |
| free(tempBuffer); |
| return CL_SUCCESS; |
| } |
| }; |
| |
| #ifndef MAX |
| #define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) |
| #endif |
| #ifndef MIN |
| #define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) |
| #endif |
| |
| class CopyAction : public Action |
| { |
| public: |
| virtual ~CopyAction() {} |
| virtual const char * GetName( void ) const { return "Copy";} |
| |
| virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) |
| { |
| // Copy from sub-buffer 1 to sub-buffer 2 |
| size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() ); |
| |
| size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); |
| size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() ); |
| |
| cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL ); |
| test_error( error, "Unable to enqueue buffer copy" ); |
| |
| memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size ); |
| |
| return CL_SUCCESS; |
| } |
| }; |
| |
| class MapAction : public Action |
| { |
| public: |
| virtual ~MapAction() {} |
| virtual const char * GetName( void ) const { return "Map";} |
| |
| virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) |
| { |
| size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() ); |
| size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); |
| |
| cl_int error; |
| void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ), |
| start, size, 0, NULL, NULL, &error ); |
| test_error( error, "Unable to map buffer" ); |
| |
| cl_char *cPtr = (cl_char *)mappedPtr; |
| for ( size_t i = 0; i < size; i++ ) |
| { |
| cPtr[ i ] |= tag; |
| parentBufferState[ i + start + buffer1.mOrigin ] |= tag; |
| } |
| |
| error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL ); |
| test_error( error, "Unable to unmap buffer" ); |
| |
| return CL_SUCCESS; |
| } |
| }; |
| |
| class KernelReadWriteAction : public Action |
| { |
| public: |
| virtual ~KernelReadWriteAction() {} |
| virtual const char * GetName( void ) const { return "KernelReadWrite";} |
| |
| virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) |
| { |
| const char *kernelCode[] = { |
| "__kernel void readTest( __global char *inBuffer, char tag )\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " inBuffer[ tid ] |= tag;\n" |
| "}\n" }; |
| |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| cl_int error; |
| |
| if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) ) |
| { |
| return -1; |
| } |
| |
| size_t threads[1] = { buffer1.mSize }; |
| |
| error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 ); |
| test_error( error, "Unable to set kernel argument" ); |
| error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag ); |
| test_error( error, "Unable to set kernel argument" ); |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); |
| test_error( error, "Unable to queue kernel" ); |
| |
| for ( size_t i = 0; i < buffer1.mSize; i++ ) |
| parentBufferState[ i + buffer1.mOrigin ] |= tag; |
| |
| return CL_SUCCESS; |
| } |
| }; |
| |
| cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize ) |
| { |
| cl_ulong maxAllocSize; |
| cl_int error; |
| |
| // Get the largest possible buffer we could allocate |
| error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); |
| test_error( error, "Unable to get max alloc size" ); |
| |
| // Don't create a buffer quite that big, just so we have some space left over for other work |
| outSize = (size_t)( maxAllocSize / 5 ); |
| |
| // Cap at 32M so tests complete in a reasonable amount of time. |
| if ( outSize > 32 << 20 ) |
| outSize = 32 << 20; |
| |
| return CL_SUCCESS; |
| } |
| |
| size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index ) |
| { |
| for ( size_t i = 0; i < numSubBuffers; i++ ) |
| { |
| if ( subBuffers[ i ].mOrigin > index ) |
| return numSubBuffers; |
| if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) ) |
| return i; |
| } |
| return numSubBuffers; |
| } |
| |
| // This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers |
| // can't overlap) |
| int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign ) |
| { |
| clMemWrapper mainBuffer; |
| SubBufferWrapper subBuffers[ 8 ]; |
| size_t numSubBuffers; |
| cl_int error; |
| size_t i; |
| MTdata m = init_genrand( 22 ); |
| |
| |
| cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize); |
| cl_char * actualResults = (cl_char*)calloc(1,mainSize); |
| |
| for ( i = 0; i < mainSize / 4; i++ ) |
| ((cl_uint*) mainBufferContents)[i] = genrand_int32(m); |
| |
| free_mtdata( m ); |
| |
| // Create the main buffer to test against |
| mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error ); |
| test_error( error, "Unable to create test main buffer" ); |
| |
| // Create some sub-buffers to use |
| size_t toStartFrom = 0; |
| for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ ) |
| { |
| size_t endRange = toStartFrom + ( mainSize / 4 ); |
| if ( endRange > mainSize ) |
| endRange = mainSize; |
| |
| size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign; |
| size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign; |
| error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size ); |
| test_error( error, "Unable to allocate sub buffer" ); |
| |
| toStartFrom = offset + size; |
| if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) ) |
| break; |
| } |
| |
| ReadWriteAction rwAction; |
| MapAction mapAction; |
| CopyAction copyAction; |
| KernelReadWriteAction kernelAction; |
| |
| Action * actions[] = { &rwAction, &mapAction, ©Action, &kernelAction }; |
| int numErrors = 0; |
| |
| // Do the following steps twice, to make sure the parent gets updated *and* we can |
| // still work on the sub-buffers |
| cl_command_queue prev_queue = queueA; |
| for ( int time = 0; time < 2; time++ ) |
| { |
| // Randomly apply actions to the set of sub buffers |
| size_t i; |
| for ( i = 0; i < 64; i++ ) |
| { |
| int which = random_in_range( 0, 3, Action::GetRandSeed() ); |
| int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() ); |
| int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() ); |
| int whichBufferB; |
| do |
| { |
| whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() ); |
| } while ( whichBufferB == whichBufferA ); |
| |
| cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA; |
| if (queue != prev_queue) { |
| error = clFinish( prev_queue ); |
| test_error( error, "Error finishing other queue." ); |
| |
| prev_queue = queue; |
| } |
| |
| error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents ); |
| test_error( error, "Unable to execute action against sub buffers" ); |
| } |
| |
| error = clFinish( queueA ); |
| test_error( error, "Error finishing queueA." ); |
| |
| error = clFinish( queueB ); |
| test_error( error, "Error finishing queueB." ); |
| |
| // Validate by reading the final contents of the main buffer and |
| // validating against our ref copy we generated |
| error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL ); |
| test_error( error, "Unable to enqueue buffer read" ); |
| |
| for ( i = 0; i < mainSize; i += 65536 ) |
| { |
| size_t left = 65536; |
| if ( ( i + left ) > mainSize ) |
| left = mainSize - i; |
| |
| if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 ) |
| continue; |
| |
| // The fast compare failed, so we need to determine where exactly the failure is |
| |
| for ( size_t j = 0; j < left; j++ ) |
| { |
| if ( actualResults[ i + j ] != mainBufferContents[ i + j ] ) |
| { |
| // Hit a failure; report the subbuffer at this address as having failed |
| size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j ); |
| if ( sbThatFailed == numSubBuffers ) |
| { |
| log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize ); |
| // Since this is a nonsensical, don't bother continuing to check |
| // (we will, however, print our map of sub-buffers for comparison) |
| for ( size_t k = 0; k < numSubBuffers; k++ ) |
| { |
| log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize ); |
| } |
| return -1; |
| } |
| log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize ); |
| size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1; |
| i = newPos & ~65535; |
| j = newPos - i; |
| numErrors++; |
| } |
| } |
| } |
| } |
| |
| free(mainBufferContents); |
| free(actualResults); |
| Action::FreeRandSeed(); |
| |
| return numErrors; |
| } |
| |
| int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) |
| { |
| cl_int error; |
| size_t mainSize; |
| cl_uint addressAlignBits; |
| |
| // Get the size of the main buffer to use |
| error = get_reasonable_buffer_size( deviceID, mainSize ); |
| test_error( error, "Unable to get reasonable buffer size" ); |
| |
| // Determine the alignment of the device so we can make sure sub buffers are valid |
| error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL ); |
| test_error( error, "Unable to get device's address alignment" ); |
| |
| size_t addressAlign = addressAlignBits/8; |
| |
| return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign ); |
| } |
| |
| // This test performs the same basic operations as sub_buffers_read_write, but instead of a single |
| // device, it creates a context and buffer shared between two devices, then executes commands |
| // on queues for each device to ensure that everything still operates as expected. |
| int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) |
| { |
| cl_int error; |
| |
| |
| // First obtain the second device |
| cl_device_id otherDevice = GetOpposingDevice( deviceID ); |
| if ( otherDevice == NULL ) |
| { |
| log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" ); |
| return -1; |
| } |
| if ( otherDevice == deviceID ) |
| { |
| log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" ); |
| return 0; |
| } |
| |
| // Determine the device id. |
| size_t param_size; |
| error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, ¶m_size ); |
| test_error( error, "Error obtaining device name" ); |
| |
| #if !(defined(_WIN32) && defined(_MSC_VER)) |
| char device_name[param_size]; |
| #else |
| char* device_name = (char*)_malloca(param_size); |
| #endif |
| error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL ); |
| test_error( error, "Error obtaining device name" ); |
| |
| log_info( "\tOther device obtained for dual device test is type %s\n", device_name ); |
| |
| // Create a shared context for these two devices |
| cl_device_id devices[ 2 ] = { deviceID, otherDevice }; |
| clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error ); |
| test_error( error, "Unable to create shared context" ); |
| |
| // Create two queues (can't use the existing one, because it's on the wrong context) |
| clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error ); |
| test_error( error, "Unable to create command queue on main device" ); |
| |
| clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error ); |
| test_error( error, "Unable to create command queue on secondary device" ); |
| |
| // Determine the reasonable buffer size and address alignment that applies to BOTH devices |
| size_t maxBuffer1, maxBuffer2; |
| error = get_reasonable_buffer_size( deviceID, maxBuffer1 ); |
| test_error( error, "Unable to get buffer size for main device" ); |
| |
| error = get_reasonable_buffer_size( otherDevice, maxBuffer2 ); |
| test_error( error, "Unable to get buffer size for secondary device" ); |
| maxBuffer1 = MIN( maxBuffer1, maxBuffer2 ); |
| |
| cl_uint addressAlign1Bits, addressAlign2Bits; |
| error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL ); |
| test_error( error, "Unable to get main device's address alignment" ); |
| |
| error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL ); |
| test_error( error, "Unable to get secondary device's address alignment" ); |
| |
| cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8; |
| |
| // Finally time to run! |
| return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 ); |
| } |
| |
| cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults ) |
| { |
| const char *kernelCode[] = { |
| "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n" |
| "{\n" |
| " int tid = get_global_id(0);\n" |
| " outBuffer[ tid ] = inBuffer[ tid ];\n" |
| "}\n" }; |
| |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| cl_int error; |
| |
| if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) ) |
| { |
| return -1; |
| } |
| |
| size_t threads[1] = { length }; |
| |
| clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error ); |
| test_error( error, "Unable to create output stream" ); |
| |
| error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer ); |
| test_error( error, "Unable to set kernel argument" ); |
| error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream ); |
| test_error( error, "Unable to set kernel argument" ); |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); |
| test_error( error, "Unable to queue kernel" ); |
| |
| error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL ); |
| test_error( error, "Unable to read results from kernel" ); |
| |
| return CL_SUCCESS; |
| } |
| |
| |
| int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) |
| { |
| cl_int error; |
| size_t mainSize; |
| cl_uint addressAlign; |
| |
| clMemWrapper mainBuffer; |
| SubBufferWrapper subBuffers[ 16 ]; |
| |
| |
| // Create the main buffer to test against |
| error = get_reasonable_buffer_size( deviceID, mainSize ); |
| test_error( error, "Unable to get reasonable buffer size" ); |
| |
| mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error ); |
| test_error( error, "Unable to create test main buffer" ); |
| |
| // Determine the alignment of the device so we can make sure sub buffers are valid |
| error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL ); |
| test_error( error, "Unable to get device's address alignment" ); |
| |
| // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap) |
| for ( size_t i = 0; i < 16; i++ ) |
| { |
| size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign; |
| size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign; |
| |
| error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size ); |
| test_error( error, "Unable to allocate sub buffer" ); |
| } |
| |
| /// For logging, we determine the amount of overlap we just generated |
| // Build a fast in-out map to help with generating the stats |
| int sbMap[ 32 ], mapSize = 0; |
| for ( int i = 0; i < 16; i++ ) |
| { |
| int j; |
| for ( j = 0; j < mapSize; j++ ) |
| { |
| size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize ) |
| : subBuffers[ sbMap[ j ] ].mOrigin; |
| if ( subBuffers[ i ].mOrigin < pt ) |
| { |
| // Origin is before this part of the map, so move map forward so we can insert |
| memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) ); |
| sbMap[ j ] = i; |
| mapSize++; |
| break; |
| } |
| } |
| if ( j == mapSize ) |
| { |
| sbMap[ j ] = i; |
| mapSize++; |
| } |
| |
| size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize; |
| for ( j = 0; j < mapSize; j++ ) |
| { |
| size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize ) |
| : subBuffers[ sbMap[ j ] ].mOrigin; |
| if ( endPt < pt ) |
| { |
| // Origin is before this part of the map, so move map forward so we can insert |
| memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) ); |
| sbMap[ j ] = -( i + 1 ); |
| mapSize++; |
| break; |
| } |
| } |
| if ( j == mapSize ) |
| { |
| sbMap[ j ] = -( i + 1 ); |
| mapSize++; |
| } |
| } |
| long long delta = 0; |
| size_t maxOverlap = 1, overlap = 0; |
| for ( int i = 0; i < 32; i++ ) |
| { |
| if ( sbMap[ i ] >= 0 ) |
| { |
| overlap++; |
| if ( overlap > 1 ) |
| delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin ); |
| if ( overlap > maxOverlap ) |
| maxOverlap = overlap; |
| } |
| else |
| { |
| if ( overlap > 1 ) |
| delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize ); |
| overlap--; |
| } |
| } |
| |
| log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n", |
| 16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap ); |
| |
| // Write some random contents to the main buffer |
| cl_char * contents = new cl_char[ mainSize ]; |
| generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents ); |
| |
| error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL ); |
| test_error( error, "Unable to write to main buffer" ); |
| |
| // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents |
| cl_char * tempBuffer = new cl_char[ mainSize ]; |
| |
| int numErrors = 0; |
| for ( size_t i = 0; i < 16; i++ ) |
| { |
| // Read from this buffer |
| int which = random_in_range( 0, 1, Action::GetRandSeed() ); |
| if ( which ) |
| error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL ); |
| else |
| error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer ); |
| test_error( error, "Unable to read sub buffer contents" ); |
| |
| if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 ) |
| { |
| log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i ); |
| numErrors++; |
| } |
| } |
| |
| delete [] contents; |
| delete [] tempBuffer; |
| Action::FreeRandSeed(); |
| |
| return numErrors; |
| } |
| |