| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "gl_headers.h" |
| |
| #include "testBase.h" |
| #include "setup.h" |
| #include "harness/genericThread.h" |
| |
| #ifndef GLsync |
| // For OpenGL before 3.2, we look for the ARB_sync extension and try to use that |
| #if !defined(_WIN32) |
| #include <inttypes.h> |
| #endif // !_WIN32 |
| typedef int64_t GLint64; |
| typedef uint64_t GLuint64; |
| typedef struct __GLsync *GLsync; |
| |
| typedef GLsync (*glFenceSyncPtr)(GLenum condition,GLbitfield flags); |
| glFenceSyncPtr glFenceSyncFunc; |
| |
| typedef bool (*glIsSyncPtr)(GLsync sync); |
| glIsSyncPtr glIsSyncFunc; |
| |
| typedef void (*glDeleteSyncPtr)(GLsync sync); |
| glDeleteSyncPtr glDeleteSyncFunc; |
| |
| typedef GLenum (*glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); |
| glClientWaitSyncPtr glClientWaitSyncFunc; |
| |
| typedef void (*glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); |
| glWaitSyncPtr glWaitSyncFunc; |
| |
| typedef void (*glGetInteger64vPtr)(GLenum pname, GLint64 *params); |
| glGetInteger64vPtr glGetInteger64vFunc; |
| |
| typedef void (*glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, |
| GLint *values); |
| glGetSyncivPtr glGetSyncivFunc; |
| |
| #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) |
| |
| static void InitSyncFns( void ) |
| { |
| glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); |
| glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); |
| glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); |
| glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); |
| glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); |
| glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); |
| glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" ); |
| } |
| |
| #define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 |
| |
| #define GL_OBJECT_TYPE 0x9112 |
| #define GL_SYNC_CONDITION 0x9113 |
| #define GL_SYNC_STATUS 0x9114 |
| #define GL_SYNC_FLAGS 0x9115 |
| |
| #define GL_SYNC_FENCE 0x9116 |
| |
| #define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 |
| |
| #define GL_UNSIGNALED 0x9118 |
| #define GL_SIGNALED 0x9119 |
| |
| #define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 |
| |
| #define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull |
| |
| #define GL_ALREADY_SIGNALED 0x911A |
| #define GL_TIMEOUT_EXPIRED 0x911B |
| #define GL_CONDITION_SATISFIED 0x911C |
| #define GL_WAIT_FAILED 0x911D |
| |
| #define USING_ARB_sync 1 |
| #endif |
| |
| typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; |
| |
| clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; |
| |
| |
| static const char *updateBuffersKernel[] = { |
| "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" |
| "{\n" |
| " size_t tid = get_global_id(0);\n" |
| "\n" |
| " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" |
| " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" |
| " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" |
| "\n" |
| " int rowV = rowIdx + 1;\n" |
| " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" |
| " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" |
| " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" |
| "}\n" }; |
| |
| //Passthrough VertexShader |
| static const char vertexshader[] = |
| "uniform mat4 projMatrix;\n" |
| "attribute vec4 inPosition;\n" |
| "attribute vec4 inColor;\n" |
| "varying vec4 outColor;\n" |
| "void main (void) {\n" |
| " gl_Position = projMatrix*inPosition;\n" |
| " outColor = inColor;\n" |
| "}\n"; |
| |
| //Passthrough FragmentShader |
| static const char fragmentshader[] = |
| "varying vec4 outColor;\n" |
| "void main (void) {\n" |
| " gl_FragColor = outColor;\n" |
| "}\n"; |
| |
| GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) |
| { |
| GLint logLength, status; |
| GLuint program = glCreateProgram(); |
| GLuint vpShader; |
| |
| char* vpstr = (char*)malloc(sizeof(vertexshader)); |
| strcpy(vpstr, vertexshader); |
| |
| vpShader = glCreateShader(GL_VERTEX_SHADER); |
| glShaderSource(vpShader, 1, (const GLchar **)&vpstr, NULL); |
| glCompileShader(vpShader); |
| glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); |
| if (logLength > 0) { |
| GLchar *log = (GLchar*) malloc(logLength); |
| glGetShaderInfoLog(vpShader, logLength, &logLength, log); |
| log_info("Vtx Shader compile log:\n%s", log); |
| free(log); |
| } |
| |
| free(vpstr); |
| vpstr = NULL; |
| |
| glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status); |
| if (status == 0) |
| { |
| log_error("Failed to compile vtx shader:\n"); |
| return 0; |
| } |
| |
| glAttachShader(program, vpShader); |
| |
| GLuint fpShader; |
| char* fpstr = (char*)malloc(strlen(fragmentshader)); |
| strcpy(fpstr, fragmentshader); |
| fpShader = glCreateShader(GL_FRAGMENT_SHADER); |
| glShaderSource(fpShader, 1, (const GLchar **)&fpstr, NULL); |
| glCompileShader(fpShader); |
| |
| free(fpstr); |
| fpstr = NULL; |
| |
| glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); |
| if (logLength > 0) { |
| GLchar *log = (GLchar*)malloc(logLength); |
| glGetShaderInfoLog(fpShader, logLength, &logLength, log); |
| log_info("Frag Shader compile log:\n%s", log); |
| free(log); |
| } |
| |
| glAttachShader(program, fpShader); |
| glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status); |
| if (status == 0) |
| { |
| log_error("Failed to compile frag shader:\n\n"); |
| return 0; |
| } |
| |
| glLinkProgram(program); |
| glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); |
| if (logLength > 0) { |
| GLchar *log = (GLchar*)malloc(logLength); |
| glGetProgramInfoLog(program, logLength, &logLength, log); |
| log_info("Program link log:\n%s", log); |
| free(log); |
| } |
| |
| glGetProgramiv(program, GL_LINK_STATUS, &status); |
| if (status == 0) |
| { |
| log_error("Failed to link program\n"); |
| return 0; |
| } |
| |
| glValidateProgram(program); |
| glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); |
| if (logLength > 0) { |
| GLchar *log = (GLchar*)malloc(logLength); |
| glGetProgramInfoLog(program, logLength, &logLength, log); |
| log_info("Program validate log:\n%s", log); |
| free(log); |
| } |
| |
| glGetProgramiv(program, GL_VALIDATE_STATUS, &status); |
| if (status == 0) |
| { |
| log_error("Failed to validate program\n"); |
| return 0; |
| } |
| |
| *posLoc = glGetAttribLocation(program, "inPosition"); |
| *colLoc = glGetAttribLocation(program, "inColor"); |
| |
| return program; |
| } |
| |
| void destroyShaderProgram(GLuint program) |
| { |
| GLuint shaders[2]; |
| GLsizei count; |
| glUseProgram(0); |
| glGetAttachedShaders(program, 2, &count, shaders); |
| int i; |
| for(i = 0; i < count; i++) |
| { |
| glDetachShader(program, shaders[i]); |
| glDeleteShader(shaders[i]); |
| } |
| glDeleteProgram(program); |
| } |
| |
| // This function queues up and runs the above CL kernel that writes the vertex data |
| cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, |
| cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) |
| { |
| cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); |
| test_error( error, "Unable to set kernel arguments" ); |
| |
| clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; |
| int numEvents = ( fenceEvent != NULL ) ? 1 : 0; |
| cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL; |
| |
| error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); |
| test_error( error, "Unable to acquire GL obejcts"); |
| error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); |
| test_error( error, "Unable to acquire GL obejcts"); |
| |
| cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; |
| |
| error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); |
| test_error( error, "Unable to execute test kernel" ); |
| |
| error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); |
| test_error(error, "clEnqueueReleaseGLObjects failed"); |
| error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); |
| test_error(error, "clEnqueueReleaseGLObjects failed"); |
| |
| evts[ 0 ] = relEvent1; |
| evts[ 1 ] = relEvent2; |
| error = clWaitForEvents( 2, evts ); |
| test_error( error, "Unable to wait for release events" ); |
| |
| return 0; |
| } |
| |
| class RunThread : public genericThread |
| { |
| public: |
| |
| cl_kernel mKernel; |
| cl_command_queue mQueue; |
| cl_mem mStream0, mStream1; |
| cl_int mRowIdx; |
| cl_event mFenceEvent; |
| size_t mNumThreads; |
| |
| RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) |
| : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) |
| { |
| } |
| |
| void SetRunData( cl_int rowIdx, cl_event fenceEvent ) |
| { |
| mRowIdx = rowIdx; |
| mFenceEvent = fenceEvent; |
| } |
| |
| virtual void * IRun( void ) |
| { |
| cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); |
| return (void *)(intptr_t)error; |
| } |
| }; |
| |
| |
| int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) |
| { |
| int error; |
| const int framebufferSize = 512; |
| cl_platform_id platform_id = NULL; |
| |
| if( !is_extension_available( device, "cl_khr_gl_event" ) ) |
| { |
| log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); |
| return 0; |
| } |
| |
| error = clGetDeviceInfo(device, |
| CL_DEVICE_PLATFORM, |
| sizeof(platform_id), |
| &platform_id, |
| NULL); |
| if(error) |
| { |
| return error; |
| } |
| |
| clCreateEventFromGLsyncKHR_ptr = \ |
| (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id,"clCreateEventFromGLsyncKHR"); |
| if( clCreateEventFromGLsyncKHR_ptr == NULL ) |
| { |
| log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); |
| clCreateEventFromGLsyncKHR_ptr = \ |
| (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateEventFromGLsyncAPPLE"); |
| return -1; |
| } |
| |
| #ifdef USING_ARB_sync |
| char *gl_version_str = (char*)glGetString( GL_VERSION ); |
| float glCoreVersion; |
| sscanf(gl_version_str, "%f", &glCoreVersion); |
| if( glCoreVersion < 3.0f ) |
| { |
| log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion ); |
| return 0; |
| } |
| |
| #ifdef __APPLE__ |
| CGLContextObj currCtx = CGLGetCurrentContext(); |
| CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx); |
| GLint val, screen; |
| CGLGetVirtualScreen(currCtx, &screen); |
| CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); |
| if(val != kCGLOGLPVersion_3_2_Core) |
| { |
| log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" |
| "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion ); |
| return -1; |
| } |
| #else |
| // Need platform specific way to query if current GL context was created with 3.x profile |
| log_error( "ERROR: not implemented\n\n" ); |
| return -1; |
| #endif |
| |
| InitSyncFns(); |
| #endif |
| |
| #ifdef __APPLE__ |
| CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs); |
| #else |
| // Need platform specific way to set device with id rend_vs the current |
| // rendering target |
| log_error( "ERROR: not implemented\n\n" ); |
| return -1; |
| #endif |
| |
| GLint posLoc, colLoc; |
| GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); |
| if(!shaderprogram) |
| { |
| log_error("Failed to create shader program\n"); |
| return -1; |
| } |
| |
| float l = 0.0f; float r = framebufferSize; |
| float b = 0.0f; float t = framebufferSize; |
| |
| float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, |
| 0.0f, 2.0f/(t-b), 0.0f, 0.0f, |
| 0.0f, 0.0f, -1.0f, 0.0f, |
| -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f |
| }; |
| |
| glUseProgram(shaderprogram); |
| GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); |
| glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); |
| glUseProgram(0); |
| |
| // Note: the framebuffer is just the target to verify our results against, so we don't |
| // really care to go through all the possible formats in this case |
| glFramebufferWrapper glFramebuffer; |
| glRenderbufferWrapper glRenderbuffer; |
| error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, |
| GL_RGBA, GL_UNSIGNED_BYTE, |
| &glFramebuffer, &glRenderbuffer ); |
| if( error != 0 ) |
| return error; |
| |
| // GLuint vao; |
| // glGenVertexArrays(1, &vao); |
| // glBindVertexArray(vao); |
| |
| glBufferWrapper vtxBuffer, colorBuffer; |
| glGenBuffers( 1, &vtxBuffer ); |
| glGenBuffers( 1, &colorBuffer ); |
| |
| const int numHorizVertices = ( framebufferSize * 64 ) + 1; |
| |
| glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); |
| glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); |
| |
| glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); |
| glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); |
| |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper streams[ 2 ]; |
| |
| if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) |
| return -1; |
| |
| |
| streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); |
| test_error( error, "Unable to create CL buffer from GL vertex buffer" ); |
| |
| streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); |
| test_error( error, "Unable to create CL buffer from GL color buffer" ); |
| |
| error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); |
| test_error( error, "Unable to set kernel arguments" ); |
| |
| error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); |
| test_error( error, "Unable to set kernel arguments" ); |
| |
| cl_int horizWrap = (cl_int)framebufferSize; |
| error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); |
| test_error( error, "Unable to set kernel arguments" ); |
| |
| glViewport( 0, 0, framebufferSize, framebufferSize ); |
| glClearColor( 0, 0, 0, 0 ); |
| glClear( GL_COLOR_BUFFER_BIT ); |
| glClear( GL_DEPTH_BUFFER_BIT ); |
| glDisable( GL_DEPTH_TEST ); |
| glEnable( GL_BLEND ); |
| glBlendFunc( GL_ONE, GL_ONE ); |
| |
| clEventWrapper fenceEvent; |
| GLsync glFence = 0; |
| |
| // Do a loop through 8 different horizontal stripes against the framebuffer |
| RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); |
| |
| for( int i = 0; i < 8; i++ ) |
| { |
| // if current rendering device is not the compute device and |
| // separateThreads == false which means compute is going on same |
| // thread and we are using implicit synchronization (no GLSync obj used) |
| // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need |
| // to wait for rendering to finish on other device before CL can start |
| // writing to CL/GL shared mem objects. When separateThreads is true i.e. |
| // we are using GLSync obj to synchronize then we dont need to call glFinish |
| // here since CL should wait for rendering on other device before this |
| // GLSync object to finish before it starts writing to shared mem object. |
| // Also rend_device == compute_device no need to call glFinish |
| if(rend_device != device && !separateThreads) |
| glFinish(); |
| |
| if( separateThreads ) |
| { |
| thread.SetRunData( (cl_int)i, fenceEvent ); |
| thread.Start(); |
| |
| error = (cl_int)(size_t)thread.Join(); |
| } |
| else |
| { |
| error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); |
| } |
| test_error( error, "Unable to run CL kernel" ); |
| |
| glUseProgram(shaderprogram); |
| glEnableVertexAttribArray(posLoc); |
| glEnableVertexAttribArray(colLoc); |
| glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); |
| glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); |
| glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); |
| glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); |
| glBindBuffer( GL_ARRAY_BUFFER, 0 ); |
| |
| glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); |
| |
| glDisableVertexAttribArray(posLoc); |
| glDisableVertexAttribArray(colLoc); |
| glUseProgram(0); |
| |
| if( separateThreads ) |
| { |
| // If we're on the same thread, then we're testing implicit syncing, so we |
| // don't need the actual fence code |
| if( fenceEvent != NULL ) |
| { |
| clReleaseEvent( fenceEvent ); |
| glDeleteSyncFunc( glFence ); |
| } |
| |
| glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); |
| fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); |
| test_error( error, "Unable to create CL event from GL fence" ); |
| |
| // in case of explicit synchronization, we just wait for the sync object to complete |
| // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility |
| // to flush on the context on which glSync is created |
| glFlush(); |
| } |
| } |
| |
| if( glFence != 0 ) |
| // Don't need the final release for fenceEvent, because the wrapper will take care of that |
| glDeleteSyncFunc( glFence ); |
| |
| #ifdef __APPLE__ |
| CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); |
| #else |
| // Need platform specific code to set the current rendering device (OpenGL target) |
| // to device with id read_vs so that next glReadPixels get submitted to that device |
| log_error( "ERROR: not implemented\n\n" ); |
| return -1; |
| #endif |
| // Grab the contents of the final framebuffer |
| BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, |
| GL_COLOR_ATTACHMENT0_EXT, |
| GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar, |
| framebufferSize, 128 ) ); |
| |
| // Check the contents now. We should end up with solid color bands 32 pixels high and the |
| // full width of the framebuffer, at values (128,128,128) due to the additive blending |
| for( int i = 0; i < 8; i++ ) |
| { |
| for( int y = 0; y < 4; y++ ) |
| { |
| // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass |
| cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; |
| cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; |
| cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; |
| |
| cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; |
| for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) |
| { |
| if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || |
| ( row[ x * 4 + 2 ] != valC ) ) |
| { |
| log_error( "ERROR: Output framebuffer did not validate!\n" ); |
| DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); |
| log_error( "RUNS:\n" ); |
| uint32_t *p = (uint32_t *)(char *)resultData; |
| size_t a = 0; |
| for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) |
| { |
| if( p[ a ] != 0 ) |
| { |
| if( p[ t ] == 0 ) |
| { |
| log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, |
| (int)( a % framebufferSize ), (int)( a / framebufferSize ), |
| (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), |
| p[ a ] ); |
| a = t; |
| } |
| } |
| else |
| { |
| if( p[ t ] != 0 ) |
| { |
| a = t; |
| } |
| } |
| |
| } |
| return -1; |
| } |
| } |
| } |
| } |
| |
| glDeleteBuffers( 1, &vtxBuffer ); |
| glDeleteBuffers( 1, &colorBuffer ); |
| destroyShaderProgram(shaderprogram); |
| // glDeleteVertexArrays(1, &vao); |
| return 0; |
| } |
| |
| int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) |
| { |
| GLint vs_count = 0; |
| cl_device_id *device_list = NULL; |
| |
| if( !is_extension_available( device, "cl_khr_gl_event" ) ) |
| { |
| log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); |
| return 0; |
| } |
| #ifdef __APPLE__ |
| CGLContextObj ctx = CGLGetCurrentContext(); |
| CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); |
| CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); |
| |
| device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); |
| clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); |
| #else |
| // Need platform specific way of getting devices from CL context to which OpenGL can render |
| // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES |
| log_error( "ERROR: not implemented\n\n" ); |
| return -1; |
| #endif |
| |
| GLint rend_vs, read_vs; |
| int error = 0; |
| int any_failed = 0; |
| |
| // Loop through all the devices capable to OpenGL rendering |
| // and set them as current rendering target |
| for(rend_vs = 0; rend_vs < vs_count; rend_vs++) |
| { |
| // Loop through all the devices and set them as current |
| // compute target |
| for(read_vs = 0; read_vs < vs_count; read_vs++) |
| { |
| cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs]; |
| char rend_name[200], read_name[200]; |
| |
| clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); |
| clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); |
| |
| log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); |
| error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); |
| any_failed |= error; |
| if( error != 0 ) |
| log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); |
| else |
| log_info("Implicit syncing Passed\n"); |
| |
| error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); |
| any_failed |= error; |
| if( error != 0 ) |
| log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); |
| else |
| log_info("Explicit syncing Passed\n"); |
| } |
| } |
| |
| free(device_list); |
| |
| return any_failed; |
| } |