| // |
| // Copyright (c) 2020 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include <stdio.h> |
| #include <string.h> |
| #include "harness/testHarness.h" |
| #include "harness/typeWrappers.h" |
| |
| #include <vector> |
| |
| #include "procs.h" |
| #include "utils.h" |
| #include <time.h> |
| |
| static int max_nestingLevel = 10; |
| |
| static const char* enqueue_multi_level = R"( |
| void block_fn(__global int* res, int level) |
| { |
| queue_t def_q = get_default_queue(); |
| if(--level < 0) return; |
| void (^kernelBlock)(void) = ^{ block_fn(res, level); }; |
| ndrange_t ndrange = ndrange_1D(1); |
| int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); |
| if(enq_res != CLK_SUCCESS) { (*res) = -1; return; } |
| else if(*res != -1) { (*res)++; } |
| } |
| kernel void enqueue_multi_level(__global int* res, int level) |
| { |
| *res = 0; |
| block_fn(res, level); |
| })"; |
| |
| int test_enqueue_profiling(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements) |
| { |
| cl_int err_ret, res = 0; |
| clCommandQueueWrapper dev_queue; |
| clCommandQueueWrapper host_queue; |
| |
| cl_uint maxQueueSize = 0; |
| err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, |
| sizeof(maxQueueSize), &maxQueueSize, 0); |
| test_error(err_ret, |
| "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); |
| |
| cl_queue_properties dev_queue_prop_def[] = { |
| CL_QUEUE_PROPERTIES, |
| CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE |
| | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE, |
| CL_QUEUE_SIZE, maxQueueSize, 0 |
| }; |
| |
| dev_queue = clCreateCommandQueueWithProperties( |
| context, device, dev_queue_prop_def, &err_ret); |
| test_error(err_ret, |
| "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_" |
| "DEFAULT) failed"); |
| |
| cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES, |
| CL_QUEUE_PROFILING_ENABLE, |
| 0 }; |
| |
| host_queue = clCreateCommandQueueWithProperties( |
| context, device, host_queue_prop_def, &err_ret); |
| test_error(err_ret, |
| "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_" |
| "DEFAULT) failed"); |
| |
| cl_int status; |
| size_t size = 1; |
| cl_int result = 0; |
| |
| clMemWrapper res_mem; |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| |
| cl_event kernel_event; |
| |
| err_ret = create_single_kernel_helper(context, &program, &kernel, 1, |
| &enqueue_multi_level, |
| "enqueue_multi_level"); |
| if (check_error(err_ret, "Create single kernel failed")) return -1; |
| |
| res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, |
| sizeof(result), &result, &err_ret); |
| test_error(err_ret, "clCreateBuffer() failed"); |
| |
| err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem); |
| test_error(err_ret, "clSetKernelArg(0) failed"); |
| |
| for (int level = 0; level < max_nestingLevel; level++) |
| { |
| err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level); |
| test_error(err_ret, "clSetKernelArg(1) failed"); |
| |
| err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size, |
| &size, 0, NULL, &kernel_event); |
| test_error(err_ret, |
| "clEnqueueNDRangeKernel('enqueue_multi_level') failed"); |
| |
| err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0, |
| sizeof(result), &result, 0, NULL, NULL); |
| test_error(err_ret, "clEnqueueReadBuffer() failed"); |
| |
| if (result != level) |
| { |
| log_error("Kernel execution should return the maximum nesting " |
| " level (got %d instead of %d)", |
| result, level); |
| return -1; |
| } |
| |
| err_ret = |
| clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, |
| sizeof(status), &status, NULL); |
| test_error(err_ret, "clGetEventInfo() failed"); |
| |
| if (check_error(status, "Kernel execution status %d", status)) |
| return status; |
| |
| cl_ulong end; |
| err_ret = clGetEventProfilingInfo( |
| kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL); |
| test_error(err_ret, "clGetEventProfilingInfo() failed"); |
| |
| cl_ulong complete; |
| err_ret = |
| clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE, |
| sizeof(complete), &complete, NULL); |
| test_error(err_ret, "clGetEventProfilingInfo() failed"); |
| |
| if (end > complete) |
| { |
| log_error( |
| "Profiling END should be smaller than or equal to COMPLETE for " |
| "kernels that use the on-device queue"); |
| return -1; |
| } |
| |
| log_info("Profiling info for '%s' kernel is OK for level %d.\n", |
| "enqueue_multi_level", level); |
| |
| clReleaseEvent(kernel_event); |
| } |
| |
| return res; |
| } |