| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "procs.h" |
| #include "subhelpers.h" |
| |
| typedef struct |
| { |
| cl_uint maxSubGroupSize; |
| cl_uint numSubGroups; |
| } result_data; |
| |
| |
| int test_sub_group_info(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements, |
| bool useCoreSubgroups) |
| { |
| static const size_t gsize0 = 80; |
| int i, error; |
| size_t realSize; |
| size_t kernel_max_subgroup_size, kernel_subgroup_count; |
| size_t global[] = { gsize0, 14, 10 }; |
| size_t local[] = { 0, 0, 0 }; |
| result_data result[gsize0]; |
| |
| cl_uint max_dimensions; |
| |
| error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, |
| sizeof(max_dimensions), &max_dimensions, NULL); |
| test_error(error, |
| "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); |
| |
| cl_platform_id platform; |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper out; |
| std::stringstream kernel_sstr; |
| if (useCoreSubgroups) |
| { |
| kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; |
| } |
| kernel_sstr |
| << "\n" |
| "typedef struct {\n" |
| " uint maxSubGroupSize;\n" |
| " uint numSubGroups;\n" |
| "} result_data;\n" |
| "\n" |
| "__kernel void query_kernel( __global result_data *outData )\n" |
| "{\n" |
| " int gid = get_global_id( 0 );\n" |
| " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" |
| " outData[gid].numSubGroups = get_num_sub_groups();\n" |
| "}"; |
| |
| const std::string &kernel_str = kernel_sstr.str(); |
| const char *kernel_src = kernel_str.c_str(); |
| error = create_single_kernel_helper(context, &program, &kernel, 1, |
| &kernel_src, "query_kernel"); |
| if (error != 0) return error; |
| |
| // Determine some local dimensions to use for the test. |
| if (max_dimensions == 1) |
| { |
| error = get_max_common_work_group_size(context, kernel, global[0], |
| &local[0]); |
| test_error(error, "get_max_common_work_group_size failed"); |
| } |
| else if (max_dimensions == 2) |
| { |
| error = |
| get_max_common_2D_work_group_size(context, kernel, global, local); |
| test_error(error, "get_max_common_2D_work_group_size failed"); |
| } |
| else |
| { |
| error = |
| get_max_common_3D_work_group_size(context, kernel, global, local); |
| test_error(error, "get_max_common_3D_work_group_size failed"); |
| } |
| |
| error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), |
| (void *)&platform, NULL); |
| test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); |
| |
| subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); |
| clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = |
| subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); |
| if (clGetKernelSubGroupInfo_ptr == NULL) |
| { |
| log_error("ERROR: %s function not available", |
| subgroupsApiSet.clGetKernelSubGroupInfo_name); |
| return TEST_FAIL; |
| } |
| |
| error = clGetKernelSubGroupInfo_ptr( |
| kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local), |
| (void *)&local, sizeof(kernel_max_subgroup_size), |
| (void *)&kernel_max_subgroup_size, &realSize); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: %s function error for " |
| "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", |
| subgroupsApiSet.clGetKernelSubGroupInfo_name); |
| return TEST_FAIL; |
| } |
| log_info( |
| "The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", |
| (int)kernel_max_subgroup_size); |
| if (realSize != sizeof(kernel_max_subgroup_size)) |
| { |
| log_error("ERROR: Returned size of max sub group size not valid! " |
| "(Expected %d, got %d)\n", |
| (int)sizeof(kernel_max_subgroup_size), (int)realSize); |
| return TEST_FAIL; |
| } |
| error = clGetKernelSubGroupInfo_ptr( |
| kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local), |
| (void *)&local, sizeof(kernel_subgroup_count), |
| (void *)&kernel_subgroup_count, &realSize); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: %s function error " |
| "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", |
| subgroupsApiSet.clGetKernelSubGroupInfo_name); |
| return TEST_FAIL; |
| } |
| log_info( |
| "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", |
| (int)kernel_subgroup_count); |
| |
| if (realSize != sizeof(kernel_subgroup_count)) |
| { |
| log_error("ERROR: Returned size of sub group count not valid! " |
| "(Expected %d, got %d)\n", |
| (int)sizeof(kernel_subgroup_count), (int)realSize); |
| return TEST_FAIL; |
| } |
| |
| // Verify that the kernel gets the same max_subgroup_size and subgroup_count |
| out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, |
| &error); |
| test_error(error, "clCreateBuffer failed"); |
| |
| error = clSetKernelArg(kernel, 0, sizeof(out), &out); |
| test_error(error, "clSetKernelArg failed"); |
| |
| error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, |
| local, 0, NULL, NULL); |
| test_error(error, "clEnqueueNDRangeKernel failed"); |
| |
| error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), |
| &result, 0, NULL, NULL); |
| test_error(error, "clEnqueueReadBuffer failed"); |
| |
| error = clFinish(queue); |
| test_error(error, "clFinish failed"); |
| |
| for (i = 0; i < (int)gsize0; ++i) |
| { |
| if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) |
| { |
| log_error("ERROR: get_max_subgroup_size() doesn't match result " |
| "from clGetKernelSubGroupInfoKHR, %u vs %u\n", |
| result[i].maxSubGroupSize, |
| (cl_uint)kernel_max_subgroup_size); |
| return -1; |
| } |
| if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) |
| { |
| log_error("ERROR: get_num_sub_groups() doesn't match result from " |
| "clGetKernelSubGroupInfoKHR, %u vs %u\n", |
| result[i].numSubGroups, (cl_uint)kernel_subgroup_count); |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| int test_sub_group_info_core(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements) |
| { |
| return test_sub_group_info(device, context, queue, num_elements, true); |
| } |
| |
| int test_sub_group_info_ext(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements) |
| { |
| bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); |
| |
| if (!hasExtension) |
| { |
| log_info( |
| "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| |
| return test_sub_group_info(device, context, queue, num_elements, false); |
| } |