| // |
| // Copyright (c) 2017 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| #include "procs.h" |
| #include "harness/conversions.h" |
| #include "harness/typeWrappers.h" |
| |
| struct get_test_data |
| { |
| cl_uint subGroupSize; |
| cl_uint maxSubGroupSize; |
| cl_uint numSubGroups; |
| cl_uint enqNumSubGroups; |
| cl_uint subGroupId; |
| cl_uint subGroupLocalId; |
| bool operator==(get_test_data x) |
| { |
| return subGroupSize == x.subGroupSize |
| && maxSubGroupSize == x.maxSubGroupSize |
| && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId |
| && subGroupLocalId == x.subGroupLocalId; |
| } |
| }; |
| |
| static int check_group(const get_test_data *result, int nw, cl_uint ensg, |
| int maxwgs) |
| { |
| int first = -1; |
| int last = -1; |
| int i, j; |
| cl_uint hit[32]; |
| |
| for (i = 0; i < nw; ++i) |
| { |
| if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0) |
| first = i; |
| if (result[i].subGroupId == result[0].numSubGroups - 1 |
| && result[i].subGroupLocalId == 0) |
| last = i; |
| if (first != -1 && last != -1) break; |
| } |
| |
| if (first == -1 || last == -1) |
| { |
| log_error("ERROR: expected sub group id's are missing\n"); |
| return -1; |
| } |
| |
| // Check them |
| if (result[first].subGroupSize == 0) |
| { |
| log_error("ERROR: get_sub_group_size() returned 0\n"); |
| return -1; |
| } |
| if (result[first].maxSubGroupSize == 0 |
| || result[first].maxSubGroupSize > maxwgs) |
| { |
| log_error( |
| "ERROR: get_max_subgroup_size() returned incorrect result: %u\n", |
| result[first].maxSubGroupSize); |
| return -1; |
| } |
| if (result[first].subGroupSize > result[first].maxSubGroupSize) |
| { |
| log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n"); |
| return -1; |
| } |
| if (result[last].subGroupSize > result[first].subGroupSize) |
| { |
| log_error("ERROR: last sub group larger than first sub group\n"); |
| return -1; |
| } |
| if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg) |
| { |
| log_error( |
| "ERROR: get_num_sub_groups() returned incorrect result: %u \n", |
| result[first].numSubGroups); |
| return -1; |
| } |
| |
| memset(hit, 0, sizeof(hit)); |
| for (i = 0; i < nw; ++i) |
| { |
| if (result[i].maxSubGroupSize != result[first].maxSubGroupSize |
| || result[i].numSubGroups != result[first].numSubGroups) |
| { |
| log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); |
| return -1; |
| } |
| if (result[i].subGroupId >= result[first].numSubGroups) |
| { |
| log_error( |
| "ERROR: get_sub_group_id() returned out of range value: %u\n", |
| result[i].subGroupId); |
| return -1; |
| } |
| if (result[i].enqNumSubGroups != ensg) |
| { |
| log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect " |
| "value: %u\n", |
| result[i].enqNumSubGroups); |
| return -1; |
| } |
| if (result[first].numSubGroups > 1) |
| { |
| if (result[i].subGroupId < result[first].numSubGroups - 1) |
| { |
| if (result[i].subGroupSize != result[first].subGroupSize) |
| { |
| log_error( |
| "ERROR: unexpected variation in get_*_sub_group_*()\n"); |
| return -1; |
| } |
| if (result[i].subGroupLocalId >= result[first].subGroupSize) |
| { |
| log_error("ERROR: get_sub_group_local_id() returned out of " |
| "bounds value: %u \n", |
| result[i].subGroupLocalId); |
| return -1; |
| } |
| } |
| else |
| { |
| if (result[i].subGroupSize != result[last].subGroupSize) |
| { |
| log_error( |
| "ERROR: unexpected variation in get_*_sub_group_*()\n"); |
| return -1; |
| } |
| if (result[i].subGroupLocalId >= result[last].subGroupSize) |
| { |
| log_error("ERROR: get_sub_group_local_id() returned out of " |
| "bounds value: %u \n", |
| result[i].subGroupLocalId); |
| return -1; |
| } |
| } |
| } |
| else |
| { |
| if (result[i].subGroupSize != result[first].subGroupSize) |
| { |
| log_error( |
| "ERROR: unexpected variation in get_*_sub_group_*()\n"); |
| return -1; |
| } |
| if (result[i].subGroupLocalId >= result[first].subGroupSize) |
| { |
| log_error("ERROR: get_sub_group_local_id() returned out of " |
| "bounds value: %u \n", |
| result[i].subGroupLocalId); |
| return -1; |
| } |
| } |
| |
| j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId |
| + (result[i].subGroupLocalId >> 5); |
| if (j < sizeof(hit) / 4) |
| { |
| cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); |
| if ((hit[j] & b) != 0) |
| { |
| log_error("ERROR: get_sub_group_local_id() repeated a result " |
| "in the same sub group\n"); |
| return -1; |
| } |
| hit[j] |= b; |
| } |
| } |
| |
| return 0; |
| } |
| |
| int test_work_item_functions(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements, |
| bool useCoreSubgroups) |
| { |
| static const size_t lsize = 200; |
| int error; |
| int i, j, k, q, r, nw; |
| int maxwgs; |
| cl_uint ensg; |
| size_t global; |
| size_t local; |
| get_test_data result[lsize * 6]; |
| clProgramWrapper program; |
| clKernelWrapper kernel; |
| clMemWrapper out; |
| std::stringstream kernel_sstr; |
| if (useCoreSubgroups) |
| { |
| kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; |
| } |
| kernel_sstr |
| << "\n" |
| "\n" |
| "typedef struct {\n" |
| " uint subGroupSize;\n" |
| " uint maxSubGroupSize;\n" |
| " uint numSubGroups;\n" |
| " uint enqNumSubGroups;\n" |
| " uint subGroupId;\n" |
| " uint subGroupLocalId;\n" |
| "} get_test_data;\n" |
| "\n" |
| "__kernel void get_test( __global get_test_data *outData )\n" |
| "{\n" |
| " int gid = get_global_id( 0 );\n" |
| " outData[gid].subGroupSize = get_sub_group_size();\n" |
| " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" |
| " outData[gid].numSubGroups = get_num_sub_groups();\n" |
| " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" |
| " outData[gid].subGroupId = get_sub_group_id();\n" |
| " outData[gid].subGroupLocalId = get_sub_group_local_id();\n" |
| "}"; |
| const std::string &kernel_str = kernel_sstr.str(); |
| const char *kernel_src = kernel_str.c_str(); |
| error = create_single_kernel_helper_with_build_options( |
| context, &program, &kernel, 1, &kernel_src, "get_test", |
| "-cl-std=CL2.0"); |
| if (error != 0) return error; |
| |
| error = get_max_allowed_work_group_size(context, kernel, &local, NULL); |
| if (error != 0) return error; |
| |
| maxwgs = (int)local; |
| |
| // Limit it a bit so we have muliple work groups |
| // Ideally this will still be large enough to give us multiple subgroups |
| if (local > lsize) local = lsize; |
| |
| // Create our buffer |
| out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, |
| &error); |
| test_error(error, "clCreateBuffer failed"); |
| |
| // Set argument |
| error = clSetKernelArg(kernel, 0, sizeof(out), &out); |
| test_error(error, "clSetKernelArg failed"); |
| |
| global = local * 5; |
| |
| // Make sure we have a flexible range |
| global += 3 * local / 4; |
| |
| // Collect the data |
| memset((void *)&result, 0xf0, sizeof(result)); |
| |
| error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), |
| (void *)&result, 0, NULL, NULL); |
| test_error(error, "clEnqueueWriteBuffer failed"); |
| |
| error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, |
| NULL, NULL); |
| test_error(error, "clEnqueueNDRangeKernel failed"); |
| |
| error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), |
| (void *)&result, 0, NULL, NULL); |
| test_error(error, "clEnqueueReadBuffer failed"); |
| |
| error = clFinish(queue); |
| test_error(error, "clFinish failed"); |
| |
| nw = (int)local; |
| ensg = result[0].enqNumSubGroups; |
| |
| // Check the first group |
| error = check_group(result, nw, ensg, maxwgs); |
| if (error) return error; |
| |
| q = (int)global / nw; |
| r = (int)global % nw; |
| |
| // Check the remaining work groups including the last if it is the same size |
| for (k = 1; k < q; ++k) |
| { |
| for (j = 0; j < nw; ++j) |
| { |
| i = k * nw + j; |
| if (!(result[i] == result[i - nw])) |
| { |
| log_error("ERROR: sub group mapping is not identical for all " |
| "work groups\n"); |
| return -1; |
| } |
| } |
| } |
| |
| // Check the last group if it wasn't the same size |
| if (r != 0) |
| { |
| error = check_group(result + q * nw, r, ensg, maxwgs); |
| if (error) return error; |
| } |
| |
| return 0; |
| } |
| |
| int test_work_item_functions_core(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements) |
| { |
| return test_work_item_functions(device, context, queue, num_elements, true); |
| } |
| |
| int test_work_item_functions_ext(cl_device_id device, cl_context context, |
| cl_command_queue queue, int num_elements) |
| { |
| bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); |
| |
| if (!hasExtension) |
| { |
| log_info( |
| "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); |
| return TEST_SKIPPED_ITSELF; |
| } |
| |
| return test_work_item_functions(device, context, queue, num_elements, |
| false); |
| } |