blob: 6ccdcc6e6fc584cecbdc635d89fa96ad231725cc [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "crc32.h"
#include "kernelHelpers.h"
#include "deviceInfo.h"
#include "errorHelpers.h"
#include "imageHelpers.h"
#include "typeWrappers.h"
#include "testHarness.h"
#include "parseParameters.h"
#include <cassert>
#include <vector>
#include <string>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <mutex>
#include <algorithm>
// Path separator inserted between a directory and a file name when the
// helpers below build cache file paths.
#if defined(_WIN32)
std::string slash = "\\";
#else
std::string slash = "/";
#endif
// Locked for the whole duration of the file-static
// create_single_kernel_helper_create_program() overload.
static std::mutex gCompilerMutex;
// Forward declaration; the definition appears further down in this file.
static cl_int get_first_device_id(const cl_context context,
cl_device_id &device);
// Reports the size of a file in bytes.
//
// fileName: path of the file to inspect.
// Returns the end-of-file position obtained after seeking to the end of the
// file, or 0 when the file cannot be opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // The read position after seeking to the end is the byte count.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
// Concatenates the separate source strings of a kernel program into a single
// string.
//
// numKernelLines: number of entries in kernelProgram.
// kernelProgram:  array of NUL-terminated source strings.
// Returns all entries joined in order, with nothing inserted between them.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    const char *const *const lastLine = kernelProgram + numKernelLines;
    for (const char *const *line = kernelProgram; line != lastLine; ++line)
    {
        joined.append(*line);
    }
    return joined;
}
// Scans OpenCL source text for kernel definitions and returns the names that
// were found, each followed by a '.' (for example "foo.bar.").
//
// A candidate is every occurrence of "kernel" (optionally written
// "__kernel") that is followed by whitespace and is either directly preceded
// by whitespace or has no whitespace anywhere before it. The kernel name is
// the last whitespace-delimited token in front of the next '('; any
// "__attribute__((...))" groups in between are skipped.
//
// When MAX_LEN_FOR_KERNEL_LIST is positive the list is cut to
// MAX_LEN_FOR_KERNEL_LIST + 1 characters with the last one forced to '.';
// otherwise an empty string is returned.
std::string get_kernel_name(const std::string &source)
{
    // Create list of kernel names
    std::string kernelsList;
    size_t kPos = source.find("kernel");
    while (kPos != std::string::npos)
    {
        // check for '__kernel'
        size_t pos = kPos;
        if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
            pos -= 2;
        // check character before 'kernel' (white space expected)
        size_t wsPos = source.find_last_of(" \t\r\n", pos);
        if (wsPos == std::string::npos || wsPos + 1 == pos)
        {
            // check character after 'kernel' (white space expected)
            size_t akPos = kPos + sizeof("kernel") - 1;
            wsPos = source.find_first_of(" \t\r\n", akPos);
            if (!(wsPos == akPos))
            {
                kPos = source.find("kernel", kPos + 1);
                continue;
            }
            // Each `continue` below jumps to the loop condition with
            // attributeFound == false, i.e. it abandons this candidate.
            bool attributeFound;
            do
            {
                attributeFound = false;
                // find '(' after kernel name
                size_t pPos = source.find("(", akPos);
                if (!(pPos != std::string::npos)) continue;
                // check for not empty kernel name before '('
                pos = source.find_last_not_of(" \t\r\n", pPos - 1);
                if (!(pos != std::string::npos && pos > akPos)) continue;
                // find character before kernel name
                wsPos = source.find_last_of(" \t\r\n", pos);
                if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
                std::string name =
                    source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
                // check for kernel attribute
                if (name == "__attribute__")
                {
                    attributeFound = true;
                    // Skip to the parenthesis that closes the attribute by
                    // tracking the nesting depth, then look for the next '('.
                    int pCount = 1;
                    akPos = pPos + 1;
                    while (pCount > 0 && akPos != std::string::npos)
                    {
                        akPos = source.find_first_of("()", akPos + 1);
                        if (akPos != std::string::npos)
                        {
                            if (source[akPos] == '(')
                                pCount++;
                            else
                                pCount--;
                        }
                    }
                }
                else
                {
                    kernelsList += name + ".";
                }
            } while (attributeFound);
        }
        kPos = source.find("kernel", kPos + 1);
    }

    if (MAX_LEN_FOR_KERNEL_LIST > 0)
    {
        if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
        {
            // Cap the list length and keep it terminated by a '.'
            kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
            kernelsList[kernelsList.size() - 1] = '.';
        }
        return kernelsList;
    }
    return std::string();
}
// Maps an offline compilation mode to the file-type label used in log
// messages and in the CL device info file: "binary" or "SPIR-V".
// kOnline and unknown values are programming errors: they assert and abort.
static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)
{
    if (compilationMode == kBinary) return "binary";
    if (compilationMode == kSpir_v) return "SPIR-V";
    if (compilationMode == kOnline)
    {
        assert(0 && "Invalid compilation mode for offline compilation");
        abort();
    }
    assert(0 && "Invalid compilation mode");
    abort();
}
static std::string get_unique_filename_prefix(unsigned int numKernelLines,
const char *const *kernelProgram,
const char *buildOptions)
{
std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
std::string kernelName = get_kernel_name(kernel);
cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
std::ostringstream oss;
oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
<< kernelCrc;
if (buildOptions)
{
cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
oss << '.' << std::hex << std::setfill('0') << std::setw(8)
<< bOptionsCrc;
}
return oss.str();
}
// Returns "<filePath><slash><fileNamePrefix>.options", the file in which the
// build options of a program are saved.
static std::string
get_cl_build_options_filename_with_path(const std::string &filePath,
                                        const std::string &fileNamePrefix)
{
    std::string fullName(filePath);
    fullName += slash;
    fullName += fileNamePrefix;
    fullName += ".options";
    return fullName;
}
// Returns "<filePath><slash><fileNamePrefix>.cl", the file in which the
// source of a program is saved.
static std::string
get_cl_source_filename_with_path(const std::string &filePath,
                                 const std::string &fileNamePrefix)
{
    std::string fullName(filePath);
    fullName.append(slash).append(fileNamePrefix).append(".cl");
    return fullName;
}
// Returns the path of the offline compiler's output file.
//
// The name is "<filePath><slash><fileNamePrefix>"; in SPIR-V mode the suffix
// ".spv<deviceAddrSpaceSize>" is appended, other modes get no extension.
static std::string
get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
                              const std::string &filePath,
                              const std::string &fileNamePrefix)
{
    std::string fullName(filePath);
    fullName.append(slash).append(fileNamePrefix);
    if (mode == kSpir_v)
    {
        fullName += ".spv" + std::to_string(deviceAddrSpaceSize);
    }
    return fullName;
}
// Returns true when "<filePath><slash><fileName>" can be opened for reading.
static bool file_exist_on_disk(const std::string &filePath,
                               const std::string &fileName)
{
    const std::string candidate = filePath + slash + fileName;
    // The stream is closed again by its destructor.
    std::ifstream probe(candidate.c_str(), std::ios::binary);
    return probe.good();
}
// Decides whether the kernel source (and build options) must be written to
// the compilation cache directory.
//
//  - kCacheModeDumpCl: always.
//  - kCacheModeOverwrite: for every offline compilation mode.
//  - kCacheModeCompileIfAbsent: for offline modes, only when
//    binaryPath/binaryName does not exist yet.
//  - anything else: never.
//
// NOTE(review): binaryName is checked as given, without the ".spv<bits>"
// suffix that get_binary_filename_with_path() adds in SPIR-V mode - confirm
// this is intended.
static bool should_save_kernel_source_to_disk(CompilationMode mode,
                                              CompilationCacheMode cacheMode,
                                              const std::string &binaryPath,
                                              const std::string &binaryName)
{
    const bool offline = (mode != kOnline);
    if (offline && cacheMode == kCacheModeCompileIfAbsent)
    {
        return !file_exist_on_disk(binaryPath, binaryName);
    }
    return cacheMode == kCacheModeDumpCl
        || (offline && cacheMode == kCacheModeOverwrite);
}
// Writes buildOptions to "<path><slash><prefix>.options".
//
// buildOptions must not be NULL.
// Returns CL_SUCCESS on success, -1 when the file cannot be opened.
static int save_kernel_build_options_to_disk(const std::string &path,
                                             const std::string &prefix,
                                             const char *buildOptions)
{
    const std::string optionsFile =
        get_cl_build_options_filename_with_path(path, prefix);
    std::ofstream out(optionsFile.c_str(), std::ios::binary);
    if (out.good())
    {
        out.write(buildOptions, strlen(buildOptions));
        out.close();
        log_info("Saved kernel build options to file: %s\n",
                 optionsFile.c_str());
        return CL_SUCCESS;
    }
    log_info("Can't save kernel build options: %s\n", optionsFile.c_str());
    return -1;
}
// Writes the kernel source to "<path><slash><prefix>.cl".
//
// Returns CL_SUCCESS on success, -1 when the file cannot be opened.
static int save_kernel_source_to_disk(const std::string &path,
                                      const std::string &prefix,
                                      const std::string &source)
{
    const std::string sourceFile =
        get_cl_source_filename_with_path(path, prefix);
    std::ofstream out(sourceFile.c_str(), std::ios::binary);
    if (out.good())
    {
        out.write(source.data(), source.size());
        out.close();
        log_info("Saved kernel source to file: %s\n", sourceFile.c_str());
        return CL_SUCCESS;
    }
    log_info("Can't save kernel source: %s\n", sourceFile.c_str());
    return -1;
}
// Saves the kernel source and, when buildOptions is non-NULL, the build
// options into gCompilationCachePath under the program's unique prefix.
//
// Returns the bitwise OR of the two save results, so the value is CL_SUCCESS
// only when every attempted save succeeded.
static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
                                       const char *const *kernelProgram,
                                       const char *buildOptions)
{
    const std::string source =
        get_kernel_content(numKernelLines, kernelProgram);
    const std::string prefix =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);

    int status =
        save_kernel_source_to_disk(gCompilationCachePath, prefix, source);
    if (buildOptions != NULL)
    {
        status |= save_kernel_build_options_to_disk(gCompilationCachePath,
                                                    prefix, buildOptions);
    }
    return status;
}
// Maps a compilation mode to the value passed to the offline compiler
// wrapper's --mode option: "online", "binary" or "spir-v".
// Unknown values are a programming error: they assert and abort.
static std::string
get_compilation_mode_str(const CompilationMode compilationMode)
{
    if (compilationMode == kOnline) return "online";
    if (compilationMode == kBinary) return "binary";
    if (compilationMode == kSpir_v) return "spir-v";
    assert(0 && "Invalid compilation mode");
    abort();
}
#ifdef KHRONOS_OFFLINE_COMPILER
// Builds the command line that invokes the Khronos SPIR-V-ready clang
// (KHRONOS_OFFLINE_COMPILER) on a source file.
//
// device_address_space_size: 32 selects the spir-unknown-unknown triple, any
//                            other value selects spir64-unknown-unknown.
// openclCXX:      compile as OpenCL C++ (adds -cl-std=c++ and the libclcxx
//                 include directory) instead of force-including opencl.h.
// bOptions:       extra build options appended to the command.
// sourceFilename: input file.
// outputFilename: file passed to -o.
// Returns the complete command string.
static std::string
get_khronos_compiler_command(const cl_uint device_address_space_size,
                             const bool openclCXX, const std::string &bOptions,
                             const std::string &sourceFilename,
                             const std::string &outputFilename)
{
    // Set compiler options
    // Emit SPIR-V
    std::string compilerOptions = " -cc1 -emit-spirv";
    // <triple>: for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V
    // use spir64-unknown-unknown.
    if (device_address_space_size == 32)
    {
        compilerOptions += " -triple=spir-unknown-unknown";
    }
    else
    {
        compilerOptions += " -triple=spir64-unknown-unknown";
    }
    if (openclCXX)
    {
        // OpenCL C++ flag required by the SPIR-V-ready clang (compiler
        // provided by Khronos), plus the matching include directory
        compilerOptions += " -cl-std=c++";
        compilerOptions += " -I ";
        compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
    }
    else
    {
        compilerOptions += " -include opencl.h";
    }
#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS
    // Separate the extra options from the previous argument. This assumes
    // STRINGIFY_VALUE is built on the preprocessor '#' operator, which drops
    // leading whitespace, so the macro value cannot supply the space itself.
    compilerOptions += " ";
    compilerOptions += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS);
#endif
    // Add build options passed to this function
    compilerOptions += " " + bOptions;
    compilerOptions += " " + sourceFilename + " -o " + outputFilename;
    std::string runString =
        STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions;
    return runString;
}
#endif // KHRONOS_OFFLINE_COMPILER
// Produces the text of the CL device info file handed to the offline
// compiler.
//
// The text is a header comment followed by KEY=value lines for
// CL_DEVICE_ADDRESS_BITS, CL_DEVICE_EXTENSIONS, CL_DEVICE_IL_VERSION (SPIR-V
// mode only), CL_DEVICE_VERSION, CL_DEVICE_IMAGE_SUPPORT and CL_DEVICE_NAME.
// The result is stored in clDeviceInfo; the function always returns
// CL_SUCCESS.
static cl_int get_cl_device_info_str(const cl_device_id device,
                                     const cl_uint device_address_space_size,
                                     const CompilationMode compilationMode,
                                     std::string &clDeviceInfo)
{
    const std::string extensions = get_device_extensions_string(device);
    const std::string version = get_device_version_string(device);
    const std::string file_type =
        get_offline_compilation_file_type_str(compilationMode);

    std::ostringstream info;
    info << "# OpenCL device info affecting " << file_type
         << " offline compilation:" << '\n';
    info << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << '\n';
    info << "CL_DEVICE_EXTENSIONS=\"" << extensions << "\"" << '\n';
    /* The device's supported IL version(s) are only needed when compiling IL
     * that will be loaded with clCreateProgramWithIL() */
    if (compilationMode == kSpir_v)
    {
        const std::string ilVersion = get_device_il_version_string(device);
        info << "CL_DEVICE_IL_VERSION=\"" << ilVersion << "\"" << '\n';
    }
    info << "CL_DEVICE_VERSION=\"" << version << "\"" << '\n';
    info << "CL_DEVICE_IMAGE_SUPPORT=" << (0 == checkForImageSupport(device))
         << '\n';
    info << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str() << "\""
         << '\n';

    clDeviceInfo = info.str();
    return CL_SUCCESS;
}
// Writes the CL device info file for `device` into gCompilationCachePath and
// reports its name through clDeviceInfoFilename.
//
// The file is named "clDeviceInfo-<crc32 of content as 8 hex digits>.txt".
// When a file of that name already has the expected length it is left
// untouched.
//
// Returns CL_SUCCESS on success, the error from get_cl_device_info_str(), or
// -1 when the file cannot be created or fully written.
static int write_cl_device_info(const cl_device_id device,
                                const cl_uint device_address_space_size,
                                const CompilationMode compilationMode,
                                std::string &clDeviceInfoFilename)
{
    std::string clDeviceInfo;
    int error = get_cl_device_info_str(device, device_address_space_size,
                                       compilationMode, clDeviceInfo);
    if (error != CL_SUCCESS)
    {
        return error;
    }
    cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
    /* Get the filename for the clDeviceInfo file.
     * Note: the file includes the hash on its content, so it is usually
     * unnecessary to delete it. */
    std::ostringstream clDeviceInfoFilenameStream;
    clDeviceInfoFilenameStream << gCompilationCachePath << slash
                               << "clDeviceInfo-";
    clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
                               << crc << ".txt";
    clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
    if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
    {
        /* The CL device info file has already been created.
         * Nothing to do. */
        return CL_SUCCESS;
    }
    /* The file does not exist or its length is not as expected.
     * Create/overwrite it. */
    // NOTE(review): the stream is opened in text mode while the size check
    // above counts raw bytes; on platforms that translate newlines the two
    // may never match, forcing a rewrite on every call - confirm.
    std::ofstream ofs(clDeviceInfoFilename);
    if (!ofs.good())
    {
        log_info("OfflineCompiler: can't create CL device info file: %s\n",
                 clDeviceInfoFilename.c_str());
        return -1;
    }
    ofs << clDeviceInfo;
    ofs.close();
    // A failed write or close leaves a truncated file behind; report it
    // instead of handing incomplete device info to the offline compiler.
    if (ofs.fail())
    {
        log_info("OfflineCompiler: can't write CL device info file: %s\n",
                 clDeviceInfoFilename.c_str());
        return -1;
    }
    return CL_SUCCESS;
}
// Builds the command line for the offline compiler wrapper
// (gCompilationProgram):
//   <program> --mode=<m> --source=<s> --output=<o> --cl-device-info=<f>
// followed by " -- <bOptions>" when bOptions is not empty.
// device_address_space_size is accepted but not used here.
static std::string get_offline_compilation_command(
    const cl_uint device_address_space_size,
    const CompilationMode compilationMode, const std::string &bOptions,
    const std::string &sourceFilename, const std::string &outputFilename,
    const std::string &clDeviceInfoFilename)
{
    std::ostringstream command;
    command << gCompilationProgram;
    command << " --mode=" << get_compilation_mode_str(compilationMode);
    command << " --source=" << sourceFilename;
    command << " --output=" << outputFilename;
    command << " --cl-device-info=" << clDeviceInfoFilename;
    if (!bOptions.empty())
    {
        // Build options are forwarded after a "--" separator
        command << " -- " << bOptions;
    }
    return command.str();
}
// Runs the offline compiler on sourceFilename, producing outputFilename.
//
// For OpenCL C++ (openclCXX) the Khronos compiler command is used, which
// requires KHRONOS_OFFLINE_COMPILER to be defined and SPIR-V mode. Otherwise
// a CL device info file is written first and the generic wrapper command is
// built from it. The command is executed with system().
//
// Returns CL_SUCCESS when the command exits with status 0,
// CL_COMPILE_PROGRAM_FAILURE when it does not, CL_INVALID_OPERATION when
// OpenCL C++ is requested without KHRONOS_OFFLINE_COMPILER, -1 for a wrong
// mode in the OpenCL C++ path, or the error from write_cl_device_info().
static int invoke_offline_compiler(const cl_device_id device,
                                   const cl_uint device_address_space_size,
                                   const CompilationMode compilationMode,
                                   const std::string &bOptions,
                                   const std::string &sourceFilename,
                                   const std::string &outputFilename,
                                   const bool openclCXX)
{
    std::string runString;
    if (openclCXX)
    {
#ifndef KHRONOS_OFFLINE_COMPILER
        log_error("CL C++ compilation is not possible: "
                  "KHRONOS_OFFLINE_COMPILER was not defined.\n");
        return CL_INVALID_OPERATION;
#else
        if (compilationMode != kSpir_v)
        {
            // Terminate the message with a newline like every other log line
            log_error("Compilation mode must be SPIR-V for Khronos compiler\n");
            return -1;
        }
        runString = get_khronos_compiler_command(
            device_address_space_size, openclCXX, bOptions, sourceFilename,
            outputFilename);
#endif
    }
    else
    {
        std::string clDeviceInfoFilename;
        // See cl_offline_compiler-interface.txt for a description of the
        // format of the CL device information file generated below, and
        // the internal command line interface for invoking the offline
        // compiler.
        cl_int err =
            write_cl_device_info(device, device_address_space_size,
                                 compilationMode, clDeviceInfoFilename);
        if (err != CL_SUCCESS)
        {
            log_error("Failed writing CL device info file\n");
            return err;
        }
        runString = get_offline_compilation_command(
            device_address_space_size, compilationMode, bOptions,
            sourceFilename, outputFilename, clDeviceInfoFilename);
    }
    // execute script
    log_info("Executing command: %s\n", runString.c_str());
    // Flush our own output before the child process starts writing
    fflush(stdout);
    int returnCode = system(runString.c_str());
    if (returnCode != 0)
    {
        log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
        return CL_COMPILE_PROGRAM_FAILURE;
    }
    return CL_SUCCESS;
}
// Looks up the first device listed for `context` and stores it in `device`.
//
// Returns CL_SUCCESS on success, -1 when the context reports no devices, or
// whatever test_error() returns when a clGetContextInfo query fails.
static cl_int get_first_device_id(const cl_context context,
                                  cl_device_id &device)
{
    cl_uint deviceCount = 0;
    cl_int status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
                                     sizeof(deviceCount), &deviceCount, NULL);
    test_error(status,
               "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
    if (deviceCount == 0)
    {
        log_error("ERROR: No CL devices found\n");
        return -1;
    }

    std::vector<cl_device_id> contextDevices(deviceCount);
    status = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                              contextDevices.size() * sizeof(cl_device_id),
                              contextDevices.data(), NULL);
    test_error(status, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");

    device = contextDevices.front();
    return CL_SUCCESS;
}
// Queries CL_DEVICE_ADDRESS_BITS of `device` into device_address_space_size.
//
// Returns CL_SUCCESS when the value is 32 or 64, -1 for any other value, or
// whatever test_error() returns when the query itself fails.
static cl_int get_device_address_bits(const cl_device_id device,
                                      cl_uint &device_address_space_size)
{
    cl_int status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
                                    sizeof(device_address_space_size),
                                    &device_address_space_size, NULL);
    test_error(status, "Unable to obtain device address bits");

    const bool supported =
        (device_address_space_size == 32 || device_address_space_size == 64);
    if (supported)
    {
        return CL_SUCCESS;
    }
    log_error("ERROR: Unexpected number of device address bits: %u\n",
              device_address_space_size);
    return -1;
}
// Opens the offline-compiled file for a program in `ifs`, running the
// offline compiler first when the file is not present yet.
//
// When the output file cannot be opened:
//  - in kCacheModeForceRead the function fails with -1;
//  - otherwise the offline compiler is invoked and the file is opened again.
//
// Returns CL_SUCCESS with `ifs` open on the output file, the error from
// invoke_offline_compiler(), or -1 when the file cannot be opened.
static int get_offline_compiler_output(
    std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
    const bool openclCXX, const CompilationMode compilationMode,
    const std::string &bOptions, const std::string &kernelPath,
    const std::string &kernelNamePrefix)
{
    const std::string inputFile =
        get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
    const std::string outputFile = get_binary_filename_with_path(
        compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);

    // Fast path: a previously generated file is available.
    ifs.open(outputFile.c_str(), std::ios::binary);
    if (ifs.good())
    {
        return CL_SUCCESS;
    }

    const std::string file_type =
        get_offline_compilation_file_type_str(compilationMode);
    if (gCompilationCacheMode == kCacheModeForceRead)
    {
        log_info("OfflineCompiler: can't open cached %s file: %s\n",
                 file_type.c_str(), outputFile.c_str());
        return -1;
    }

    const int compileStatus = invoke_offline_compiler(
        device, deviceAddrSpaceSize, compilationMode, bOptions, inputFile,
        outputFile, openclCXX);
    if (compileStatus != CL_SUCCESS)
    {
        return compileStatus;
    }

    // read output file
    ifs.open(outputFile.c_str(), std::ios::binary);
    if (!ifs.good())
    {
        log_info("OfflineCompiler: can't read generated %s file: %s\n",
                 file_type.c_str(), outputFile.c_str());
        return -1;
    }
    return CL_SUCCESS;
}
// Creates a program object from offline-compiled output instead of source.
//
// The output file is obtained through get_offline_compiler_output() (which
// may run the offline compiler) and is loaded with
// clCreateProgramWithBinary (kBinary) or clCreateProgramWithIL /
// clCreateProgramWithILKHR (kSpir_v, depending on gCoreILProgram).
//
// device may be NULL, in which case the first device of `context` is used.
// Returns CL_SUCCESS and sets *outProgram on success. Returns -1 in
// kCacheModeDumpCl, when the output file is empty or unreadable, or when the
// IL extension entry point is missing; otherwise the failing OpenCL error.
static int create_single_kernel_helper_create_program_offline(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char *const *kernelProgram,
    const char *buildOptions, const bool openclCXX,
    CompilationMode compilationMode)
{
    if (kCacheModeDumpCl == gCompilationCacheMode)
    {
        return -1;
    }
    // Get device CL_DEVICE_ADDRESS_BITS
    int error;
    cl_uint device_address_space_size = 0;
    if (device == NULL)
    {
        error = get_first_device_id(context, device);
        test_error(error, "Failed to get device ID for first device");
    }
    error = get_device_address_bits(device, device_address_space_size);
    if (error != CL_SUCCESS) return error;
    // set build options
    const std::string bOptions(buildOptions ? buildOptions : "");
    std::string kernelName =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
    std::ifstream ifs;
    error = get_offline_compiler_output(ifs, device, device_address_space_size,
                                        openclCXX, compilationMode, bOptions,
                                        gCompilationCachePath, kernelName);
    if (error != CL_SUCCESS) return error;
    // -----------------------------------------------------------------------------------
    // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
    // ------------------
    // -----------------------------------------------------------------------------------
    // Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
    if (openclCXX)
    {
        return CL_SUCCESS;
    }
#endif
    // Modes other than binary / SPIR-V have nothing to load.
    if (compilationMode != kBinary && compilationMode != kSpir_v)
    {
        return CL_SUCCESS;
    }

    // Read the whole compiled file. tellg() reports -1 on failure and an
    // empty file cannot hold a program, so both are rejected before the
    // buffer is sized and its first element is addressed.
    ifs.seekg(0, std::ios::end);
    const std::streamoff fileLength = ifs.tellg();
    ifs.seekg(0, std::ios::beg);
    if (fileLength <= 0)
    {
        log_error("OfflineCompiler: compiled program file is empty or its "
                  "size can't be determined\n");
        return -1;
    }
    std::vector<unsigned char> modifiedKernelBuf(
        static_cast<size_t>(fileLength));
    ifs.read(reinterpret_cast<char *>(modifiedKernelBuf.data()),
             static_cast<std::streamsize>(fileLength));
    const bool readSucceeded = ifs.good();
    ifs.close();
    if (!readSucceeded)
    {
        log_error("OfflineCompiler: can't read compiled program file\n");
        return -1;
    }

    // treat modifiedProgram as input for clCreateProgramWithBinary
    if (compilationMode == kBinary)
    {
        size_t lengths = modifiedKernelBuf.size();
        const unsigned char *binaries = { modifiedKernelBuf.data() };
        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
                 "clCreateProgramWithBinary\n");
        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
                                                &binaries, NULL, &error);
        if (*outProgram == NULL || error != CL_SUCCESS)
        {
            print_error(error, "clCreateProgramWithBinary failed");
            // Never report success without a program object
            return error != CL_SUCCESS ? error : -1;
        }
    }
    // treat modifiedProgram as input for clCreateProgramWithIL
    else
    {
        const size_t ilLength = modifiedKernelBuf.size();
        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
                 "clCreateProgramWithIL\n");
        if (gCoreILProgram)
        {
            *outProgram = clCreateProgramWithIL(
                context, modifiedKernelBuf.data(), ilLength, &error);
        }
        else
        {
            // Resolve the extension entry point from the device's platform
            cl_platform_id platform;
            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                    sizeof(cl_platform_id), &platform, NULL);
            test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
            clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
            clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
                clGetExtensionFunctionAddressForPlatform(
                    platform, "clCreateProgramWithILKHR");
            if (clCreateProgramWithILKHR == NULL)
            {
                log_error(
                    "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
                return -1;
            }
            *outProgram = clCreateProgramWithILKHR(
                context, modifiedKernelBuf.data(), ilLength, &error);
        }
        if (*outProgram == NULL || error != CL_SUCCESS)
        {
            if (gCoreILProgram)
            {
                print_error(error, "clCreateProgramWithIL failed");
            }
            else
            {
                print_error(error, "clCreateProgramWithILKHR failed");
            }
            // Never report success without a program object
            return error != CL_SUCCESS ? error : -1;
        }
    }
    return CL_SUCCESS;
}
// Creates a program object for the given source using the requested
// compilation mode.
//
// The whole function runs while holding gCompilerMutex. When the cache mode
// asks for it, the source and build options are first saved to the cache
// directory. kOnline creates the program with clCreateProgramWithSource;
// every other mode is delegated to
// create_single_kernel_helper_create_program_offline().
//
// Returns CL_SUCCESS on success, -1 when the source cannot be saved, or the
// error of the failing step.
static int create_single_kernel_helper_create_program(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions, const bool openclCXX,
    CompilationMode compilationMode)
{
    std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);

    const std::string cachePrefix =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
    const bool dumpSource = should_save_kernel_source_to_disk(
        compilationMode, gCompilationCacheMode, gCompilationCachePath,
        cachePrefix);
    if (dumpSource
        && save_kernel_source_and_options_to_disk(numKernelLines,
                                                  kernelProgram, buildOptions)
            != CL_SUCCESS)
    {
        log_error("Unable to dump kernel source to disk");
        return -1;
    }

    if (compilationMode != kOnline)
    {
        return create_single_kernel_helper_create_program_offline(
            context, device, outProgram, numKernelLines, kernelProgram,
            buildOptions, openclCXX, compilationMode);
    }

    /* Create the program object from source */
    int error = CL_SUCCESS;
    *outProgram = clCreateProgramWithSource(context, numKernelLines,
                                            kernelProgram, NULL, &error);
    if (*outProgram != NULL && error == CL_SUCCESS)
    {
        return CL_SUCCESS;
    }
    print_error(error, "clCreateProgramWithSource failed");
    return error;
}
// Creates a program object using the global compilation mode
// (gCompilationMode) without naming a device.
int create_single_kernel_helper_create_program(
    cl_context context, cl_program *outProgram, unsigned int numKernelLines,
    const char **kernelProgram, const char *buildOptions, const bool openclCXX)
{
    // No device given: the offline path then uses the context's first device.
    cl_device_id unspecifiedDevice = NULL;
    return create_single_kernel_helper_create_program(
        context, unspecifiedDevice, outProgram, numKernelLines, kernelProgram,
        buildOptions, openclCXX, gCompilationMode);
}
// Creates a program object for a specific device using the global
// compilation mode (gCompilationMode).
int create_single_kernel_helper_create_program_for_device(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions, const bool openclCXX)
{
    const int status = create_single_kernel_helper_create_program(
        context, device, outProgram, numKernelLines, kernelProgram,
        buildOptions, openclCXX, gCompilationMode);
    return status;
}
// Forwards every argument unchanged to create_single_kernel_helper() and
// returns its result.
int create_single_kernel_helper_with_build_options(
    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *kernelName, const char *buildOptions, const bool openclCXX)
{
    const int status = create_single_kernel_helper(
        context, outProgram, outKernel, numKernelLines, kernelProgram,
        kernelName, buildOptions, openclCXX);
    return status;
}
// Creates and builds OpenCL C/C++ program, and creates a kernel.
//
// context:        context the program is created in.
// outProgram:     receives the created program.
// outKernel:      receives the created kernel (see
//                 build_program_create_kernel_helper()).
// numKernelLines: number of entries in kernelProgram.
// kernelProgram:  source strings of the program.
// kernelName:     name of the kernel to create.
// buildOptions:   build options, may be NULL. When they contain no
//                 "-cl-std" option, one matching the newest OpenCL C version
//                 of the context is appended (nothing for OpenCL C 1.x).
// openclCXX:      create an OpenCL C++ program instead of OpenCL C.
//
// Returns the error of the failing step, or the result of
// build_program_create_kernel_helper().
int create_single_kernel_helper(cl_context context, cl_program *outProgram,
                                cl_kernel *outKernel,
                                unsigned int numKernelLines,
                                const char **kernelProgram,
                                const char *kernelName,
                                const char *buildOptions, const bool openclCXX)
{
    // For the logic that automatically adds -cl-std it is much cleaner if the
    // build options have RAII. This buffer will store the potentially updated
    // build options, in which case buildOptions will point at the string owned
    // by this buffer.
    std::string build_options_internal{ buildOptions ? buildOptions : "" };
    // Check the build options for the -cl-std option.
    if (!buildOptions || !strstr(buildOptions, "-cl-std"))
    {
        // If the build option isn't present add it using the latest OpenCL-C
        // version supported by the device. This allows calling code to force a
        // particular CL C version if it is required, but also means that
        // callers need not specify a version if they want to assume the most
        // recent CL C.
        auto version = get_max_OpenCL_C_for_context(context);
        std::string cl_std{};
        if (version >= Version(3, 0))
        {
            cl_std = "-cl-std=CL3.0";
        }
        else if (version >= Version(2, 0))
        {
            cl_std = "-cl-std=CL2.0";
        }
        else
        {
            // If the -cl-std build option is not specified, the highest OpenCL
            // C 1.x language version supported by each device is used when
            // compiling the program for each device.
            cl_std = "";
        }
        // The separator is appended even when cl_std is empty: the resulting
        // string also feeds the cache file name hash, so it is kept stable.
        build_options_internal += ' ';
        build_options_internal += cl_std;
        buildOptions = build_options_internal.c_str();
    }
    int error;
    // Create OpenCL C++ program
    if (openclCXX)
    {
        // -----------------------------------------------------------------------------------
        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
        // ------------------
        // -----------------------------------------------------------------------------------
        // Only OpenCL C++ to SPIR-V compilation
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
        // Save global variable (keeping its own type so it can be restored)
        const auto savedCompilationCacheMode = gCompilationCacheMode;
        // Force OpenCL C++ -> SPIR-V compilation on every run
        gCompilationCacheMode = kCacheModeOverwrite;
#endif
        error = create_openclcpp_program(context, outProgram, numKernelLines,
                                         kernelProgram, buildOptions);
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
        // Restore global variables, on the failure path as well
        gCompilationCacheMode = savedCompilationCacheMode;
#endif
        if (error != CL_SUCCESS)
        {
            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
            return error;
        }
        // -----------------------------------------------------------------------------------
        // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT
        // ------------------
        // -----------------------------------------------------------------------------------
#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
        log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n",
                 kernelName);
        return error;
#endif
    }
    // Create OpenCL C program
    else
    {
        error = create_single_kernel_helper_create_program(
            context, outProgram, numKernelLines, kernelProgram, buildOptions);
        if (error != CL_SUCCESS)
        {
            log_error("Create program failed: %d, line: %d\n", error, __LINE__);
            return error;
        }
    }
    // Remove offline-compiler-only build options (first occurrence of each)
    std::string newBuildOptions;
    if (buildOptions != NULL)
    {
        newBuildOptions = buildOptions;
        std::string offlineCompierOptions[] = {
            "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
        };
        for (auto &s : offlineCompierOptions)
        {
            std::string::size_type i = newBuildOptions.find(s);
            if (i != std::string::npos) newBuildOptions.erase(i, s.length());
        }
    }
    // Build program and create kernel
    return build_program_create_kernel_helper(
        context, outProgram, outKernel, numKernelLines, kernelProgram,
        kernelName, newBuildOptions.c_str());
}
// Creates an OpenCL C++ program: the source is always compiled offline to
// SPIR-V (kSpir_v) with the openclCXX flag set, and no device is named.
int create_openclcpp_program(cl_context context, cl_program *outProgram,
                             unsigned int numKernelLines,
                             const char **kernelProgram,
                             const char *buildOptions)
{
    const bool openclCXX = true;
    cl_device_id unspecifiedDevice = NULL;
    return create_single_kernel_helper_create_program(
        context, unspecifiedDevice, outProgram, numKernelLines, kernelProgram,
        buildOptions, openclCXX, kSpir_v);
}
// Builds OpenCL C/C++ program and creates a kernel.
//
// context:        not used by this function.
// outProgram:     program to build (must already be created).
// outKernel:      receives the kernel when kernelName is non-NULL.
// numKernelLines: number of entries in kernelProgram (used for diagnostics).
// kernelProgram:  source strings, printed when the build fails.
// kernelName:     kernel to create; NULL skips kernel creation.
// buildOptions:   options passed to clBuildProgram, may be NULL.
//
// After clBuildProgram the build status of every program device is checked.
// The first device whose status is not CL_BUILD_SUCCESS gets its build log
// printed and the function returns -1.
//
// Returns 0 on success, -1 on a build failure, or the OpenCL error of the
// failing query / kernel creation.
int build_program_create_kernel_helper(
    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *kernelName, const char *buildOptions)
{
    int error;
    /* Compile the program */
    int buildProgramFailed = 0;
    int printedSource = 0;
    error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
    if (error != CL_SUCCESS)
    {
        print_error(error, "clBuildProgram failed");
        buildProgramFailed = 1;
        printedSource = 1;
        // buildOptions may be NULL, which must not be passed to "%s"
        log_error("Build options: %s\n",
                  buildOptions ? buildOptions : "(null)");
        log_error("Original source is: ------------\n");
        for (unsigned int i = 0; i < numKernelLines; i++)
            log_error("%s", kernelProgram[i]);
    }
    // Verify the build status on all devices
    cl_uint deviceCount = 0;
    error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
                             sizeof(deviceCount), &deviceCount, NULL);
    if (error != CL_SUCCESS)
    {
        print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
        return error;
    }
    if (deviceCount == 0)
    {
        log_error("No devices found for program.\n");
        return -1;
    }
    std::vector<cl_device_id> devices(deviceCount);
    error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
                             sizeof(cl_device_id) * deviceCount,
                             devices.data(), NULL);
    if (error != CL_SUCCESS)
    {
        print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
        return error;
    }
    bool buildFailed = false;
    for (cl_uint z = 0; z < deviceCount; z++)
    {
        // The name is only used in diagnostics; a failure here is logged but
        // does not stop the status check.
        char deviceName[4096] = "";
        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
                                deviceName, NULL);
        if (error != CL_SUCCESS || deviceName[0] == '\0')
        {
            log_error("Device \"%u\" failed to return a name\n", z);
            print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
        }
        cl_build_status buildStatus;
        error = clGetProgramBuildInfo(*outProgram, devices[z],
                                      CL_PROGRAM_BUILD_STATUS,
                                      sizeof(buildStatus), &buildStatus, NULL);
        if (error != CL_SUCCESS)
        {
            print_error(error,
                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
            return error;
        }
        if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
            && deviceCount == 1)
        {
            buildFailed = true;
            log_error("clBuildProgram returned an error, but buildStatus is "
                      "marked as CL_BUILD_SUCCESS.\n");
        }
        if (buildStatus != CL_BUILD_SUCCESS)
        {
            char statusString[64] = "";
            if (buildStatus == (cl_build_status)CL_BUILD_NONE)
                snprintf(statusString, sizeof(statusString), "CL_BUILD_NONE");
            else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
                snprintf(statusString, sizeof(statusString), "CL_BUILD_ERROR");
            else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
                snprintf(statusString, sizeof(statusString),
                         "CL_BUILD_IN_PROGRESS");
            else
                snprintf(statusString, sizeof(statusString), "UNKNOWN (%d)",
                         buildStatus);
            log_error("Build not successful for device \"%s\", status: %s\n",
                      deviceName, statusString);
            // Query the log size first, then fetch the log itself
            size_t paramSize = 0;
            error = clGetProgramBuildInfo(*outProgram, devices[z],
                                          CL_PROGRAM_BUILD_LOG, 0, NULL,
                                          &paramSize);
            if (error != CL_SUCCESS)
            {
                print_error(
                    error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                return error;
            }
            std::string log;
            log.resize(paramSize / sizeof(char));
            error = clGetProgramBuildInfo(*outProgram, devices[z],
                                          CL_PROGRAM_BUILD_LOG, paramSize,
                                          &log[0], NULL);
            if (error != CL_SUCCESS || log[0] == '\0')
            {
                log_error("Device %u (%s) failed to return a build log\n", z,
                          deviceName);
                if (error)
                {
                    print_error(
                        error,
                        "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                    return error;
                }
                else
                {
                    log_error("clGetProgramBuildInfo returned an empty log.\n");
                    return -1;
                }
            }
            // In this case we've already printed out the code above.
            if (!printedSource)
            {
                log_error("Original source is: ------------\n");
                for (unsigned int i = 0; i < numKernelLines; i++)
                    log_error("%s", kernelProgram[i]);
                printedSource = 1;
            }
            log_error("Build log for device \"%s\" is: ------------\n",
                      deviceName);
            log_error("%s\n", log.c_str());
            log_error("\n----------\n");
            return -1;
        }
    }
    if (buildFailed)
    {
        return -1;
    }
    /* And create a kernel from it */
    if (kernelName != NULL)
    {
        *outKernel = clCreateKernel(*outProgram, kernelName, &error);
        if (*outKernel == NULL || error != CL_SUCCESS)
        {
            print_error(error, "Unable to create kernel");
            return error;
        }
    }
    return 0;
}
// Computes work-group limits that are valid on every device of `context`.
//
// context:    context whose devices are examined.
// kernel:     kernel whose CL_KERNEL_WORK_GROUP_SIZE is taken into account.
// outMaxSize: receives the smallest value found among
//             CL_DEVICE_MAX_WORK_GROUP_SIZE and CL_KERNEL_WORK_GROUP_SIZE
//             over all devices.
// outLimits:  optional (may be NULL); receives three entries holding the
//             per-dimension minimum of CL_DEVICE_MAX_WORK_ITEM_SIZES over all
//             devices. Dimensions a device does not report count as 1 for
//             the first device.
//
// Returns 0 on success, or whatever test_error() returns when a query fails.
int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
                                    size_t *outMaxSize, size_t *outLimits)
{
    // outLimits is treated as an array of exactly this many entries
    const size_t kReportedDims = 3;
    size_t size, maxCommonSize = 0;
    int error;
    size_t outSize = 0;

    // Ask for the size of the device list first, then fetch the list
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
    test_error(error, "Unable to obtain list of devices size for context");
    std::vector<cl_device_id> devices(outSize / sizeof(cl_device_id));
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), NULL);
    test_error(error, "Unable to obtain list of devices for context");

    for (size_t i = 0; i < devices.size(); i++)
    {
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                sizeof(size), &size, NULL);
        test_error(error, "Unable to obtain max work group size for device");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        error = clGetKernelWorkGroupInfo(kernel, devices[i],
                                         CL_KERNEL_WORK_GROUP_SIZE,
                                         sizeof(size), &size, NULL);
        test_error(
            error,
            "Unable to obtain max work group size for device and kernel combo");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        cl_uint numDims = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                                sizeof(numDims), &numDims, NULL);
        test_error(
            error,
            "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");

        // The query writes numDims entries, so the buffer must hold at least
        // that many; it never shrinks below the three entries read below.
        std::vector<size_t> sizeLimit(
            std::max<size_t>(numDims, kReportedDims), 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                numDims * sizeof(size_t), sizeLimit.data(),
                                NULL);
        test_error(error,
                   "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

        if (outLimits != NULL)
        {
            if (i == 0)
            {
                for (size_t j = 0; j < kReportedDims; j++)
                    outLimits[j] = sizeLimit[j];
            }
            else
            {
                // Only dimensions this device reports can lower a limit, and
                // only the entries filled in for the first device exist.
                const size_t comparedDims =
                    std::min<size_t>(numDims, kReportedDims);
                for (size_t j = 0; j < comparedDims; j++)
                {
                    if (sizeLimit[j] < outLimits[j])
                        outLimits[j] = sizeLimit[j];
                }
            }
        }
    }
    *outMaxSize = maxCommonSize;
    return 0;
}
// Computes the largest 1D work-group size usable for `kernel` on `device`:
// CL_KERNEL_WORK_GROUP_SIZE clamped to the first entry of
// CL_DEVICE_MAX_WORK_ITEM_SIZES.
//
// On success stores the result in *outSize and returns 0. Returns a
// non-zero value (an OpenCL error code, or -1 when the device reports zero
// work-item dimensions) on failure, leaving *outSize untouched.
extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
                                                        cl_kernel kernel,
                                                        size_t *outSize)
{
    size_t maxWgSize = 0;
    int error =
        clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                 sizeof(size_t), &maxWgSize, NULL);
    test_error(error,
               "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");

    cl_uint maxDim = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(cl_uint), &maxDim, NULL);
    test_error(error,
               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");

    // Element 0 is read below, so there must be at least one dimension.
    if (maxDim == 0)
    {
        log_error("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS reported zero "
                  "dimensions\n");
        return -1;
    }

    // The vector releases its storage on every return path.
    std::vector<size_t> maxWgSizePerDim(maxDim);
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                            maxDim * sizeof(size_t), maxWgSizePerDim.data(),
                            NULL);
    if (error != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
        return error;
    }

    // "maxWgSize" is limited to that of the first dimension.
    *outSize = (std::min)(maxWgSize, maxWgSizePerDim[0]);
    return 0;
}
// Finds the largest 1D work-group size that divides globalThreadSize evenly
// and respects both the common maximum work-group size and the
// first-dimension work-item limit reported by
// get_max_allowed_work_group_size. The result is written to *outMaxSize.
// Returns 0 on success or the helper's non-zero error code.
int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
                                   size_t globalThreadSize, size_t *outMaxSize)
{
    size_t perDimLimit[3];
    const int status = get_max_allowed_work_group_size(context, kernel,
                                                       outMaxSize, perDimLimit);
    if (status != 0) return status;

    // Step down from the common maximum until the candidate is a factor of
    // the global size and fits the first dimension. No explicit lower bound
    // is checked: a candidate of 1 always divides the global size.
    size_t candidate = *outMaxSize;
    while ((globalThreadSize % candidate) != 0 || candidate > perDimLimit[0])
    {
        --candidate;
    }
    *outMaxSize = candidate;
    return 0;
}
// Chooses a 2D work-group size for the given global sizes: each entry of
// outMaxSizes divides the matching entry of globalThreadSizes and stays
// within the per-dimension limit, while the running product is kept within
// the common maximum work-group size.
// Returns 0 on success or the error from get_max_allowed_work_group_size.
int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t perDimLimit[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(
        context, kernel, &totalLimit, perDimLimit);
    if (status != 0) return status;

    // Simple case: the whole global range already fits in one work-group.
    const bool fitsTotal =
        globalThreadSizes[0] * globalThreadSizes[1] <= totalLimit;
    const bool fitsPerDim = globalThreadSizes[0] <= perDimLimit[0]
        && globalThreadSizes[1] <= perDimLimit[1];
    if (fitsTotal && fitsPerDim)
    {
        outMaxSizes[0] = globalThreadSizes[0];
        outMaxSizes[1] = globalThreadSizes[1];
        return 0;
    }

    // Otherwise pick, per dimension, the largest factor of the global size
    // that fits in what is left of the total budget.
    size_t budget = totalLimit;
    for (int dim = 0; dim < 2; ++dim)
    {
        size_t candidate = (globalThreadSizes[dim] > budget)
            ? budget
            : globalThreadSizes[dim];
        while ((globalThreadSizes[dim] % candidate) != 0
               || candidate > perDimLimit[dim])
        {
            --candidate;
        }
        outMaxSizes[dim] = candidate;
        // Dividing the running budget is the same as re-dividing the total
        // limit by every size chosen so far.
        budget /= candidate;
    }
    return 0;
}
// Chooses a 3D work-group size for the given global sizes: each entry of
// outMaxSizes divides the matching entry of globalThreadSizes and stays
// within the per-dimension limit, while the running product is kept within
// the common maximum work-group size.
// Returns 0 on success or the error from get_max_allowed_work_group_size.
int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t perDimLimit[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(
        context, kernel, &totalLimit, perDimLimit);
    if (status != 0) return status;

    // Simple case: the whole global range already fits in one work-group.
    const bool fitsTotal =
        globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
        <= totalLimit;
    const bool fitsPerDim = globalThreadSizes[0] <= perDimLimit[0]
        && globalThreadSizes[1] <= perDimLimit[1]
        && globalThreadSizes[2] <= perDimLimit[2];
    if (fitsTotal && fitsPerDim)
    {
        for (int dim = 0; dim < 3; ++dim)
            outMaxSizes[dim] = globalThreadSizes[dim];
        return 0;
    }

    // Otherwise pick, per dimension, the largest factor of the global size
    // that fits in what is left of the total budget.
    size_t budget = totalLimit;
    for (int dim = 0; dim < 3; ++dim)
    {
        size_t candidate = (globalThreadSizes[dim] > budget)
            ? budget
            : globalThreadSizes[dim];
        while ((globalThreadSizes[dim] % candidate) != 0
               || candidate > perDimLimit[dim])
        {
            --candidate;
        }
        outMaxSizes[dim] = candidate;
        // Dividing the running budget is the same as re-dividing the total
        // limit by every size chosen so far.
        budget /= candidate;
    }
    return 0;
}
/* Helper to determine if a device supports an image format.
 *
 * Queries clGetSupportedImageFormats for the given context, flags and image
 * type and looks for an entry whose channel order and channel data type both
 * match *fmt.
 *
 * Returns 1 when a matching format is listed, 0 when it is not listed or when
 * either query fails (failures are logged). */
int is_image_format_supported(cl_context context, cl_mem_flags flags,
                              cl_mem_object_type image_type,
                              const cl_image_format *fmt)
{
    // First ask only for the number of formats (no output buffer).
    cl_uint count = 0;
    cl_int error = clGetSupportedImageFormats(context, flags, image_type, 0,
                                              NULL, &count);
    if (error != CL_SUCCESS)
    {
        log_error("Error: failed to obtain supported image format count at "
                  "%s:%d (err = %d)\n",
                  __FILE__, __LINE__, error);
        return 0;
    }
    if (count == 0) return 0;

    std::vector<cl_image_format> formats(count);
    error = clGetSupportedImageFormats(context, flags, image_type, count,
                                       formats.data(), NULL);
    if (error != CL_SUCCESS)
    {
        // Report the status of this call, not of the count query.
        log_error("Error: failed to obtain supported image type list at %s:%d "
                  "(err = %d)\n",
                  __FILE__, __LINE__, error);
        return 0;
    }

    // iterate looking for a match.
    const bool found = std::any_of(
        formats.begin(), formats.end(), [fmt](const cl_image_format &entry) {
            return fmt->image_channel_data_type
                == entry.image_channel_data_type
                && fmt->image_channel_order == entry.image_channel_order;
        });
    return found ? 1 : 0;
}
size_t get_pixel_bytes(const cl_image_format *fmt);
// Returns the size in bytes of one pixel of the given image format.
//
// The channel count is derived from image_channel_order and multiplied by the
// per-channel width implied by image_channel_data_type; packed data types
// (565, 555, 101010, 101010_2) have a fixed pixel size regardless of the
// channel count. An unrecognised channel order or data type is logged and
// the process is aborted.
size_t get_pixel_bytes(const cl_image_format *fmt)
{
    size_t chanCount;
    switch (fmt->image_channel_order)
    {
        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
        case CL_DEPTH: chanCount = 1; break;
        case CL_RG:
        case CL_RA:
        case CL_RGx: chanCount = 2; break;
        case CL_RGB:
        case CL_RGBx:
        case CL_sRGB:
        case CL_sRGBx: chanCount = 3; break;
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
// Guarded like the vendor formats below in case the headers in use do not
// define it.
#ifdef CL_ABGR
        case CL_ABGR:
#endif
        case CL_sBGRA:
        case CL_sRGBA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            chanCount = 4;
            break;
        default:
            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
            abort();
            break;
    }
    switch (fmt->image_channel_data_type)
    {
        // Packed formats: the whole pixel lives in one 16- or 32-bit word.
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555: return 2;
        case CL_UNORM_INT_101010:
#ifdef CL_UNORM_INT_101010_2
        case CL_UNORM_INT_101010_2:
#endif
            return 4;
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8: return chanCount;
        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_HALF_FLOAT:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            return chanCount * 2;
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
        case CL_FLOAT: return chanCount * 4;
        default:
            log_error("Unknown channel data type at %s:%d!\n", __FILE__,
                      __LINE__);
            abort();
    }
    return 0;
}
// Maps the result of checkForImageSupport onto a test status:
// 0 -> TEST_PASS, CL_IMAGE_FORMAT_NOT_SUPPORTED -> TEST_SKIP (with a log
// message), anything else -> TEST_FAIL.
test_status verifyImageSupport(cl_device_id device)
{
    switch (checkForImageSupport(device))
    {
        case 0: return TEST_PASS;
        case CL_IMAGE_FORMAT_NOT_SUPPORTED:
            log_error(
                "SKIPPED: Device does not supported images as required by "
                "this test!\n");
            return TEST_SKIP;
        default: return TEST_FAIL;
    }
}
// Queries CL_DEVICE_IMAGE_SUPPORT for the device.
// Returns 0 when the device reports image support and
// CL_IMAGE_FORMAT_NOT_SUPPORTED when it reports none; a failed query is
// handled by test_error.
int checkForImageSupport(cl_device_id device)
{
    cl_uint imagesSupported;
    const int queryStatus =
        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                        sizeof(imagesSupported), &imagesSupported, NULL);
    test_error(queryStatus, "Unable to query device for image support");

    return (imagesSupported == 0) ? CL_IMAGE_FORMAT_NOT_SUPPORTED : 0;
}
// Checks whether the device can be used for 3D image tests.
//
// Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the device reports no image
// support at all, or when it is an EMBEDDED_PROFILE device whose maximum 3D
// image width, height and depth are all zero. Returns 0 otherwise. Failed
// queries are handled by test_error.
int checkFor3DImageSupport(cl_device_id device)
{
    /* Images must be supported at all before 3D images can be. */
    cl_uint imagesSupported;
    int status =
        clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                        sizeof(imagesSupported), &imagesSupported, NULL);
    test_error(status, "Unable to query device for image support");
    if (imagesSupported == 0) return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    char profileName[128];
    status = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileName),
                             profileName, NULL);
    test_error(status, "Unable to query device for CL_DEVICE_PROFILE");

    /* Only embedded-profile devices get the extra dimension check. */
    if (strcmp(profileName, "EMBEDDED_PROFILE") != 0) return 0;

    size_t maxWidth = -1L;
    size_t maxHeight = -1L;
    size_t maxDepth = -1L;
    status = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
                             sizeof(maxWidth), &maxWidth, NULL);
    test_error(status, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
    status = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                             sizeof(maxHeight), &maxHeight, NULL);
    test_error(status, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
    status = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
                             sizeof(maxDepth), &maxDepth, NULL);
    test_error(status, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");

    /* All three maxima being zero means no 3D image support. */
    const bool allZero = (maxWidth == 0 && maxHeight == 0 && maxDepth == 0);
    return allZero ? CL_IMAGE_FORMAT_NOT_SUPPORTED : 0;
}
// Checks whether read-write image tests can run on the device.
//
// Returns 0 when they can. Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the
// device has no image support, when it is older than OpenCL 2.0, or when it
// is an OpenCL 3.0+ device reporting zero for
// CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS. A failed device query is handled by
// test_error.
int checkForReadWriteImageSupport(cl_device_id device)
{
    if (checkForImageSupport(device))
    {
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }
    auto device_cl_version = get_device_cl_version(device);
    if (device_cl_version >= Version(3, 0))
    {
        // In OpenCL 3.0, Read-Write images are optional.
        // Check if they are supported.
        cl_uint max_rw_image_args{};
        // Keep the call out of the macro argument so it runs exactly once
        // however test_error expands its parameter.
        const cl_int error = clGetDeviceInfo(
            device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
            sizeof(max_rw_image_args), &max_rw_image_args, nullptr);
        test_error(
            error,
            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS");
        if (0 == max_rw_image_args)
        {
            log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
            return CL_IMAGE_FORMAT_NOT_SUPPORTED;
        }
    }
    // READ_WRITE images are not supported on 1.X devices.
    else if (device_cl_version < Version(2, 0))
    {
        log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.\n");
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }
    // Support for read-write image arguments is required
    // for an 2.X device if the device supports images.
    /* So our support is good */
    return 0;
}
// Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN, converted from bits to
// bytes, over all devices in the context.
//
// The value is cached in a function-local static once it is non-zero, so
// later calls return the cached value regardless of the context passed in.
// Returns 0 when the device list cannot be obtained; a device whose
// alignment query fails is logged and skipped.
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;
    if (0 == align_size)
    {
        size_t devices_size = 0;
        cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
                                        &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        // A vector frees the device list on every return path, including
        // the early return just below.
        std::vector<cl_device_id> devices(devices_size / sizeof(cl_device_id));
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                                 devices.size() * sizeof(cl_device_id),
                                 devices.data(), NULL);
        test_error_ret(error, "clGetContextInfo failed", 0);

        cl_uint result = 0;
        for (cl_device_id device : devices)
        {
            cl_uint alignment = 0;
            error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                    sizeof(cl_uint), &alignment, NULL);
            if (error == CL_SUCCESS)
            {
                alignment >>= 3; // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                print_error(error, "clGetDeviceInfo failed");
        }
        align_size = result;
    }
    return align_size;
}
// Returns the default single-precision rounding mode of the device.
//
// - CL_FP_ROUND_TO_NEAREST when CL_DEVICE_SINGLE_FP_CONFIG includes it.
// - CL_FP_ROUND_TO_ZERO when only round-to-zero is reported and the device
//   profile is EMBEDDED_PROFILE.
// - 0 on any failure: a failed query, neither rounding mode reported, or a
//   non-embedded device without round-to-nearest.
//
// NOTE(review): this relies on test_error_ret (defined outside this file)
// returning its third argument only for codes other than CL_SUCCESS —
// confirm against errorHelpers.h.
cl_device_fp_config get_default_rounding_mode(cl_device_id device)
{
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
                                sizeof(single), &single, NULL);
    test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0);

    if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;

    if (0 == (single & CL_FP_ROUND_TO_ZERO))
    {
        test_error_ret(-1,
                       "FAILURE: device must support either "
                       "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
                       0);
    }

    // Make sure we are an embedded device before allowing a pass
    char profileStr[128] = "";
    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
                            profileStr, NULL);
    test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);

    if (strcmp(profileStr, "EMBEDDED_PROFILE") != 0)
    {
        // `error` holds CL_SUCCESS at this point, so an explicit failure
        // code is passed to make the rejection take effect.
        test_error_ret(-1,
                       "FAILURE: non-EMBEDDED_PROFILE devices must support "
                       "CL_FP_ROUND_TO_NEAREST",
                       0);
    }
    return CL_FP_ROUND_TO_ZERO;
}
// Reports whether any bit of `prop` is set in the device's
// CL_DEVICE_QUEUE_ON_HOST_PROPERTIES. Returns 1 if so, 0 otherwise; a failed
// query is handed to test_error_ret with 0 as the return value.
int checkDeviceForQueueSupport(cl_device_id device,
                               cl_command_queue_properties prop)
{
    cl_command_queue_properties supported;
    const cl_int status =
        clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
                        sizeof(supported), &supported, NULL);
    test_error_ret(status, "FAILURE: Unable to get device queue properties",
                   0);

    if ((supported & prop) != 0) return 1;
    return 0;
}
// Logs a one-line summary of the device: name, vendor, device version and
// OpenCL C version. For devices reporting OpenCL 3.0 or later it also logs
// CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED.
//
// Returns CL_SUCCESS when all four device queries succeed; a failed query is
// handled by test_error.
int printDeviceHeader(cl_device_id device)
{
    char deviceName[512], deviceVendor[512], deviceVersion[512],
        cLangVersion[512];
    int error;

    error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
                            deviceName, NULL);
    test_error(error, "Unable to get CL_DEVICE_NAME for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
                            deviceVendor, NULL);
    test_error(error, "Unable to get CL_DEVICE_VENDOR for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
                            deviceVersion, NULL);
    test_error(error, "Unable to get CL_DEVICE_VERSION for device");

    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                            sizeof(cLangVersion), cLangVersion, NULL);
    test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");

    // Every query above has been checked, so the OpenCL C version is always
    // available here and is printed unconditionally.
    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
             "Device Version = %s, CL C Version = %s\n",
             deviceName, deviceVendor, deviceVersion, cLangVersion);

    auto version = get_device_cl_version(device);
    if (version >= Version(3, 0))
    {
        auto ctsVersion = get_device_info_string(
            device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
        log_info("Device latest conformance version passed: %s\n",
                 ctsVersion.c_str());
    }
    return CL_SUCCESS;
}
// Returns the OpenCL C version reported by CL_DEVICE_OPENCL_C_VERSION.
//
// Devices reporting OpenCL 1.0 yield Version{ 1, 0 } without a query. For
// all other devices the single digits on either side of the first '.' in
// the returned string are used as major and minor version.
// Returns Version{ -1, 0 } when a query fails or the string cannot be parsed.
Version get_device_cl_c_version(cl_device_id device)
{
    auto device_cl_version = get_device_cl_version(device);

    // Special case: OpenCL 1.0, where CL_DEVICE_OPENCL_C_VERSION did not
    // exist. Since this is just the first version we can return 1.0.
    if (device_cl_version == Version{ 1, 0 })
    {
        return Version{ 1, 0 };
    }

    // Otherwise the CL_DEVICE_OPENCL_C_VERSION query is available.
    size_t opencl_c_version_size_in_bytes{};
    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
                                 &opencl_c_version_size_in_bytes);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
    error =
        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                        opencl_c_version.size(), &opencl_c_version[0], nullptr);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    // Scrape out the major, minor pair from the string, making sure there
    // really is a digit on both sides of the '.' before indexing.
    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const size_t dot = opencl_c_version.find('.');
    const bool has_neighbours = dot != std::string::npos && dot > 0
        && dot + 1 < opencl_c_version.size();
    if (!has_neighbours || !is_digit(opencl_c_version[dot - 1])
        || !is_digit(opencl_c_version[dot + 1]))
    {
        log_error("Unable to parse CL_DEVICE_OPENCL_C_VERSION string \"%s\"\n",
                  opencl_c_version.c_str());
        return Version{ -1, 0 };
    }

    auto major = opencl_c_version[dot - 1];
    auto minor = opencl_c_version[dot + 1];
    return Version{ major - '0', minor - '0' };
}
// Returns the newest OpenCL C version the device supports.
//
// Devices reporting OpenCL 3.0 or later are asked for
// CL_DEVICE_OPENCL_C_ALL_VERSIONS and the highest listed version is returned
// (a default-constructed Version if the list is empty). Older devices defer
// to get_device_cl_c_version. A failed query yields Version{ -1, 0 }.
Version get_device_latest_cl_c_version(cl_device_id device)
{
    auto device_cl_version = get_device_cl_version(device);
    if (device_cl_version < Version{ 3, 0 })
    {
        return get_device_cl_c_version(device);
    }

    // First query the size of the version list, then the list itself.
    size_t list_size_in_bytes{};
    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0,
                                 nullptr, &list_size_in_bytes);
    test_error_ret(
        error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
        (Version{ -1, 0 }));

    std::vector<cl_name_version> listed(list_size_in_bytes
                                        / sizeof(cl_name_version));
    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                            list_size_in_bytes, listed.data(), nullptr);
    test_error_ret(
        error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
        (Version{ -1, 0 }));

    // Keep the highest version seen while walking the list.
    Version newest{};
    for (const auto &entry : listed)
    {
        Version candidate{ CL_VERSION_MAJOR(entry.version),
                           CL_VERSION_MINOR(entry.version) };
        if (candidate > newest)
        {
            newest = candidate;
        }
    }
    return newest;
}
// Returns the highest OpenCL C version usable on every device in the
// context.
//
// Starts from the latest version of the first device and folds in each
// further device: normally the lower of the two versions wins, but when one
// side is a 2.x version and the other is 3.0 or later the result drops to
// 1.2. Returns Version{ -1, 0 } when the device list cannot be obtained or
// is empty.
Version get_max_OpenCL_C_for_context(cl_context context)
{
    // Get all the devices in the context and find the maximum
    // universally supported OpenCL C version.
    size_t devices_size_in_bytes{};
    auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
                                  &devices_size_in_bytes);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    std::vector<cl_device_id> devices(devices_size_in_bytes
                                      / sizeof(cl_device_id));
    // devices[0] is read below, so an empty list must be rejected first.
    if (devices.empty())
    {
        log_error("clGetContextInfo returned no devices for the context\n");
        return Version{ -1, 0 };
    }

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), nullptr);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    auto is_2x = [](Version v) {
        return v >= Version(2, 0) && v < Version(3, 0);
    };

    auto current_version = get_device_latest_cl_c_version(devices[0]);
    for (size_t i = 1; i < devices.size(); ++i)
    {
        auto device_version = get_device_latest_cl_c_version(devices[i]);

        // OpenCL 3.0 is not backwards compatible with 2.0.
        // If we have 3.0 and 2.0 in the same driver we
        // use 1.2.
        const bool mixes_2x_and_3x =
            (is_2x(device_version) && current_version >= Version(3, 0))
            || (device_version >= Version(3, 0) && is_2x(current_version));
        if (mixes_2x_and_3x)
        {
            current_version = Version(1, 2);
        }
        else
        {
            current_version = (std::min)(device_version, current_version);
        }
    }
    return current_version;
}
// Reports whether the device supports the given OpenCL C version.
//
// For devices reporting OpenCL 3.0 or later the version counts as supported
// when it appears in CL_DEVICE_OPENCL_C_ALL_VERSIONS (a failed query returns
// false). In every other case, including a 3.0+ device that does not list
// it, the answer is whether `version` is <= the version returned by
// get_device_cl_c_version.
bool device_supports_cl_c_version(cl_device_id device, Version version)
{
    auto device_cl_version = get_device_cl_version(device);
    if (device_cl_version >= Version{ 3, 0 })
    {
        // Query the size of the version list, then the list itself.
        size_t list_size_in_bytes{};
        auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                     0, nullptr, &list_size_in_bytes);
        test_error_ret(
            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        std::vector<cl_name_version> listed(list_size_in_bytes
                                            / sizeof(cl_name_version));
        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                list_size_in_bytes, listed.data(), nullptr);
        test_error_ret(
            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        const bool is_listed = std::any_of(
            listed.begin(), listed.end(),
            [&version](const cl_name_version &entry) {
                Version listed_version{ CL_VERSION_MAJOR(entry.version),
                                        CL_VERSION_MINOR(entry.version) };
                return listed_version == version;
            });
        if (is_listed)
        {
            return true;
        }
    }

    // Fall back to comparing against CL_DEVICE_OPENCL_C_VERSION.
    return version <= get_device_cl_c_version(device);
}
// Calls fn repeatedly until it returns true or the time budget is used up.
//
// After each unsuccessful call the function sleeps for interval_ms and adds
// interval_ms to an elapsed counter; polling stops once that counter reaches
// timeout_ms. Elapsed time is the sum of the requested sleeps, not a
// measured duration.
//
// Returns true as soon as fn returns true, false otherwise. With a
// timeout_ms of 0 fn is never called. With an interval_ms of 0 the counter
// never advances, so the loop ends only when fn returns true.
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned elapsed_ms = 0; elapsed_ms < timeout_ms;
         elapsed_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }
    return false;
}