//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <vector>
#include <sstream>
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "common.h"
// SVM Atomic wrappers.
// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host-side functions correctly.
// Platforms that do not support SVM atomics can simply implement these functions as empty stubs, since the functions will not be called.
// For now only MSVC on x86/x64 and GCC-based toolchains are implemented; add support for other platforms as needed.
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
{
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
return *pValue; // provided the value is aligned, a plain load is sufficient for a seq_cst load on x86.
#elif defined(__GNUC__)
return __sync_add_and_fetch(pValue, 0);
#else
log_error("ERROR: AtomicLoadExplicit function not implemented\n");
return -1;
#endif
}
// All the x86 atomics used here are seq_cst, so the memory-order parameter can be ignored.
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
return InterlockedExchangeAdd( (volatile LONG*) object, operand);
#elif defined(__GNUC__)
return __sync_fetch_and_add(object, operand);
#else
log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
return -1;
#endif
}
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
{
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
return InterlockedExchange( (volatile LONG*) object, desired);
#elif defined(__GNUC__)
// Note: __sync_lock_test_and_set is documented as only an acquire barrier,
// but it maps to a full-barrier xchg on x86, which is what these tests target.
return __sync_lock_test_and_set(object, desired);
#else
log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
return -1;
#endif
}
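// A minimal porting sketch (not used by this suite): toolchains that provide
// C11 <stdatomic.h> could implement these wrappers portably along these lines,
// assuming the pointed-to storage may be treated as atomic, which is
// toolchain-specific:
//
//   #include <stdatomic.h>
//
//   cl_int AtomicLoadExplicit(volatile cl_int *pValue, cl_memory_order order)
//   {
//       // seq_cst is always safe regardless of the requested order
//       return atomic_load_explicit((volatile _Atomic cl_int *)pValue,
//                                   memory_order_seq_cst);
//   }
//
//   cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand,
//                                 cl_memory_order o)
//   {
//       return atomic_fetch_add_explicit((volatile _Atomic cl_int *)object,
//                                        operand, memory_order_seq_cst);
//   }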
const char *linked_list_create_and_verify_kernels[] = {
"typedef struct Node {\n"
" int global_id;\n"
" int position_in_list;\n"
" __global struct Node* pNext;\n"
"} Node;\n"
"\n"
// The allocation_index parameter must be initialized on the host to N, the number of work-items.
// The first N nodes in pNodes will be the heads of the lists.
"__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" pNode->global_id = i;\n"
" pNode->position_in_list = 0;\n"
"\n"
" __global Node *pNew;\n"
" for(int j=1; j < list_length; j++)\n"
" {\n"
" pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
" pNew->global_id = i;\n"
" pNew->position_in_list = j;\n"
" pNode->pNext = pNew; // link new node onto end of list\n"
" pNode = pNew; // move to end of list\n"
" }\n"
"}\n"
"__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" __global Node *pNode = &pNodes[i];\n"
"\n"
" for(int j=0; j < list_length; j++)\n"
" {\n"
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
" {\n"
" atomic_inc(num_correct);\n"
" } \n"
" else {\n"
" break;\n"
" }\n"
" pNode = pNode->pNext;\n"
" }\n"
"}\n"
};
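// Sketch of the host-side setup the kernels above expect (the names and sizes
// here are illustrative assumptions, not taken from the tests in this suite;
// with coarse-grain SVM the host writes below would also need
// clEnqueueSVMMap/Unmap):
//
//   cl_uint num_lists = 256;
//   cl_int list_length = 32; // one list head per work-item
//   Node *pNodes = (Node *)clSVMAlloc(context, CL_MEM_READ_WRITE,
//                                     sizeof(Node) * num_lists * list_length, 0);
//   cl_int *pAllocator = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE,
//                                             sizeof(cl_int), 0);
//   *pAllocator = (cl_int)num_lists; // heads occupy the first num_lists nodes
//   clSetKernelArgSVMPointer(kernel_create, 0, pNodes);
//   clSetKernelArgSVMPointer(kernel_create, 1, pAllocator);
//   clSetKernelArg(kernel_create, 2, sizeof(cl_int), &list_length);
//   size_t global = num_lists;
//   clEnqueueNDRangeKernel(queue, kernel_create, 1, NULL, &global, NULL,
//                          0, NULL, NULL);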
// The first N nodes in pNodes will be the heads of the lists.
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
size_t allocation_index = num_lists; // heads of lists are in first num_lists nodes.
for(cl_uint i = 0; i < num_lists; i++)
{
Node *pNode = &pNodes[i];
pNode->global_id = i;
pNode->position_in_list = 0;
Node *pNew;
for(int j=1; j < list_length; j++)
{
pNew = &pNodes[ allocation_index++ ];// allocate a new node
pNew->global_id = i;
pNew->position_in_list = j;
pNode->pNext = pNew; // link new node onto end of list
pNode = pNew; // move to end of list
}
}
}
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
{
cl_int error = CL_SUCCESS;
cl_uint numCorrect = 0;
log_info(" and verifying on host ");
for(cl_uint i=0; i < num_lists; i++)
{
Node *pNode = &pNodes[i];
for(int j=0; j < list_length; j++)
{
if( pNode->global_id == i && pNode->position_in_list == j)
{
numCorrect++;
}
else {
break;
}
pNode = pNode->pNext;
}
}
if(numCorrect != list_length * (cl_uint)num_lists)
{
error = -1;
log_info("Failed\n");
}
else
log_info("Passed\n");
return error;
}
// Note that we don't use the context provided by the test harness, since it doesn't support multiple devices,
// so we create our own context here containing all devices; we use the same platform that the harness used.
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list)
{
cl_int error;
cl_platform_id platform_id;
// find out what platform the harness is using.
error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform_id, NULL);
test_error(error, "clGetDeviceInfo failed");
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
test_error(error, "clGetDeviceIDs failed");
std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
test_error(error, "clGetDeviceIDs failed");
devices.push_back(device_from_harness);
for (size_t i = 0; i < devicesTmp.size(); ++i)
{
if (device_from_harness != devicesTmp[i])
devices.push_back(devicesTmp[i]);
}
// Select only the devices that support the SVM level needed for the test.
// Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
cl_device_svm_capabilities caps;
cl_uint num_capable_devices = 0;
for(cl_uint i = 0; i < *num_devices; i++)
{
Version version = get_device_cl_version(devices[i]);
if(device_from_harness != devices[i] && version < Version(2,0))
{
continue;
}
error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
{
log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
return -1;
}
bool extensions_supported = true;
for (const auto &extension : extensions_list)
{
if (!is_extension_available(devices[i], extension.c_str()))
{
log_error("Required extension not found - device id %d - %s\n", i,
extension.c_str());
extensions_supported = false;
break;
}
}
if((caps & required_svm_caps) == required_svm_caps && extensions_supported)
{
capable_devices.push_back(devices[i]);
++num_capable_devices;
}
}
devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
*num_devices = num_capable_devices;
if(num_capable_devices == 0)
{
log_info("Requested SVM level or required extensions not supported by any device on this platform, test not executed.\n");
return 1; // 1 indicates do not execute, but counts as passing.
}
cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
*context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
test_error(error, "Unable to create context" );
// The caller supplies storage for one command queue per capable device.
for(cl_uint i = 0; i < *num_devices; i++)
{
queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
test_error(error, "clCreateCommandQueue failed");
}
if (ppCodeString)
{
error =
create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0);
test_error(error, "failed to create program");
}
return 0;
}
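// Typical usage from a test entry point (a sketch; MAX_DEVICES and deviceID
// are illustrative assumptions, the real tests live elsewhere in this
// directory):
//
//   cl_context context;
//   cl_program program;
//   cl_command_queue queues[MAX_DEVICES]; // caller-provided, one per device
//   cl_uint num_devices;
//   cl_int err = create_cl_objects(deviceID,
//                                  linked_list_create_and_verify_kernels,
//                                  &context, &program, queues, &num_devices,
//                                  CL_DEVICE_SVM_FINE_GRAIN_BUFFER,
//                                  std::vector<std::string>());
//   if (err == 1) return 0; // no capable device: counts as a pass
//   test_error(err, "create_cl_objects failed");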
test_definition test_list[] = {
ADD_TEST( svm_byte_granularity),
ADD_TEST( svm_set_kernel_exec_info_svm_ptrs ),
ADD_TEST( svm_fine_grain_memory_consistency ),
ADD_TEST( svm_fine_grain_sync_buffers ),
ADD_TEST( svm_shared_address_space_fine_grain ),
ADD_TEST( svm_shared_sub_buffers ),
ADD_TEST( svm_shared_address_space_fine_grain_buffers ),
ADD_TEST( svm_allocate_shared_buffer ),
ADD_TEST( svm_shared_address_space_coarse_grain_old_api ),
ADD_TEST( svm_shared_address_space_coarse_grain_new_api ),
ADD_TEST( svm_cross_buffer_pointers_coarse_grain ),
ADD_TEST( svm_pointer_passing ),
ADD_TEST( svm_enqueue_api ),
ADD_TEST_VERSION( svm_migrate, Version(2, 1)),
};
const int test_num = ARRAY_SIZE( test_list );
test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device);
auto expected_min_version = Version(2, 0);
if (version < expected_min_version)
{
version_expected_info("Test", "OpenCL",
expected_min_version.to_string().c_str(),
version.to_string().c_str());
return TEST_SKIP;
}
cl_int error;
cl_device_svm_capabilities svm_caps;
error = clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
sizeof(svm_caps), &svm_caps, NULL);
if (error != CL_SUCCESS) {
print_error(error, "Unable to get svm capabilities");
return TEST_FAIL;
}
// SVM support is optional for OpenCL 3.0 devices, so skip if none is reported.
if ((svm_caps == 0) && (version >= Version(3, 0)))
{
return TEST_SKIP;
}
return TEST_PASS;
}
int main(int argc, const char *argv[])
{
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL);
}