blob: c3a190b73bbe66aea82072b3a8c383bc1461009c [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"
#include "common.h"
#include "host_atomics.h"
#include <sstream>
#include <vector>
template<typename HostAtomicType, typename HostDataType>
class CBasicTestStore : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
if(MemoryOrder() == MEMORY_ORDER_ACQUIRE ||
MemoryOrder() == MEMORY_ORDER_ACQ_REL)
return 0; //skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = (HostDataType)whichDestValue;
return true;
}
};
int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_store_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_store_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType>
{
public:
using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual std::string ProgramCore()
{
return
" atomic_init(&destMemory[tid], tid);\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
host_atomic_init(&destMemory[tid], (HostDataType)tid);
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = (HostDataType)whichDestValue;
return true;
}
};
int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_init_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_init_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestLoad : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
if(MemoryOrder() == MEMORY_ORDER_RELEASE ||
MemoryOrder() == MEMORY_ORDER_ACQ_REL)
return 0; //skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
}
virtual std::string ProgramCore()
{
// In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
// should be MEMORY_ORDER_RELEASE
std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
std::string memoryOrderScopeStore =
(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
? (", memory_order_release" + MemoryScopeStr())
: memoryOrderScopeLoad;
std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
return " atomic_store" + postfix + "(&destMemory[tid], tid"
+ memoryOrderScopeStore
+ ");\n"
" oldValues[tid] = atomic_load"
+ postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST);
oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[tid], MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = (HostDataType)whichDestValue;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
for(cl_uint i = 0; i < threadCount; i++ )
{
if(refValues[i] != (HostDataType)i)
{
log_error("Invalid value for thread %u\n", (cl_uint)i);
correct = false;
return true;
}
}
return true;
}
};
int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_load_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_load_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestExchange : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(123456);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n"
" for(int i = 0; i < "+IterationsStr()+"; i++)\n"
" oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder());
for(int i = 0; i < Iterations(); i++)
oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder());
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only
correct = true;
/* We are expecting values from 0 to size-1 and initial value from atomic variable */
/* These values must be distributed across refValues array and atomic variable finalVaue[0] */
/* Any repeated value is treated as an error */
std::vector<bool> tidFound(threadCount);
bool startValueFound = false;
cl_uint i;
for(i = 0; i <= threadCount; i++)
{
cl_uint value;
if(i == threadCount)
value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
else
value = (cl_uint)refValues[i];
if(value == (cl_uint)StartValue())
{
// Special initial value
if(startValueFound)
{
log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue());
correct = false;
return true;
}
startValueFound = true;
continue;
}
if(value >= threadCount)
{
log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
correct = false;
return true;
}
if(tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n", value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
};
int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrderScope;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(123456);
OldValueCheck(false);
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
if(MemoryOrder2() == MEMORY_ORDER_RELEASE ||
MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
return 0; // not allowed as 'failure' argument
if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) ||
(MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
return 0; // failure argument shall be no stronger than the success
if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
return CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScope();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n"
" int successCount = 0;\n"
" oldValues[tid] = tid;\n"
" expected = tid; // force failure at the beginning\n"
" if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
" oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
" else\n"
" {\n"
" for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
" {\n"
" previous = expected;\n"
" if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
" {\n"
" oldValues[tid] = expected;\n"
" successCount++;\n"
" }\n"
" else\n"
" {\n"
" if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n"
" {\n"
" oldValues[tid] = threadCount; //mark fail with invalid value\n"
" break;\n"
" }\n"
" }\n"
" }\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
HostDataType expected = (HostDataType)StartValue(), previous;
oldValues[tid] = (HostDataType)tid;
for(int i = 0; i < Iterations(); i++)
{
previous = expected;
if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2()))
oldValues[tid] = expected;
else
{
if(previous == expected) // shouldn't occur for 'strong'
{
oldValues[tid] = threadCount; //mark fail with invalid value
}
}
}
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
/* We are expecting values from 0 to size-1 and initial value from atomic variable */
/* These values must be distributed across refValues array and atomic variable finalVaue[0] */
/* Any repeated value is treated as an error */
std::vector<bool> tidFound(threadCount);
bool startValueFound = false;
cl_uint i;
for(i = 0; i <= threadCount; i++)
{
cl_uint value;
if(i == threadCount)
value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written)
else
value = (cl_uint)refValues[i];
if(value == (cl_uint)StartValue())
{
// Special initial value
if(startValueFound)
{
log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue());
correct = false;
return true;
}
startValueFound = true;
continue;
}
if(value >= threadCount)
{
if(value == threadCount)
log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n");
log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
correct = false;
return true;
}
if(tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n", value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
};
int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak : public CBasicTestCompareStrong<HostAtomicType, HostDataType>
{
public:
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::MemoryOrderScope;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType, useSVM)
{
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScope();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n"
" int successCount = 0;\n"
" oldValues[tid] = tid;\n"
" expected = tid; // force failure at the beginning\n"
" if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n"
" oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n"
" else\n"
" {\n"
" for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n"
" {\n"
" previous = expected;\n"
" if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n"
" {\n"
" oldValues[tid] = expected;\n"
" successCount++;\n"
" }\n"
" }\n"
" }\n";
}
};
int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+
" atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
" atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"
" atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = StartValue();
for(cl_uint i = 0; i < threadCount; i++)
expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
return true;
}
};
int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = StartValue();
for(cl_uint i = 0; i < threadCount; i++)
expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8);
return true;
}
};
int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
cl_uint numBits = DataType().Size(deviceID) * 8;
return (threadCount + numBits - 1) / numBits;
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - (whichResult * numBits);\n"
"\n"
" oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
size_t numBits = sizeof(HostDataType) * 8;
size_t whichResult = tid / numBits;
size_t bitIndex = tid - (whichResult * numBits);
oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
if(whichDestValue < numValues - 1)
{
expected = ~(HostDataType)0;
return true;
}
// Last item doesn't get or'ed on every bit, so we have to mask away
cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
expected = StartValue();
for(cl_uint i = 0; i < numBits; i++)
expected |= ((HostDataType)1 << i);
return true;
}
};
int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue((HostDataType)0x2f08ab418ba0541LL);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
" int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
"\n"
" oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
int numBits = sizeof(HostDataType) * 8;
int bitIndex = (numBits-1)*(tid+1)/threadCount;
oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
int numBits = sizeof(HostDataType)*8;
expected = StartValue();
for(cl_uint i = 0; i < threadCount; i++)
{
int bitIndex = (numBits-1)*(i+1)/threadCount;
expected ^= ((HostDataType)1 << bitIndex);
}
return true;
}
};
int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(~(HostDataType)0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
cl_uint numBits = DataType().Size(deviceID) * 8;
return (threadCount + numBits - 1) / numBits;
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - (whichResult * numBits);\n"
"\n"
" oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
size_t numBits = sizeof(HostDataType) * 8;
size_t whichResult = tid / numBits;
size_t bitIndex = tid - (whichResult * numBits);
oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
if(whichDestValue < numValues - 1)
{
expected = 0;
return true;
}
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
expected = StartValue();
for(size_t i = 0; i < numBits; i++)
expected &= ~((HostDataType)1 << i);
return true;
}
};
int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
}
// each thread modifies (with OR and AND operations) and verifies
// only one bit in atomic variable
// other bits are modified by other threads but it must not affect current thread operation
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
" size_t valueInd = tid/bits;\n"
" "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
" oldValues[tid] = 0;\n"
" for(int i = 0; i < "+IterationsStr()+"; i++)\n"
" {\n"
" value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
" if(value & bitMask) // bit should be set to 0\n"
" oldValues[tid]++;\n"
" value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n"
" if(!(value & bitMask)) // bit should be set to 1\n"
" oldValues[tid]++;\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
int bits = sizeof(HostDataType)*8;
size_t valueInd = tid/bits;
HostDataType value, bitMask = (HostDataType)1 << tid%bits;
oldValues[tid] = 0;
for(int i = 0; i < Iterations(); i++)
{
value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder());
if(value & bitMask) // bit should be set to 0
oldValues[tid]++;
value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder());
if(!(value & bitMask)) // bit should be set to 1
oldValues[tid]++;
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
for(cl_uint i = 0; i < threadCount; i++)
{
if(refValues[i] > 0)
{
log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]);
correct = false;
}
}
return true;
}
};
int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor2 : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
}
// each thread modifies (with XOR operation) and verifies
// only one bit in atomic variable
// other bits are modified by other threads but it must not affect current thread operation
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
" size_t valueInd = tid/bits;\n"
" "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
" oldValues[tid] = 0;\n"
" for(int i = 0; i < "+IterationsStr()+"; i++)\n"
" {\n"
" value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
" if(value & bitMask) // bit should be set to 0\n"
" oldValues[tid]++;\n"
" value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
" if(!(value & bitMask)) // bit should be set to 1\n"
" oldValues[tid]++;\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
int bits = sizeof(HostDataType)*8;
size_t valueInd = tid/bits;
HostDataType value, bitMask = (HostDataType)1 << tid%bits;
oldValues[tid] = 0;
for(int i = 0; i < Iterations(); i++)
{
value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
if(value & bitMask) // bit should be set to 0
oldValues[tid]++;
value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder());
if(!(value & bitMask)) // bit should be set to 1
oldValues[tid]++;
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
for(cl_uint i = 0; i < threadCount; i++)
{
if(refValues[i] > 0)
{
log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]);
correct = false;
}
}
return true;
}
};
int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(DataType().MaxValue());
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder());
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
{
for(cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = genrand_int32(d);
if(sizeof(HostDataType) >= 8)
startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
}
return true;
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = StartValue();
for(cl_uint i = 0; i < threadCount; i++)
{
if(startRefValues[ i ] < expected)
expected = startRefValues[ i ];
}
return true;
}
};
int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(DataType().MinValue());
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return
" oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder());
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
{
for(cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = genrand_int32(d);
if(sizeof(HostDataType) >= 8)
startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
}
return true;
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = StartValue();
for(cl_uint i = 0; i < threadCount; i++)
{
if(startRefValues[ i ] > expected)
expected = startRefValues[ i ];
}
return true;
}
};
int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64> test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64> test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFlag : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(0);
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
TExplicitMemoryOrderType MemoryOrderForClear()
{
// Memory ordering for atomic_flag_clear function
// ("shall not be memory_order_acquire nor memory_order_acq_rel")
if(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
return MEMORY_ORDER_RELAXED;
if (MemoryOrder() == MEMORY_ORDER_ACQ_REL)
return MEMORY_ORDER_RELEASE;
return MemoryOrder();
}
std::string MemoryOrderScopeStrForClear()
{
std::string orderStr;
if (MemoryOrder() != MEMORY_ORDER_EMPTY)
orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
return orderStr + MemoryScopeStr();
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
// This test assumes support for the memory_scope_device scope in the case
// that LocalMemory() == false. Therefore we should skip this test in that
// configuration on a 3.0 driver since supporting the memory_scope_device
// scope is optionaly.
if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
{
if (!LocalMemory()
&& !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
{
log_info(
"Skipping atomic_flag test due to use of atomic_scope_device "
"which is optionally not supported on this device\n");
return 0; // skip test - not applicable
}
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::ExecuteSingleTest(deviceID,
context,
queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
std::string program =
" uint cnt, stop = 0;\n"
" for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n"
" {\n"
" bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE)
program += " atomic_work_item_fence(" +
std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
"memory_order_acquire," +
std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
");\n";
program +=
" if (!set)\n"
" {\n";
if (LocalMemory())
program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
else
program += " uint csIndex = cnt;\n";
std::ostringstream csNotVisited;
csNotVisited << CRITICAL_SECTION_NOT_VISITED;
program +=
" // verify that thread is the first visitor\n"
" if(oldValues[csIndex] == "+csNotVisited.str()+")\n"
" {\n"
" oldValues[csIndex] = tid; // set the winner id for this critical section\n"
" stop = 1;\n"
" }\n";
if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED)
program += " atomic_work_item_fence(" +
std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
"memory_order_release," +
std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
");\n";
program +=
" atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n"
" }\n"
" }\n";
return program;
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
cl_uint cnt, stop = 0;
for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n"
{
if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
{
cl_uint csIndex = cnt;
// verify that thread is the first visitor\n"
if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
{
oldValues[csIndex] = tid; // set the winner id for this critical section\n"
stop = 1;
}
host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
}
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
{
expected = StartValue();
return true;
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
{
for(cl_uint i = 0 ; i < threadCount; i++)
startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
/* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */
/* These values must be distributed across refValues array */
std::vector<bool> tidFound(threadCount);
cl_uint i;
for (i = 0; i < threadCount; i++)
{
cl_uint value = (cl_uint)refValues[i];
if (value == CRITICAL_SECTION_NOT_VISITED)
{
// Special initial value
log_error("ERROR: Critical section %u not visited\n", i);
correct = false;
return true;
}
if (value >= threadCount)
{
log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value);
correct = false;
return true;
}
if (tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n", value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
};
int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG, useSVM);
EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements));
return error;
}
int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_flag_generic(deviceID, context, queue, num_elements, false);
}
int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_atomic_flag_generic(deviceID, context, queue, num_elements, true);
}
template<typename HostAtomicType, typename HostDataType>
class CBasicTestFence : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
struct TestDefinition {
bool op1IsFence;
TExplicitMemoryOrderType op1MemOrder;
bool op2IsFence;
TExplicitMemoryOrderType op2MemOrder;
};
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DeclaredInProgram;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::CurrentGroupSize;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{
StartValue(0);
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual cl_uint NumNonAtomicVariablesPerThread()
{
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
return 1;
if (LocalMemory())
{
if (gIsEmbedded)
{
if (CurrentGroupSize() > 1024)
CurrentGroupSize(1024);
return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread
}
else
return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec
}
return 256;
}
virtual std::string SingleTestName()
{
std::string testName;
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
testName += "seq_cst fence, ";
else
testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order"))
+ (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with "
+ std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order"))
+ (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
return testName;
}
virtual bool SVMDataBufferAllSVMConsistent()
{
// Although memory_scope_all_devices doesn't mention SVM it is just an
// alias for memory_scope_all_svm_devices. So both scopes interact with
// SVM allocations, on devices that support those, just the same.
return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
|| MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
}
virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
int error = 0;
// execute 3 (maximum) sub cases for each memory order
for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
{
EXECUTE_TEST(error, (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteForEachParameterSet(deviceID, context, queue)));
}
return error;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
{
if(DeclaredInProgram() || UsedInFunction())
return 0; //skip test - not applicable - no overloaded fence functions for different address spaces
if(MemoryOrder() == MEMORY_ORDER_EMPTY ||
MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15
return 0; //skip test - not applicable
if((UseSVM() || gHost)
&& LocalMemory())
return 0; // skip test - not applicable for SVM and local memory
struct TestDefinition acqTests[] = {
// {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
{ false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
};
struct TestDefinition relTests[] = {
{ true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
};
struct TestDefinition arTests[] = {
{ false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
{ true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
};
switch (MemoryOrder())
{
case MEMORY_ORDER_ACQUIRE:
if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition))
return 0;
_subCase = acqTests[_subCaseId];
break;
case MEMORY_ORDER_RELEASE:
if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition))
return 0;
_subCase = relTests[_subCaseId];
break;
case MEMORY_ORDER_ACQ_REL:
if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition))
return 0;
_subCase = arTests[_subCaseId];
break;
case MEMORY_ORDER_SEQ_CST:
if (_subCaseId != 0) // one special case only
return 0;
break;
default:
return 0;
}
LocalRefValues(LocalMemory());
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue);
}
virtual std::string ProgramHeader(cl_uint maxNumDestItems)
{
std::string header;
if(gOldAPI)
{
if(MemoryScope() == MEMORY_SCOPE_EMPTY)
{
header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n";
}
else
{
header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n";
}
}
return header+CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ProgramHeader(maxNumDestItems);
}
virtual std::string ProgramCore()
{
std::ostringstream naValues;
naValues << NumNonAtomicVariablesPerThread();
std::string program, fenceType, nonAtomic;
if (LocalMemory())
{
program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n";
fenceType = "CLK_LOCAL_MEM_FENCE";
nonAtomic = "localValues";
}
else
{
program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
fenceType = "CLK_GLOBAL_MEM_FENCE";
nonAtomic = "oldValues";
}
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
// All threads are divided into pairs.
// Each thread has its own atomic variable and performs the following actions:
// - increments its own variable
// - performs fence operation to propagate its value and to see value from other thread
// - reads value from other thread's variable
// - repeats the above steps when both values are the same (and less than 1000000)
// - stores the last value read from other thread (in additional variable)
// At the end of execution at least one thread should know the last value from other thread
program += std::string("") +
" " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n"
" do {\n"
" myValue++;\n"
" atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"
" atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n"
" hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
" } while(myValue == hisValue && myValue < 1000000);\n"
" " + nonAtomic + "[myId] = hisValue; \n";
}
else
{
// Each thread modifies one of its non-atomic variables, increments value of its atomic variable
// and reads values from another thread in typical synchronizes-with scenario with:
// - non-atomic variable (at index A) modification (value change from 0 to A)
// - release operation (additional fence or within atomic) + atomic variable modification (value A)
// - atomic variable read (value B) + acquire operation (additional fence or within atomic)
// - non-atomic variable (at index B) read (value C)
// Each thread verifies dependency between atomic and non-atomic value read from another thread
// The following condition must be true: B == C
program += std::string("") +
" " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n"
" do {\n"
" myValue++;\n"
" " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n";
if (_subCase.op1IsFence)
program += std::string("") +
" atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n"
" atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n";
else
program += std::string("") +
" atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n";
if (_subCase.op2IsFence)
program += std::string("") +
" hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
" atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n";
else
program += std::string("") +
" hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n";
program +=
" hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n";
if (LocalMemory())
program += " hisId = (hisId+1)%get_local_size(0);\n";
else
program += " hisId = (hisId+1)%threadCount;\n";
program +=
" } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n"
" if(hisAtomicValue != hisValue)\n"
" { // fail\n"
" atomic_store(&destMemory[myId], myValue-1);\n";
if (LocalMemory())
program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n";
else
program += " hisId = (hisId+threadCount-1)%threadCount;\n";
program +=
" if(myValue+1 < " + naValues.str() + ")\n"
" " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n"
" if(myValue+2 < " + naValues.str() + ")\n"
" " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n"
" if(myValue+3 < " + naValues.str() + ")\n"
" " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n";
if (gDebug)
{
program +=
" printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n";
}
program +=
" }\n";
}
return program;
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
{
size_t myId = tid, hisId = threadCount - 1 - tid;
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
HostDataType myValue = 0, hisValue;
// CPU thread typically starts faster - wait for GPU thread
myValue++;
host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
while (host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0);
do {
myValue++;
host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
host_atomic_thread_fence(MemoryOrder());
hisValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
} while (myValue == hisValue && hisValue < 1000000);
oldValues[tid] = hisValue;
}
else
{
HostDataType myValue = 0, hisAtomicValue, hisValue;
do {
myValue++;
oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue;
if (_subCase.op1IsFence)
{
host_atomic_thread_fence(_subCase.op1MemOrder);
host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
}
else
host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue, _subCase.op1MemOrder);
if (_subCase.op2IsFence)
{
hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], MEMORY_ORDER_RELAXED);
host_atomic_thread_fence(_subCase.op2MemOrder);
}
else
hisAtomicValue = host_atomic_load<HostAtomicType, HostDataType>(&destMemory[hisId], _subCase.op2MemOrder);
hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue];
hisId = (hisId + 1) % threadCount;
} while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1);
if(hisAtomicValue != hisValue)
{ // fail
host_atomic_store<HostAtomicType, HostDataType>(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST);
if (gDebug)
{
hisId = (hisId + threadCount - 1) % threadCount;
printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue);
}
}
}
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
{
for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++)
startRefValues[i] = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
{
correct = true;
cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize)
{
if(workOffset+workSize > threadCount)
// last workgroup (host threads)
workSize = threadCount-workOffset;
for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++)
{
HostAtomicType myValue = finalValues[workOffset + i];
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
HostDataType hisValue = refValues[workOffset + i];
if (myValue == hisValue)
{
// a draw - both threads should reach final value 1000000
if (myValue != 1000000)
{
log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue);
correct = false;
return true;
}
}
else
{
//slower thread (in total order of seq_cst operations) must know last value written by faster thread
HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i];
HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i];
// who is the winner? - thread with lower private counter value
if (myValue == hisRealValue) // forbidden result - fence doesn't work
{
log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue);
log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim);
correct = false;
return true;
}
if (myValue > hisRealValue) // I'm slower
{
if (hisRealValue != hisValue)
{
log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue);
log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i);
correct = false;
return true;
}
}
else // I'm faster
{
if (myValueReadByHim != myValue)
{
log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue);
log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i);
correct = false;
return</