blob: 7281f9040a63ee06fb45333946928eca45b9b10c [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// in the printf, remove the third and fifth argument, each of which
// should be a "3", when copying from the printf for astype_kernel_pattern
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// in the printf, remove the fifth argument, which
// should be a "3", when copying from the printf for astype_kernel_pattern
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// in the printf, remove the third argument, which
// should be a "3", when copying from the printf for astype_kernel_pattern
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
int numElements )
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
char programSrc[ 10240 ];
size_t threads[ 1 ], localThreads[ 1 ];
size_t typeSize = get_explicit_type_size( inVecType );
size_t outTypeSize = get_explicit_type_size(outVecType);
char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
MTdata d;
// Create program
if(outVecSize == 3 && vecSize == 3) {
// astype_kernel_pattern_V3srcV3dst
sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
} else if(outVecSize == 3) {
// astype_kernel_pattern_V3dst
sprintf( programSrc, astype_kernel_pattern_V3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ));
} else if(vecSize == 3) {
// astype_kernel_pattern_V3src
sprintf( programSrc, astype_kernel_pattern_V3src,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
} else {
sprintf( programSrc, astype_kernel_pattern,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
}
const char *ptr = programSrc;
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
// Create some input values
size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
char *inBuffer = (char*)malloc( inBufferSize );
size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
char *outBuffer = (char*)malloc( outBufferSize );
d = init_genrand( gRandomSeed );
generate_random_data( inVecType, numElements * vecSize,
d, inBuffer );
free_mtdata(d); d = NULL;
// Create I/O streams and set arguments
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
test_error( error, "Unable to create I/O stream" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
test_error( error, "Unable to set kernel argument" );
// Run the kernel
threads[ 0 ] = numElements;
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
test_error( error, "Unable to get group size to run with" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get the results and compare
// The beauty is that astype is supposed to return the bit pattern as a different type, which means
// the output should have the exact same bit pattern as the input. No interpretation necessary!
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
char *expected = inBuffer;
char *actual = outBuffer;
size_t compSize = typeSize*vecSize;
if(outTypeSize*outVecSize < compSize) {
compSize = outTypeSize*outVecSize;
}
if(outVecSize == 4 && vecSize == 3)
{
// as_type4(vec3) should compile but produce undefined results??
free(inBuffer);
free(outBuffer);
return 0;
}
if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
{
// as_typen(vecm) should compile and run but produce
// implementation-defined results for m != n
// and n*sizeof(type) = sizeof(vecm)
free(inBuffer);
free(outBuffer);
return 0;
}
for( int i = 0; i < numElements; i++ )
{
if( memcmp( expected, actual, compSize ) != 0 )
{
char expectedString[ 1024 ], actualString[ 1024 ];
log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
(int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
programSrc, (int)threads[0],(int) localThreads[0]);
free(inBuffer);
free(outBuffer);
return 1;
}
expected += typeSize * vecSize;
actual += outTypeSize * outVecSize;
}
free(inBuffer);
free(outBuffer);
return 0;
}
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
// Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
// legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
// for us to verify what is "valid". So the only thing we can test are types that match in size independent
// of the element count (char -> uchar, etc)
ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
size_t inTypeSize, outTypeSize;
int error = 0;
for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ )
{
inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
continue;
if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong )
continue;
for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ )
{
outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) {
continue;
}
if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
continue;
// change this check
if( inTypeIdx == outTypeIdx ) {
continue;
}
log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) );
fflush( stdout );
for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
{
for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
{
if(vecSizes[sizeIdx]*inTypeSize !=
vecSizes[outSizeIdx]*outTypeSize )
{
continue;
}
error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
}
}
if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
get_explicit_type_size(vecTypes[outTypeIdx])) {
// as_type3(vec4) allowed, as_type4(vec3) not allowed
error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems );
error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems );
}
}
}
return error;
}