| // |
| // Copyright (c) 2021 The Khronos Group Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #include "test_common.h" |
| |
| |
| cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) { |
| cl_sampler sampler = nullptr; |
| if (test_mipmaps) { |
| cl_sampler_properties properties[] = { |
| CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords, |
| CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode, |
| CL_SAMPLER_FILTER_MODE, sdata->filter_mode, |
| CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode, |
| 0}; |
| sampler = clCreateSamplerWithProperties(context, properties, error); |
| } else { |
| sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error); |
| } |
| return sampler; |
| } |
| |
| void InitFloatCoordsCommon(image_descriptor *imageInfo, |
| image_sampler_data *imageSampler, float *xOffsets, |
| float *yOffsets, float *zOffsets, float xfract, |
| float yfract, float zfract, int normalized_coords, |
| MTdata d, int lod) |
| { |
| size_t i = 0; |
| if (gDisableOffsets) |
| { |
| for (size_t z = 0; z < imageInfo->depth; z++) |
| { |
| for (size_t y = 0; y < imageInfo->height; y++) |
| { |
| for (size_t x = 0; x < imageInfo->width; x++, i++) |
| { |
| xOffsets[i] = (float)(xfract + (double)x); |
| yOffsets[i] = (float)(yfract + (double)y); |
| zOffsets[i] = (float)(zfract + (double)z); |
| } |
| } |
| } |
| } |
| else |
| { |
| for (size_t z = 0; z < imageInfo->depth; z++) |
| { |
| for (size_t y = 0; y < imageInfo->height; y++) |
| { |
| for (size_t x = 0; x < imageInfo->width; x++, i++) |
| { |
| xOffsets[i] = |
| (float)(xfract |
| + (double)((int)x |
| + random_in_range(-10, 10, d))); |
| yOffsets[i] = |
| (float)(yfract |
| + (double)((int)y |
| + random_in_range(-10, 10, d))); |
| zOffsets[i] = |
| (float)(zfract |
| + (double)((int)z |
| + random_in_range(-10, 10, d))); |
| } |
| } |
| } |
| } |
| |
| if (imageSampler->addressing_mode == CL_ADDRESS_NONE) |
| { |
| i = 0; |
| for (size_t z = 0; z < imageInfo->depth; z++) |
| { |
| for (size_t y = 0; y < imageInfo->height; y++) |
| { |
| for (size_t x = 0; x < imageInfo->width; x++, i++) |
| { |
| xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0, |
| (double)imageInfo->width - 1.0); |
| yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, |
| (double)imageInfo->height - 1.0); |
| zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, |
| (double)imageInfo->depth - 1.0); |
| } |
| } |
| } |
| } |
| |
| if (normalized_coords || gTestMipmaps) |
| { |
| i = 0; |
| if (lod == 0) |
| { |
| for (size_t z = 0; z < imageInfo->depth; z++) |
| { |
| for (size_t y = 0; y < imageInfo->height; y++) |
| { |
| for (size_t x = 0; x < imageInfo->width; x++, i++) |
| { |
| xOffsets[i] = (float)((double)xOffsets[i] |
| / (double)imageInfo->width); |
| yOffsets[i] = (float)((double)yOffsets[i] |
| / (double)imageInfo->height); |
| zOffsets[i] = (float)((double)zOffsets[i] |
| / (double)imageInfo->depth); |
| } |
| } |
| } |
| } |
| else if (gTestMipmaps) |
| { |
| size_t width_lod, height_lod, depth_lod; |
| |
| width_lod = |
| (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; |
| height_lod = |
| (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; |
| depth_lod = |
| (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1; |
| |
| for (size_t z = 0; z < depth_lod; z++) |
| { |
| for (size_t y = 0; y < height_lod; y++) |
| { |
| for (size_t x = 0; x < width_lod; x++, i++) |
| { |
| xOffsets[i] = |
| (float)((double)xOffsets[i] / (double)width_lod); |
| yOffsets[i] = |
| (float)((double)yOffsets[i] / (double)height_lod); |
| zOffsets[i] = |
| (float)((double)zOffsets[i] / (double)depth_lod); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| int test_read_image(cl_context context, cl_command_queue queue, |
| cl_kernel kernel, image_descriptor *imageInfo, |
| image_sampler_data *imageSampler, bool useFloatCoords, |
| ExplicitType outputType, MTdata d) |
| { |
| int error; |
| size_t threads[3]; |
| static int initHalf = 0; |
| |
| cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; |
| |
| clMemWrapper xOffsets, yOffsets, zOffsets, results; |
| clSamplerWrapper actualSampler; |
| BufferOwningPtr<char> maxImageUseHostPtrBackingStore; |
| |
| // Create offset data |
| BufferOwningPtr<cl_float> xOffsetValues( |
| malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height |
| * imageInfo->depth)); |
| BufferOwningPtr<cl_float> yOffsetValues( |
| malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height |
| * imageInfo->depth)); |
| BufferOwningPtr<cl_float> zOffsetValues( |
| malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height |
| * imageInfo->depth)); |
| |
| if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) |
| if (DetectFloatToHalfRoundingMode(queue)) return 1; |
| |
| BufferOwningPtr<char> imageValues; |
| generate_random_image_data(imageInfo, imageValues, d); |
| |
| // Construct testing sources |
| clProtectedImage protImage; |
| clMemWrapper unprotImage; |
| cl_mem image; |
| |
| if (gtestTypesToRun & kReadTests) |
| { |
| image_read_write_flags = CL_MEM_READ_ONLY; |
| } |
| else |
| { |
| image_read_write_flags = CL_MEM_READ_WRITE; |
| } |
| |
| if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR) |
| { |
| // clProtectedImage uses USE_HOST_PTR, so just rely on that for the |
| // testing (via Ian) Do not use protected images for max image size test |
| // since it rounds the row size to a page size |
| if (gTestMaxImages) |
| { |
| generate_random_image_data(imageInfo, |
| maxImageUseHostPtrBackingStore, d); |
| unprotImage = create_image_3d( |
| context, image_read_write_flags | CL_MEM_USE_HOST_PTR, |
| imageInfo->format, imageInfo->width, imageInfo->height, |
| imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), |
| (gEnablePitch ? imageInfo->slicePitch : 0), |
| maxImageUseHostPtrBackingStore, &error); |
| } |
| else |
| { |
| error = protImage.Create(context, image_read_write_flags, |
| imageInfo->format, imageInfo->width, |
| imageInfo->height, imageInfo->depth); |
| } |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to create 3D image of size %d x %d x %d " |
| "(pitch %d, %d ) (%s)", |
| (int)imageInfo->width, (int)imageInfo->height, |
| (int)imageInfo->depth, (int)imageInfo->rowPitch, |
| (int)imageInfo->slicePitch, IGetErrorString(error)); |
| return error; |
| } |
| if (gTestMaxImages) |
| image = (cl_mem)unprotImage; |
| else |
| image = (cl_mem)protImage; |
| } |
| else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR) |
| { |
| // Don't use clEnqueueWriteImage; just use copy host ptr to get the data |
| // in |
| unprotImage = create_image_3d( |
| context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, |
| imageInfo->format, imageInfo->width, imageInfo->height, |
| imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), |
| (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to create 3D image of size %d x %d x %d " |
| "(pitch %d, %d ) (%s)", |
| (int)imageInfo->width, (int)imageInfo->height, |
| (int)imageInfo->depth, (int)imageInfo->rowPitch, |
| (int)imageInfo->slicePitch, IGetErrorString(error)); |
| return error; |
| } |
| image = unprotImage; |
| } |
| else // Either CL_MEM_ALLOC_HOST_PTR or none |
| { |
| // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can |
| // be accessed by the host, but otherwise it works just as if no flag is |
| // specified, so we just do the same thing either way |
| if (!gTestMipmaps) |
| { |
| unprotImage = create_image_3d( |
| context, image_read_write_flags | gMemFlagsToUse, |
| imageInfo->format, imageInfo->width, imageInfo->height, |
| imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), |
| (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, |
| &error); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to create 3D image of size %d x %d x " |
| "%d (pitch %d, %d ) (%s)", |
| (int)imageInfo->width, (int)imageInfo->height, |
| (int)imageInfo->depth, (int)imageInfo->rowPitch, |
| (int)imageInfo->slicePitch, IGetErrorString(error)); |
| return error; |
| } |
| image = unprotImage; |
| } |
| else |
| { |
| cl_image_desc image_desc = { 0 }; |
| image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; |
| image_desc.image_width = imageInfo->width; |
| image_desc.image_height = imageInfo->height; |
| image_desc.image_depth = imageInfo->depth; |
| image_desc.num_mip_levels = imageInfo->num_mip_levels; |
| |
| |
| unprotImage = |
| clCreateImage(context, image_read_write_flags, |
| imageInfo->format, &image_desc, NULL, &error); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to create %d level mipmapped 3D image " |
| "of size %d x %d x %d (pitch %d, %d ) (%s)", |
| (int)imageInfo->num_mip_levels, (int)imageInfo->width, |
| (int)imageInfo->height, (int)imageInfo->depth, |
| (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, |
| IGetErrorString(error)); |
| return error; |
| } |
| image = unprotImage; |
| } |
| } |
| |
| if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR) |
| { |
| size_t origin[4] = { 0, 0, 0, 0 }; |
| size_t region[3] = { imageInfo->width, imageInfo->height, |
| imageInfo->depth }; |
| |
| if (gDebugTrace) log_info(" - Writing image...\n"); |
| |
| if (!gTestMipmaps) |
| { |
| |
| error = |
| clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, |
| gEnablePitch ? imageInfo->rowPitch : 0, |
| gEnablePitch ? imageInfo->slicePitch : 0, |
| imageValues, 0, NULL, NULL); |
| |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to write to 3D image of size %d x %d " |
| "x %d \n", |
| (int)imageInfo->width, (int)imageInfo->height, |
| (int)imageInfo->depth); |
| return error; |
| } |
| } |
| else |
| { |
| int nextLevelOffset = 0; |
| |
| for (int i = 0; i < imageInfo->num_mip_levels; i++) |
| { |
| origin[3] = i; |
| error = clEnqueueWriteImage( |
| queue, image, CL_TRUE, origin, region, |
| /*gEnablePitch ? imageInfo->rowPitch :*/ 0, |
| /*gEnablePitch ? imageInfo->slicePitch :*/ 0, |
| ((char *)imageValues + nextLevelOffset), 0, NULL, NULL); |
| if (error != CL_SUCCESS) |
| { |
| log_error("ERROR: Unable to write to %d level mipmapped 3D " |
| "image of size %d x %d x %d\n", |
| (int)imageInfo->num_mip_levels, |
| (int)imageInfo->width, (int)imageInfo->height, |
| (int)imageInfo->depth); |
| return error; |
| } |
| nextLevelOffset += region[0] * region[1] * region[2] |
| * get_pixel_size(imageInfo->format); |
| // Subsequent mip level dimensions keep halving |
| region[0] = region[0] >> 1 ? region[0] >> 1 : 1; |
| region[1] = region[1] >> 1 ? region[1] >> 1 : 1; |
| region[2] = region[2] >> 1 ? region[2] >> 1 : 1; |
| } |
| } |
| } |
| |
| xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, |
| sizeof(cl_float) * imageInfo->width |
| * imageInfo->height * imageInfo->depth, |
| xOffsetValues, &error); |
| test_error(error, "Unable to create x offset buffer"); |
| yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, |
| sizeof(cl_float) * imageInfo->width |
| * imageInfo->height * imageInfo->depth, |
| yOffsetValues, &error); |
| test_error(error, "Unable to create y offset buffer"); |
| zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, |
| sizeof(cl_float) * imageInfo->width |
| * imageInfo->height * imageInfo->depth, |
| zOffsetValues, &error); |
| test_error(error, "Unable to create y offset buffer"); |
| results = |
| clCreateBuffer(context, CL_MEM_READ_WRITE, |
| get_explicit_type_size(outputType) * 4 * imageInfo->width |
| * imageInfo->height * imageInfo->depth, |
| NULL, &error); |
| test_error(error, "Unable to create result buffer"); |
| |
| // Create sampler to use |
| actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); |
| test_error(error, "Unable to create image sampler"); |
| |
| // Set arguments |
| int idx = 0; |
| error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image); |
| test_error(error, "Unable to set kernel arguments"); |
| if (!gUseKernelSamplers) |
| { |
| error = |
| clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler); |
| test_error(error, "Unable to set kernel arguments"); |
| } |
| error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets); |
| test_error(error, "Unable to set kernel arguments"); |
| error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets); |
| test_error(error, "Unable to set kernel arguments"); |
| error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets); |
| test_error(error, "Unable to set kernel arguments"); |
| error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results); |
| test_error(error, "Unable to set kernel arguments"); |
| |
| const float float_offsets[] = { 0.0f, |
| MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), |
| 0.25f, |
| 0.3f, |
| 0.5f - FLT_EPSILON / 4.0f, |
| 0.5f, |
| 0.9f, |
| 1.0f - FLT_EPSILON / 2 }; |
| int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]); |
| int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; |
| int loopCount = 2 * float_offset_count; |
| if (!useFloatCoords) loopCount = 1; |
| if (gTestMaxImages) |
| { |
| loopCount = 1; |
| log_info("Testing each size only once with pixel offsets of %g for max " |
| "sized images.\n", |
| float_offsets[0]); |
| } |
| |
| // Get the maximum absolute error for this format |
| double formatAbsoluteError = |
| get_max_absolute_error(imageInfo->format, imageSampler); |
| if (gDebugTrace) |
| log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); |
| |
| if (0 == initHalf |
| && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) |
| { |
| initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue); |
| if (initHalf) |
| { |
| log_info("Half rounding mode successfully detected.\n"); |
| } |
| } |
| |
| int nextLevelOffset = 0; |
| size_t width_lod = imageInfo->width, height_lod = imageInfo->height, |
| depth_lod = imageInfo->depth; |
| |
| // Loop over all mipmap levels, if we are testing mipmapped images. |
| for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) |
| || (!gTestMipmaps && lod < 1); |
| lod++) |
| { |
| size_t resultValuesSize = width_lod * height_lod * depth_lod |
| * get_explicit_type_size(outputType) * 4; |
| BufferOwningPtr<char> resultValues(malloc(resultValuesSize)); |
| float lod_float = (float)lod; |
| if (gTestMipmaps) |
| { |
| // Set the lod kernel arg |
| if (gDebugTrace) log_info(" - Working at mip level %d\n", lod); |
| error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float); |
| test_error(error, "Unable to set kernel arguments"); |
| } |
| |
| for (int q = 0; q < loopCount; q++) |
| { |
| float offset = float_offsets[q % float_offset_count]; |
| |
| // Init the coordinates |
| InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues, |
| yOffsetValues, zOffsetValues, |
| q >= float_offset_count ? -offset : offset, |
| q >= float_offset_count ? offset : -offset, |
| q >= float_offset_count ? -offset : offset, |
| imageSampler->normalized_coords, d, lod); |
| |
| error = |
| clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, |
| sizeof(cl_float) * imageInfo->height |
| * imageInfo->width * imageInfo->depth, |
| xOffsetValues, 0, NULL, NULL); |
| test_error(error, "Unable to write x offsets"); |
| error = |
| clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, |
| sizeof(cl_float) * imageInfo->height |
| * imageInfo->width * imageInfo->depth, |
| yOffsetValues, 0, NULL, NULL); |
| test_error(error, "Unable to write y offsets"); |
| error = |
| clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, |
| sizeof(cl_float) * imageInfo->height |
| * imageInfo->width * imageInfo->depth, |
| zOffsetValues, 0, NULL, NULL); |
| test_error(error, "Unable to write z offsets"); |
| |
| |
| memset(resultValues, 0xff, resultValuesSize); |
| clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize, |
| resultValues, 0, NULL, NULL); |
| |
| // Figure out thread dimensions |
| threads[0] = (size_t)width_lod; |
| threads[1] = (size_t)height_lod; |
| threads[2] = (size_t)depth_lod; |
| |
| // Run the kernel |
| error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, |
| NULL, 0, NULL, NULL); |
| test_error(error, "Unable to run kernel"); |
| |
| // Get results |
| error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, |
| width_lod * height_lod * depth_lod |
| * get_explicit_type_size(outputType) |
| * 4, |
| resultValues, 0, NULL, NULL); |
| test_error(error, "Unable to read results from kernel"); |
| if (gDebugTrace) log_info(" results read\n"); |
| |
| // Validate results element by element |
| char *imagePtr = (char *)imageValues + nextLevelOffset; |
| /* |
| * FLOAT output type |
| */ |
| if (is_sRGBA_order(imageInfo->format->image_channel_order) |
| && (outputType == kFloat)) |
| { |
| // Validate float results |
| float *resultPtr = (float *)(char *)resultValues; |
| float expected[4], error = 0.0f; |
| float maxErr = get_max_relative_error( |
| imageInfo->format, imageSampler, 1 /*3D*/, |
| CL_FILTER_LINEAR == imageSampler->filter_mode); |
| |
| for (size_t z = 0, j = 0; z < depth_lod; z++) |
| { |
| for (size_t y = 0; y < height_lod; y++) |
| { |
| for (size_t x = 0; x < width_lod; x++, j++) |
| { |
| // Step 1: go through and see if the results verify |
| // for the pixel For the normalized case on a GPU we |
| // put in offsets to the X, Y and Z to see if we |
| // land on the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization in |
| // OpenCL 1.0. |
| int checkOnlyOnePixel = 0; |
| int found_pixel = 0; |
| float offset = NORM_OFFSET; |
| if (!imageSampler->normalized_coords |
| || imageSampler->filter_mode |
| != CL_FILTER_NEAREST |
| || NORM_OFFSET == 0 |
| #if defined(__APPLE__) |
| // Apple requires its CPU implementation to do |
| // correctly rounded address arithmetic in all |
| // modes |
| || gDeviceType != CL_DEVICE_TYPE_GPU |
| #endif |
| ) |
| offset = 0.0f; // Loop only once |
| |
| for (float norm_offset_x = -offset; |
| norm_offset_x <= offset && !found_pixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -offset; |
| norm_offset_y <= offset && !found_pixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -offset; |
| norm_offset_z <= NORM_OFFSET |
| && !found_pixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| int hasDenormals = 0; |
| FloatPixel maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, 0, |
| &hasDenormals, lod); |
| |
| float err1 = |
| ABS_ERROR(sRGBmap(resultPtr[0]), |
| sRGBmap(expected[0])); |
| float err2 = |
| ABS_ERROR(sRGBmap(resultPtr[1]), |
| sRGBmap(expected[1])); |
| float err3 = |
| ABS_ERROR(sRGBmap(resultPtr[2]), |
| sRGBmap(expected[2])); |
| float err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| // Clamp to the minimum absolute error |
| // for the format |
| if (err1 > 0 |
| && err1 < formatAbsoluteError) |
| { |
| err1 = 0.0f; |
| } |
| if (err2 > 0 |
| && err2 < formatAbsoluteError) |
| { |
| err2 = 0.0f; |
| } |
| if (err3 > 0 |
| && err3 < formatAbsoluteError) |
| { |
| err3 = 0.0f; |
| } |
| if (err4 > 0 |
| && err4 < formatAbsoluteError) |
| { |
| err4 = 0.0f; |
| } |
| float maxErr = 0.5; |
| |
| if (!(err1 <= maxErr) |
| || !(err2 <= maxErr) |
| || !(err3 <= maxErr) |
| || !(err4 <= maxErr)) |
| { |
| // Try flushing the denormals |
| if (hasDenormals) |
| { |
| // If implementation decide to |
| // flush subnormals to zero, max |
| // error needs to be adjusted |
| maxErr += 4 * FLT_MIN; |
| |
| maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, |
| imageSampler, expected, |
| 0, NULL, lod); |
| |
| err1 = ABS_ERROR( |
| sRGBmap(resultPtr[0]), |
| sRGBmap(expected[0])); |
| err2 = ABS_ERROR( |
| sRGBmap(resultPtr[1]), |
| sRGBmap(expected[1])); |
| err3 = ABS_ERROR( |
| sRGBmap(resultPtr[2]), |
| sRGBmap(expected[2])); |
| err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| } |
| } |
| |
| found_pixel = (err1 <= maxErr) |
| && (err2 <= maxErr) |
| && (err3 <= maxErr) |
| && (err4 <= maxErr); |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| |
| // Step 2: If we did not find a match, then print |
| // out debugging info. |
| if (!found_pixel) |
| { |
| // For the normalized case on a GPU we put in |
| // offsets to the X and Y to see if we land on |
| // the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization |
| // in OpenCL 1.0. |
| checkOnlyOnePixel = 0; |
| int shouldReturn = 0; |
| for (float norm_offset_x = -offset; |
| norm_offset_x <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -offset; |
| norm_offset_y <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -offset; |
| norm_offset_z <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| int hasDenormals = 0; |
| FloatPixel maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, imageSampler, |
| expected, 0, &hasDenormals, |
| lod); |
| |
| float err1 = |
| ABS_ERROR(sRGBmap(resultPtr[0]), |
| sRGBmap(expected[0])); |
| float err2 = |
| ABS_ERROR(sRGBmap(resultPtr[1]), |
| sRGBmap(expected[1])); |
| float err3 = |
| ABS_ERROR(sRGBmap(resultPtr[2]), |
| sRGBmap(expected[2])); |
| float err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| float maxErr = 0.6; |
| |
| if (!(err1 <= maxErr) |
| || !(err2 <= maxErr) |
| || !(err3 <= maxErr) |
| || !(err4 <= maxErr)) |
| { |
| // Try flushing the denormals |
| if (hasDenormals) |
| { |
| // If implementation decide |
| // to flush subnormals to |
| // zero, max error needs to |
| // be adjusted |
| maxErr += 4 * FLT_MIN; |
| |
| maxPixel = |
| sample_image_pixel_float( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| imageSampler, |
| expected, 0, NULL, |
| lod); |
| |
| err1 = ABS_ERROR( |
| sRGBmap(resultPtr[0]), |
| sRGBmap(expected[0])); |
| err2 = ABS_ERROR( |
| sRGBmap(resultPtr[1]), |
| sRGBmap(expected[1])); |
| err3 = ABS_ERROR( |
| sRGBmap(resultPtr[2]), |
| sRGBmap(expected[2])); |
| err4 = |
| ABS_ERROR(resultPtr[3], |
| expected[3]); |
| } |
| } |
| |
| if (!(err1 <= maxErr) |
| || !(err2 <= maxErr) |
| || !(err3 <= maxErr) |
| || !(err4 <= maxErr)) |
| { |
| log_error( |
| "FAILED norm_offsets: %g , " |
| "%g , %g:\n", |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z); |
| |
| float tempOut[4]; |
| shouldReturn |= |
| determine_validation_error_offset< |
| float>( |
| imagePtr, imageInfo, |
| imageSampler, resultPtr, |
| expected, error, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, j, |
| numTries, numClamped, |
| true, lod); |
| log_error("Step by step:\n"); |
| FloatPixel temp = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, |
| imageSampler, tempOut, |
| 1 /*verbose*/, |
| &hasDenormals, lod); |
| log_error( |
| "\tulps: %2.2f, %2.2f, " |
| "%2.2f, %2.2f (max " |
| "allowed: %2.2f)\n\n", |
| Ulp_Error(resultPtr[0], |
| expected[0]), |
| Ulp_Error(resultPtr[1], |
| expected[1]), |
| Ulp_Error(resultPtr[2], |
| expected[2]), |
| Ulp_Error(resultPtr[3], |
| expected[3]), |
| Ulp_Error( |
| MAKE_HEX_FLOAT( |
| 0x1.000002p0f, |
| 0x1000002L, -24) |
| + maxErr, |
| MAKE_HEX_FLOAT( |
| 0x1.000002p0f, |
| 0x1000002L, -24))); |
| } |
| else |
| { |
| log_error( |
| "Test error: we should " |
| "have detected this " |
| "passing above.\n"); |
| } |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| if (shouldReturn) return 1; |
| } // if (!found_pixel) |
| |
| resultPtr += 4; |
| } |
| } |
| } |
| } |
| /* |
| * FLOAT output type |
| */ |
| else if (outputType == kFloat) |
| { |
| // Validate float results |
| float *resultPtr = (float *)(char *)resultValues; |
| float expected[4], error = 0.0f; |
| float maxErr = get_max_relative_error( |
| imageInfo->format, imageSampler, 1 /*3D*/, |
| CL_FILTER_LINEAR == imageSampler->filter_mode); |
| |
| for (size_t z = 0, j = 0; z < depth_lod; z++) |
| { |
| for (size_t y = 0; y < height_lod; y++) |
| { |
| for (size_t x = 0; x < width_lod; x++, j++) |
| { |
| // Step 1: go through and see if the results verify |
| // for the pixel For the normalized case on a GPU we |
| // put in offsets to the X, Y and Z to see if we |
| // land on the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization in |
| // OpenCL 1.0. |
| int checkOnlyOnePixel = 0; |
| int found_pixel = 0; |
| float offset = NORM_OFFSET; |
| if (!imageSampler->normalized_coords |
| || imageSampler->filter_mode |
| != CL_FILTER_NEAREST |
| || NORM_OFFSET == 0 |
| #if defined(__APPLE__) |
| // Apple requires its CPU implementation to do |
| // correctly rounded address arithmetic in all |
| // modes |
| || gDeviceType != CL_DEVICE_TYPE_GPU |
| #endif |
| ) |
| offset = 0.0f; // Loop only once |
| |
| for (float norm_offset_x = -offset; |
| norm_offset_x <= offset && !found_pixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -offset; |
| norm_offset_y <= offset && !found_pixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -offset; |
| norm_offset_z <= NORM_OFFSET |
| && !found_pixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| int hasDenormals = 0; |
| FloatPixel maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, 0, |
| &hasDenormals, lod); |
| |
| float err1 = ABS_ERROR(resultPtr[0], |
| expected[0]); |
| float err2 = ABS_ERROR(resultPtr[1], |
| expected[1]); |
| float err3 = ABS_ERROR(resultPtr[2], |
| expected[2]); |
| float err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| // Clamp to the minimum absolute error |
| // for the format |
| if (err1 > 0 |
| && err1 < formatAbsoluteError) |
| { |
| err1 = 0.0f; |
| } |
| if (err2 > 0 |
| && err2 < formatAbsoluteError) |
| { |
| err2 = 0.0f; |
| } |
| if (err3 > 0 |
| && err3 < formatAbsoluteError) |
| { |
| err3 = 0.0f; |
| } |
| if (err4 > 0 |
| && err4 < formatAbsoluteError) |
| { |
| err4 = 0.0f; |
| } |
| float maxErr1 = MAX( |
| maxErr * maxPixel.p[0], FLT_MIN); |
| float maxErr2 = MAX( |
| maxErr * maxPixel.p[1], FLT_MIN); |
| float maxErr3 = MAX( |
| maxErr * maxPixel.p[2], FLT_MIN); |
| float maxErr4 = MAX( |
| maxErr * maxPixel.p[3], FLT_MIN); |
| |
| if (!(err1 <= maxErr1) |
| || !(err2 <= maxErr2) |
| || !(err3 <= maxErr3) |
| || !(err4 <= maxErr4)) |
| { |
| // Try flushing the denormals |
| if (hasDenormals) |
| { |
| // If implementation decide to |
| // flush subnormals to zero, max |
| // error needs to be adjusted |
| maxErr1 += 4 * FLT_MIN; |
| maxErr2 += 4 * FLT_MIN; |
| maxErr3 += 4 * FLT_MIN; |
| maxErr4 += 4 * FLT_MIN; |
| |
| maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, |
| imageSampler, expected, |
| 0, NULL, lod); |
| |
| err1 = ABS_ERROR(resultPtr[0], |
| expected[0]); |
| err2 = ABS_ERROR(resultPtr[1], |
| expected[1]); |
| err3 = ABS_ERROR(resultPtr[2], |
| expected[2]); |
| err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| } |
| } |
| |
| found_pixel = (err1 <= maxErr1) |
| && (err2 <= maxErr2) |
| && (err3 <= maxErr3) |
| && (err4 <= maxErr4); |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| |
| // Step 2: If we did not find a match, then print |
| // out debugging info. |
| if (!found_pixel) |
| { |
| // For the normalized case on a GPU we put in |
| // offsets to the X and Y to see if we land on |
| // the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization |
| // in OpenCL 1.0. |
| checkOnlyOnePixel = 0; |
| int shouldReturn = 0; |
| for (float norm_offset_x = -offset; |
| norm_offset_x <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -offset; |
| norm_offset_y <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -offset; |
| norm_offset_z <= offset |
| && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| int hasDenormals = 0; |
| FloatPixel maxPixel = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, imageSampler, |
| expected, 0, &hasDenormals, |
| lod); |
| |
| float err1 = ABS_ERROR(resultPtr[0], |
| expected[0]); |
| float err2 = ABS_ERROR(resultPtr[1], |
| expected[1]); |
| float err3 = ABS_ERROR(resultPtr[2], |
| expected[2]); |
| float err4 = ABS_ERROR(resultPtr[3], |
| expected[3]); |
| float maxErr1 = |
| MAX(maxErr * maxPixel.p[0], |
| FLT_MIN); |
| float maxErr2 = |
| MAX(maxErr * maxPixel.p[1], |
| FLT_MIN); |
| float maxErr3 = |
| MAX(maxErr * maxPixel.p[2], |
| FLT_MIN); |
| float maxErr4 = |
| MAX(maxErr * maxPixel.p[3], |
| FLT_MIN); |
| |
| |
| if (!(err1 <= maxErr1) |
| || !(err2 <= maxErr2) |
| || !(err3 <= maxErr3) |
| || !(err4 <= maxErr4)) |
| { |
| // Try flushing the denormals |
| if (hasDenormals) |
| { |
| maxErr1 += 4 * FLT_MIN; |
| maxErr2 += 4 * FLT_MIN; |
| maxErr3 += 4 * FLT_MIN; |
| maxErr4 += 4 * FLT_MIN; |
| |
| maxPixel = |
| sample_image_pixel_float( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| imageSampler, |
| expected, 0, NULL, |
| lod); |
| |
| err1 = |
| ABS_ERROR(resultPtr[0], |
| expected[0]); |
| err2 = |
| ABS_ERROR(resultPtr[1], |
| expected[1]); |
| err3 = |
| ABS_ERROR(resultPtr[2], |
| expected[2]); |
| err4 = |
| ABS_ERROR(resultPtr[3], |
| expected[3]); |
| } |
| } |
| |
| if (!(err1 <= maxErr1) |
| || !(err2 <= maxErr2) |
| || !(err3 <= maxErr3) |
| || !(err4 <= maxErr4)) |
| { |
| log_error( |
| "FAILED norm_offsets: %g , " |
| "%g , %g:\n", |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z); |
| |
| float tempOut[4]; |
| shouldReturn |= |
| determine_validation_error_offset< |
| float>( |
| imagePtr, imageInfo, |
| imageSampler, resultPtr, |
| expected, error, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, j, |
| numTries, numClamped, |
| true, lod); |
| log_error("Step by step:\n"); |
| FloatPixel temp = |
| sample_image_pixel_float_offset( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, |
| imageSampler, tempOut, |
| 1 /*verbose*/, |
| &hasDenormals, lod); |
| log_error( |
| "\tulps: %2.2f, %2.2f, " |
| "%2.2f, %2.2f (max " |
| "allowed: %2.2f)\n\n", |
| Ulp_Error(resultPtr[0], |
| expected[0]), |
| Ulp_Error(resultPtr[1], |
| expected[1]), |
| Ulp_Error(resultPtr[2], |
| expected[2]), |
| Ulp_Error(resultPtr[3], |
| expected[3]), |
| Ulp_Error( |
| MAKE_HEX_FLOAT( |
| 0x1.000002p0f, |
| 0x1000002L, -24) |
| + maxErr, |
| MAKE_HEX_FLOAT( |
| 0x1.000002p0f, |
| 0x1000002L, -24))); |
| } |
| else |
| { |
| log_error( |
| "Test error: we should " |
| "have detected this " |
| "passing above.\n"); |
| } |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| if (shouldReturn) return 1; |
| } // if (!found_pixel) |
| |
| resultPtr += 4; |
| } |
| } |
| } |
| } |
| /* |
| * UINT output type |
| */ |
| else if (outputType == kUInt) |
| { |
| // Validate unsigned integer results |
| unsigned int *resultPtr = (unsigned int *)(char *)resultValues; |
| unsigned int expected[4]; |
| float error; |
| for (size_t z = 0, j = 0; z < depth_lod; z++) |
| { |
| for (size_t y = 0; y < height_lod; y++) |
| { |
| for (size_t x = 0; x < width_lod; x++, j++) |
| { |
| // Step 1: go through and see if the results verify |
| // for the pixel For the normalized case on a GPU we |
| // put in offsets to the X, Y and Z to see if we |
| // land on the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization in |
| // OpenCL 1.0. |
| int checkOnlyOnePixel = 0; |
| int found_pixel = 0; |
| for (float norm_offset_x = -NORM_OFFSET; |
| norm_offset_x <= NORM_OFFSET && !found_pixel |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -NORM_OFFSET; |
| norm_offset_y <= NORM_OFFSET |
| && !found_pixel && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -NORM_OFFSET; |
| norm_offset_z <= NORM_OFFSET |
| && !found_pixel && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| // If we are not on a GPU, or we are not |
| // normalized, then only test with |
| // offsets (0.0, 0.0) E.g., test one |
| // pixel. |
| if (!imageSampler->normalized_coords |
| || gDeviceType != CL_DEVICE_TYPE_GPU |
| || NORM_OFFSET == 0) |
| { |
| norm_offset_x = 0.0f; |
| norm_offset_y = 0.0f; |
| norm_offset_z = 0.0f; |
| checkOnlyOnePixel = 1; |
| } |
| |
| sample_image_pixel_offset<unsigned int>( |
| imagePtr, imageInfo, |
| xOffsetValues[j], yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, lod); |
| |
| error = errMax( |
| errMax(abs_diff_uint(expected[0], |
| resultPtr[0]), |
| abs_diff_uint(expected[1], |
| resultPtr[1])), |
| errMax( |
| abs_diff_uint(expected[2], |
| resultPtr[2]), |
| abs_diff_uint(expected[3], |
| resultPtr[3]))); |
| |
| if (error < MAX_ERR) found_pixel = 1; |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| |
| // Step 2: If we did not find a match, then print |
| // out debugging info. |
| if (!found_pixel) |
| { |
| // For the normalized case on a GPU we put in |
| // offsets to the X and Y to see if we land on |
| // the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization |
| // in OpenCL 1.0. |
| checkOnlyOnePixel = 0; |
| int shouldReturn = 0; |
| for (float norm_offset_x = -NORM_OFFSET; |
| norm_offset_x <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -NORM_OFFSET; |
| norm_offset_y <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -NORM_OFFSET; |
| norm_offset_z <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| // If we are not on a GPU, or we are |
| // not normalized, then only test |
| // with offsets (0.0, 0.0) E.g., |
| // test one pixel. |
| if (!imageSampler->normalized_coords |
| || gDeviceType |
| != CL_DEVICE_TYPE_GPU |
| || NORM_OFFSET == 0) |
| { |
| norm_offset_x = 0.0f; |
| norm_offset_y = 0.0f; |
| norm_offset_z = 0.0f; |
| checkOnlyOnePixel = 1; |
| } |
| |
| sample_image_pixel_offset< |
| unsigned int>( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, lod); |
| |
| error = errMax( |
| errMax( |
| abs_diff_uint(expected[0], |
| resultPtr[0]), |
| abs_diff_uint( |
| expected[1], |
| resultPtr[1])), |
| errMax( |
| abs_diff_uint(expected[2], |
| resultPtr[2]), |
| abs_diff_uint( |
| expected[3], |
| resultPtr[3]))); |
| |
| if (error > MAX_ERR) |
| { |
| log_error( |
| "FAILED norm_offsets: %g , " |
| "%g , %g:\n", |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z); |
| shouldReturn |= |
| determine_validation_error_offset< |
| unsigned int>( |
| imagePtr, imageInfo, |
| imageSampler, resultPtr, |
| expected, error, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, j, |
| numTries, numClamped, |
| false, lod); |
| } |
| else |
| { |
| log_error( |
| "Test error: we should " |
| "have detected this " |
| "passing above.\n"); |
| } |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| if (shouldReturn) return 1; |
| } // if (!found_pixel) |
| |
| resultPtr += 4; |
| } |
| } |
| } |
| } |
| else |
| /* |
| * INT output type |
| */ |
| { |
| // Validate integer results |
| int *resultPtr = (int *)(char *)resultValues; |
| int expected[4]; |
| float error; |
| for (size_t z = 0, j = 0; z < depth_lod; z++) |
| { |
| for (size_t y = 0; y < height_lod; y++) |
| { |
| for (size_t x = 0; x < width_lod; x++, j++) |
| { |
| // Step 1: go through and see if the results verify |
| // for the pixel For the normalized case on a GPU we |
| // put in offsets to the X, Y and Z to see if we |
| // land on the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization in |
| // OpenCL 1.0. |
| int checkOnlyOnePixel = 0; |
| int found_pixel = 0; |
| for (float norm_offset_x = -NORM_OFFSET; |
| norm_offset_x <= NORM_OFFSET && !found_pixel |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -NORM_OFFSET; |
| norm_offset_y <= NORM_OFFSET |
| && !found_pixel && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -NORM_OFFSET; |
| norm_offset_z <= NORM_OFFSET |
| && !found_pixel && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| // If we are not on a GPU, or we are not |
| // normalized, then only test with |
| // offsets (0.0, 0.0) E.g., test one |
| // pixel. |
| if (!imageSampler->normalized_coords |
| || gDeviceType != CL_DEVICE_TYPE_GPU |
| || NORM_OFFSET == 0) |
| { |
| norm_offset_x = 0.0f; |
| norm_offset_y = 0.0f; |
| norm_offset_z = 0.0f; |
| checkOnlyOnePixel = 1; |
| } |
| |
| sample_image_pixel_offset<int>( |
| imagePtr, imageInfo, |
| xOffsetValues[j], yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, lod); |
| |
| error = errMax( |
| errMax(abs_diff_int(expected[0], |
| resultPtr[0]), |
| abs_diff_int(expected[1], |
| resultPtr[1])), |
| errMax(abs_diff_int(expected[2], |
| resultPtr[2]), |
| abs_diff_int(expected[3], |
| resultPtr[3]))); |
| |
| if (error < MAX_ERR) found_pixel = 1; |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| |
| // Step 2: If we did not find a match, then print |
| // out debugging info. |
| if (!found_pixel) |
| { |
| // For the normalized case on a GPU we put in |
| // offsets to the X and Y to see if we land on |
| // the right pixel. This addresses the |
| // significant inaccuracy in GPU normalization |
| // in OpenCL 1.0. |
| checkOnlyOnePixel = 0; |
| int shouldReturn = 0; |
| for (float norm_offset_x = -NORM_OFFSET; |
| norm_offset_x <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_x += NORM_OFFSET) |
| { |
| for (float norm_offset_y = -NORM_OFFSET; |
| norm_offset_y <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_y += NORM_OFFSET) |
| { |
| for (float norm_offset_z = -NORM_OFFSET; |
| norm_offset_z <= NORM_OFFSET |
| && !checkOnlyOnePixel; |
| norm_offset_z += NORM_OFFSET) |
| { |
| |
| // If we are not on a GPU, or we are |
| // not normalized, then only test |
| // with offsets (0.0, 0.0) E.g., |
| // test one pixel. |
| if (!imageSampler->normalized_coords |
| || gDeviceType |
| != CL_DEVICE_TYPE_GPU |
| || NORM_OFFSET == 0 |
| || NORM_OFFSET == 0 |
| || NORM_OFFSET == 0) |
| { |
| norm_offset_x = 0.0f; |
| norm_offset_y = 0.0f; |
| norm_offset_z = 0.0f; |
| checkOnlyOnePixel = 1; |
| } |
| |
| sample_image_pixel_offset<int>( |
| imagePtr, imageInfo, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], norm_offset_x, |
| norm_offset_y, norm_offset_z, |
| imageSampler, expected, lod); |
| |
| error = errMax( |
| errMax( |
| abs_diff_int(expected[0], |
| resultPtr[0]), |
| abs_diff_int(expected[1], |
| resultPtr[1])), |
| errMax( |
| abs_diff_int(expected[2], |
| resultPtr[2]), |
| abs_diff_int( |
| expected[3], |
| resultPtr[3]))); |
| |
| if (error > MAX_ERR) |
| { |
| log_error( |
| "FAILED norm_offsets: %g , " |
| "%g , %g:\n", |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z); |
| shouldReturn |= |
| determine_validation_error_offset< |
| int>( |
| imagePtr, imageInfo, |
| imageSampler, resultPtr, |
| expected, error, |
| xOffsetValues[j], |
| yOffsetValues[j], |
| zOffsetValues[j], |
| norm_offset_x, |
| norm_offset_y, |
| norm_offset_z, j, |
| numTries, numClamped, |
| false, lod); |
| } |
| else |
| { |
| log_error( |
| "Test error: we should " |
| "have detected this " |
| "passing above.\n"); |
| } |
| } // norm_offset_z |
| } // norm_offset_y |
| } // norm_offset_x |
| if (shouldReturn) return 1; |
| } // if (!found_pixel) |
| |
| resultPtr += 4; |
| } |
| } |
| } |
| } |
| } |
| { |
| nextLevelOffset += width_lod * height_lod * depth_lod |
| * get_pixel_size(imageInfo->format); |
| width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; |
| height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; |
| depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1; |
| } |
| } |
| |
| return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; |
| } |
| |
| void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr) |
| { |
| // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555, |
| // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be |
| // either 0 or 1.) |
| if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555) |
| { |
| cl_ushort *temp = (cl_ushort *)resultPtr; |
| temp[0] &= 0x7fff; |
| } |
| } |
| |
| int filter_rounding_errors(int forceCorrectlyRoundedWrites, |
| image_descriptor *imageInfo, float *errors) |
| { |
| // We are allowed 0.6 absolute error vs. infinitely precise for some |
| // normalized formats |
| if (0 == forceCorrectlyRoundedWrites |
| && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8 |
| || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 |
| || imageInfo->format->image_channel_data_type == CL_UNORM_INT16 |
| || imageInfo->format->image_channel_data_type == CL_SNORM_INT8 |
| || imageInfo->format->image_channel_data_type == CL_SNORM_INT16 |
| || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555 |
| || imageInfo->format->image_channel_data_type |
| == CL_UNORM_SHORT_565)) |
| { |
| if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f) |
| && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f)) |
| return 0; |
| } |
| |
| return 1; |
| } |