| // |
| // Copyright 2021 The ANGLE Project Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // cl_types.h: Defines common types for the OpenCL support in ANGLE. |
| // |
| |
| #ifndef LIBANGLE_CLTYPES_H_ |
| #define LIBANGLE_CLTYPES_H_ |
| |
| #ifdef UNSAFE_BUFFERS_BUILD |
| # pragma allow_unsafe_buffers |
| #endif |
| |
| #if defined(ANGLE_ENABLE_CL) |
| # include "libANGLE/CLBitField.h" |
| # include "libANGLE/CLRefPointer.h" |
| # include "libANGLE/Debug.h" |
| # include "libANGLE/angletypes.h" |
| |
| # include "common/PackedCLEnums_autogen.h" |
| # include "common/WorkerThread.h" |
| # include "common/angleutils.h" |
| |
| // Include frequently used standard headers |
| # include <algorithm> |
| # include <array> |
| # include <functional> |
| # include <list> |
| # include <memory> |
| # include <string> |
| # include <utility> |
| # include <vector> |
| |
| namespace cl |
| { |
| |
| class Buffer; |
| class CommandQueue; |
| class Context; |
| class Device; |
| class Event; |
| class Image; |
| class Kernel; |
| class Memory; |
| class Object; |
| class Platform; |
| class Program; |
| class Sampler; |
| |
| using BufferPtr = RefPointer<Buffer>; |
| using CommandQueuePtr = RefPointer<CommandQueue>; |
| using ContextPtr = RefPointer<Context>; |
| using DevicePtr = RefPointer<Device>; |
| using EventPtr = RefPointer<Event>; |
| using KernelPtr = RefPointer<Kernel>; |
| using MemoryPtr = RefPointer<Memory>; |
| using PlatformPtr = RefPointer<Platform>; |
| using ProgramPtr = RefPointer<Program>; |
| using SamplerPtr = RefPointer<Sampler>; |
| |
| using BufferPtrs = std::vector<BufferPtr>; |
| using DevicePtrs = std::vector<DevicePtr>; |
| using EventPtrs = std::vector<EventPtr>; |
| using KernelPtrs = std::vector<KernelPtr>; |
| using MemoryPtrs = std::vector<MemoryPtr>; |
| using PlatformPtrs = std::vector<PlatformPtr>; |
| using ProgramPtrs = std::vector<ProgramPtr>; |
| using SamplerPtrs = std::vector<SamplerPtr>; |
| |
| using WorkgroupSize = std::array<uint32_t, 3>; |
| using GlobalWorkOffset = std::array<uint32_t, 3>; |
| using GlobalWorkSize = std::array<uint32_t, 3>; |
| using WorkgroupCount = std::array<uint32_t, 3>; |
| |
| template <typename T> |
| using EventStatusMap = std::array<T, 3>; |
| |
| using Extents = angle::Extents<size_t>; |
| constexpr Extents kExtentsZero(0, 0, 0); |
| using Offset = angle::Offset<size_t>; |
| constexpr Offset kOffsetZero(0, 0, 0); |
| |
| struct KernelArg |
| { |
| bool isSet; |
| cl_uint index; |
| size_t size; |
| const void *valuePtr; |
| }; |
| |
| struct BufferRect |
| { |
| BufferRect(const Offset &offset, |
| const Extents &size, |
| const size_t row_pitch, |
| const size_t slice_pitch, |
| const size_t element_size = 1) |
| : mOrigin(offset), |
| mSize(size), |
| mRowPitch(row_pitch == 0 ? element_size * size.width : row_pitch), |
| mSlicePitch(slice_pitch == 0 ? mRowPitch * size.height : slice_pitch), |
| mElementSize(element_size) |
| {} |
| bool valid() const |
| { |
| return mSize.width != 0 && mSize.height != 0 && mSize.depth != 0 && |
| mRowPitch >= mSize.width * mElementSize && mSlicePitch >= mRowPitch * mSize.height && |
| mElementSize > 0; |
| } |
| bool operator==(const BufferRect &other) const |
| { |
| return (mOrigin == other.mOrigin && mSize == other.mSize && mRowPitch == other.mRowPitch && |
| mSlicePitch == other.mSlicePitch && mElementSize == other.mElementSize); |
| } |
| bool operator!=(const BufferRect &other) const { return !(*this == other); } |
| |
| size_t getRowOffset(size_t slice, size_t row) const |
| { |
| return ((mRowPitch * (mOrigin.y + row)) + (mOrigin.x * mElementSize)) + // row offset |
| (mSlicePitch * (mOrigin.z + slice)); // slice offset |
| } |
| // Calculates the offset of the starting position of the rectangle |
| size_t getBufferOffset() const { return getRowOffset(0, 0); } |
| |
| size_t getRowPitch() const { return mRowPitch; } |
| size_t getSlicePitch() const { return mSlicePitch; } |
| |
| // Given the offset, row pitch, slice pitch, this returns the size of the buffer in which this |
| // rect sits. |
| // |
| // row_pitch |
| // +------------------------------+ |
| // | origin | |
| // | +---------------+ | |
| // | | |height | |
| // | | | | |
| // | | width | | |
| // +------+---------------+-------+ |
| // - size of the buffer is |
| // - initial offset to start of rect |
| // - + size of the rectangle |
| // - - (extra regions for the last slice and row) |
| size_t getRectSize() const |
| { |
| // Treat: |
| // S: mSlicePitch, R: mRowPitch, O: Offset(xyz_dim), D: mSize.depth, H: mSize.height, |
| // W: mSize.width, E: mElementSize |
| // |
| // strideHeightPadding == (S / R) - H |
| // strideRowPadding == R - (W * E) |
| // |
| // getRectSize() = |
| // getBufferOffset() // initial offset to the start of rect |
| // + (S * D) // size of the rectangle |
| // - (strideHeightPadding * R) // extraneous region for the last slice |
| // - strideRowPadding // extraneous region for the last row |
| // |
| // Where: |
| // getBufferOffset() == getRowOffset(0,0) == S(Oz) + R(Oy) + (Ox * E) |
| // getRowOffset(s, r) == S(Oz + s) + R(Oy + r) + (Ox * E) |
| // |
| // Simplifies to: |
| // getRectSize() = S(Oz) + R(Oy) + (Ox * E) + (S * D) - R(S/R - H) - R - (W * E) |
| // getRectSize() = S(Oz + (D - 1)) + R(Oy + (H - 1)) + (Ox * E) + (W * E) |
| // ... |
| // getRectSize() = getRowOffset((D - 1), (H - 1)) + (W * E) |
| |
| // The end of the rect is the beginning of the last row + the length of the row. |
| // This logic excludes paddings for the last row / slice. |
| return getRowOffset(mSize.depth - 1, mSize.height - 1) + mSize.width * mElementSize; |
| } |
| const Extents &getExtents() const { return mSize; } |
| Offset mOrigin; |
| Extents mSize; |
| size_t mRowPitch; |
| size_t mSlicePitch; |
| size_t mElementSize; |
| }; |
| |
| struct ImageDescriptor |
| { |
| MemObjectType type; |
| size_t width; |
| size_t height; |
| size_t depth; |
| size_t arraySize; |
| size_t rowPitch; |
| size_t slicePitch; |
| cl_uint numMipLevels; |
| cl_uint numSamples; |
| |
| ImageDescriptor(MemObjectType type_, |
| size_t width_, |
| size_t height_, |
| size_t depth_, |
| size_t arraySize_, |
| size_t rowPitch_, |
| size_t slicePitch_, |
| cl_uint numMipLevels_, |
| cl_uint numSamples_) |
| : type(type_), |
| width(width_), |
| height(height_), |
| depth(depth_), |
| arraySize(arraySize_), |
| rowPitch(rowPitch_), |
| slicePitch(slicePitch_), |
| numMipLevels(numMipLevels_), |
| numSamples(numSamples_) |
| { |
| if (type == MemObjectType::Image1D || type == MemObjectType::Image1D_Array || |
| type == MemObjectType::Image1D_Buffer) |
| { |
| depth = 1; |
| height = 1; |
| } |
| if (type == MemObjectType::Image2D || type == MemObjectType::Image2D_Array) |
| { |
| depth = 1; |
| } |
| if (!(type == MemObjectType::Image1D_Array || type == MemObjectType::Image2D_Array)) |
| { |
| arraySize = 1; |
| } |
| } |
| }; |
| |
| struct NDRange |
| { |
| NDRange(cl_uint workDimensionsIn, |
| const size_t *globalWorkOffsetIn, |
| const size_t *globalWorkSizeIn, |
| const size_t *localWorkSizeIn) |
| : workDimensions(workDimensionsIn), |
| globalWorkOffset({0, 0, 0}), |
| globalWorkSize({1, 1, 1}), |
| localWorkSize({1, 1, 1}), |
| nullLocalWorkSize(localWorkSizeIn == nullptr) |
| { |
| for (cl_uint dim = 0; dim < workDimensionsIn; dim++) |
| { |
| if (globalWorkOffsetIn != nullptr) |
| { |
| ASSERT(!(static_cast<uint32_t>((globalWorkOffsetIn[dim] + globalWorkSizeIn[dim])) < |
| globalWorkOffsetIn[dim])); |
| globalWorkOffset[dim] = static_cast<uint32_t>(globalWorkOffsetIn[dim]); |
| } |
| if (globalWorkSizeIn != nullptr) |
| { |
| ASSERT(globalWorkSizeIn[dim] <= UINT32_MAX); |
| globalWorkSize[dim] = static_cast<uint32_t>(globalWorkSizeIn[dim]); |
| } |
| if (localWorkSizeIn != nullptr) |
| { |
| ASSERT(localWorkSizeIn[dim] <= UINT32_MAX); |
| localWorkSize[dim] = static_cast<uint32_t>(localWorkSizeIn[dim]); |
| } |
| } |
| } |
| |
| cl::WorkgroupCount getWorkgroupCount() const |
| { |
| ASSERT(localWorkSize[0] > 0 && localWorkSize[1] > 0 && localWorkSize[2] > 0); |
| return cl::WorkgroupCount{rx::UnsignedCeilDivide(globalWorkSize[0], localWorkSize[0]), |
| rx::UnsignedCeilDivide(globalWorkSize[1], localWorkSize[1]), |
| rx::UnsignedCeilDivide(globalWorkSize[2], localWorkSize[2])}; |
| } |
| |
| bool isUniform() const |
| { |
| for (cl_uint dim = 0; dim < workDimensions; dim++) |
| { |
| if (globalWorkSize[dim] % localWorkSize[dim] != 0) |
| { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| std::vector<NDRange> createUniformRegions( |
| const std::array<uint32_t, 3> maxComputeWorkGroupCount) const |
| { |
| std::vector<NDRange> regions; |
| regions.push_back(*this); |
| regions.front().globalWorkOffset = {0}; |
| uint32_t regionCount = 1; |
| for (uint32_t regionPos = 0; regionPos < regionCount; ++regionPos) |
| { |
| // "Work-group sizes could be non-uniform in multiple dimensions, potentially producing |
| // work-groups of up to 4 different sizes in a 2D range and 8 different sizes in a 3D |
| // range." |
| // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_mapping_work_items_onto_an_nd_range |
| ASSERT(regionPos < 8); |
| |
| for (uint32_t dim = 0; dim < workDimensions; dim++) |
| { |
| NDRange ®ion = regions.at(regionPos); |
| uint32_t remainder = region.globalWorkSize[dim] % region.localWorkSize[dim]; |
| if (remainder != 0) |
| { |
| // Split the range along this dimension. The original range's global work size |
| // (e.g. 19) is clipped to a multiple of the local work size (e.g. 8). A new |
| // range is added for the remainder (in this example 3) where the global and |
| // local work sizes are identical to the remainder (i.e. it's also a uniform |
| // range). |
| NDRange newRegion(region); |
| newRegion.globalWorkSize[dim] = newRegion.localWorkSize[dim] = remainder; |
| region.globalWorkSize[dim] = newRegion.globalWorkOffset[dim] = |
| (region.globalWorkSize[dim] - remainder); |
| regions.push_back(newRegion); |
| regionCount++; |
| } |
| } |
| } |
| // Break into uniform regions that fit into given maxComputeWorkGroupCount (if needed) |
| uint32_t limitRegionCount = 1; |
| std::vector<NDRange> regionsWithinDeviceLimits; |
| for (const auto ®ion : regions) |
| { |
| regionsWithinDeviceLimits.push_back(region); |
| for (uint32_t regionPos = 0; regionPos < limitRegionCount; ++regionPos) |
| { |
| NDRange ¤tRegion = regionsWithinDeviceLimits.at(regionPos); |
| for (uint32_t dim = 0; dim < workDimensions; dim++) |
| { |
| uint32_t maxGwsForRegion = gl::clampCast<uint32_t, uint64_t>( |
| static_cast<uint64_t>(maxComputeWorkGroupCount[dim]) * |
| static_cast<uint64_t>(currentRegion.localWorkSize[dim])); |
| |
| if (currentRegion.globalWorkSize[dim] > maxGwsForRegion) |
| { |
| uint32_t remainderGws = currentRegion.globalWorkSize[dim] - maxGwsForRegion; |
| if (remainderGws > 0) |
| { |
| NDRange remainderRegion = currentRegion; |
| remainderRegion.globalWorkSize[dim] = remainderGws; |
| remainderRegion.globalWorkOffset[dim] = |
| currentRegion.globalWorkOffset[dim] + |
| (currentRegion.globalWorkSize[dim] - remainderGws); |
| currentRegion.globalWorkSize[dim] = maxGwsForRegion; |
| regionsWithinDeviceLimits.push_back(remainderRegion); |
| limitRegionCount++; |
| } |
| } |
| } |
| } |
| } |
| return regionsWithinDeviceLimits; |
| } |
| |
| cl_uint workDimensions; |
| GlobalWorkOffset globalWorkOffset; |
| GlobalWorkSize globalWorkSize; |
| WorkgroupSize localWorkSize; |
| bool nullLocalWorkSize{false}; |
| }; |
| |
| // Memory property element from cl_khr_external_memory |
| struct NameValueProperty |
| { |
| intptr_t name; |
| intptr_t value; |
| }; |
| |
| // this Defer class provides the user with a closure that executes on its destruction |
| template <typename F> |
| class Defer : public angle::Closure |
| { |
| public: |
| Defer(F &&func) : mFunc(std::forward<F>(func)) {} |
| ~Defer() override { operator()(); } |
| void operator()() override { mFunc(); } |
| |
| private: |
| F mFunc; |
| }; |
| |
| } // namespace cl |
| |
| #endif // ANGLE_ENABLE_CL |
| |
| #endif // LIBANGLE_CLTYPES_H_ |