[Graphite]: Use DawnTexelCopyBufferRowAlignment

The default value of fTextureDataRowBytesAlignment is 256. When the
DawnTexelCopyBufferRowAlignment feature is enabled, the value is
instead queried from the device limits, where it can be much smaller
than 256. This may reduce memory copy overhead on the Dawn D3D11
backend.

Bug: chromium:378361783
Change-Id: I2176c047730aaef7d7b5c18463a66178f546768c
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/926874
Reviewed-by: Michael Ludwig <michaelludwig@google.com>
Commit-Queue: Jie A Chen <jie.a.chen@intel.com>
Reviewed-by: Greg Daniel <egdaniel@google.com>
diff --git a/src/gpu/graphite/dawn/DawnCaps.cpp b/src/gpu/graphite/dawn/DawnCaps.cpp
index 325ffff..c7b3549 100644
--- a/src/gpu/graphite/dawn/DawnCaps.cpp
+++ b/src/gpu/graphite/dawn/DawnCaps.cpp
@@ -433,6 +433,10 @@
     SkASSERT(limitsSucceeded);
 #endif
 #else
+    wgpu::DawnTexelCopyBufferRowAlignmentLimits alignmentLimits{};
+    if (backendContext.fDevice.HasFeature(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment)) {
+        limits.nextInChain = &alignmentLimits;
+    }
     [[maybe_unused]] wgpu::Status status = backendContext.fDevice.GetLimits(&limits);
     SkASSERT(status == wgpu::Status::Success);
 #endif
@@ -445,6 +449,13 @@
 
     // Dawn requires 256 bytes per row alignment for buffer texture copies.
     fTextureDataRowBytesAlignment = 256;
+#if !defined(__EMSCRIPTEN__)
+    // If the device supports the DawnTexelCopyBufferRowAlignment feature, the alignment can be
+    // queried from its limits.
+    if (backendContext.fDevice.HasFeature(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment)) {
+        fTextureDataRowBytesAlignment = alignmentLimits.minTexelCopyBufferRowAlignment;
+    }
+#endif
 
     fResourceBindingReqs.fUniformBufferLayout = Layout::kStd140;
     // The WGSL generator assumes tightly packed std430 layout for SSBOs which is also the default
diff --git a/src/gpu/graphite/dawn/DawnCommandBuffer.cpp b/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
index 8a9e2cd..3c84d3c 100644
--- a/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
+++ b/src/gpu/graphite/dawn/DawnCommandBuffer.cpp
@@ -1116,9 +1116,6 @@
     wgpu::ImageCopyBuffer dst;
     dst.buffer = wgpuBuffer;
     dst.layout.offset = bufferOffset;
-    // Dawn requires buffer's alignment to be multiples of 256.
-    // https://b.corp.google.com/issues/259264489
-    SkASSERT((bufferRowBytes & 0xFF) == 0);
     dst.layout.bytesPerRow = bufferRowBytes;
 
     wgpu::Extent3D copySize = {
@@ -1146,9 +1143,6 @@
 
     for (int i = 0; i < count; ++i) {
         src.layout.offset = copyData[i].fBufferOffset;
-        // Dawn requires buffer's alignment to be multiples of 256.
-        // https://b.corp.google.com/issues/259264489
-        SkASSERT((copyData[i].fBufferRowBytes & 0xFF) == 0);
         src.layout.bytesPerRow = copyData[i].fBufferRowBytes;
 
         dst.origin.x = copyData[i].fRect.x();
diff --git a/tools/graphite/dawn/GraphiteDawnTestContext.cpp b/tools/graphite/dawn/GraphiteDawnTestContext.cpp
index 7eac2c6..1c5d20d 100644
--- a/tools/graphite/dawn/GraphiteDawnTestContext.cpp
+++ b/tools/graphite/dawn/GraphiteDawnTestContext.cpp
@@ -140,6 +140,9 @@
     if (adapter.HasFeature(wgpu::FeatureName::TimestampQuery)) {
         features.push_back(wgpu::FeatureName::TimestampQuery);
     }
+    if (adapter.HasFeature(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment)) {
+        features.push_back(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment);
+    }
 
     wgpu::DeviceDescriptor desc;
     desc.requiredFeatureCount  = features.size();
diff --git a/tools/window/GraphiteDawnWindowContext.cpp b/tools/window/GraphiteDawnWindowContext.cpp
index 4cf120e..d65bb5c 100644
--- a/tools/window/GraphiteDawnWindowContext.cpp
+++ b/tools/window/GraphiteDawnWindowContext.cpp
@@ -190,6 +190,9 @@
     if (adapter.HasFeature(wgpu::FeatureName::TimestampQuery)) {
         features.push_back(wgpu::FeatureName::TimestampQuery);
     }
+    if (adapter.HasFeature(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment)) {
+        features.push_back(wgpu::FeatureName::DawnTexelCopyBufferRowAlignment);
+    }
 
     wgpu::DeviceDescriptor deviceDescriptor;
     deviceDescriptor.requiredFeatures = features.data();