From 4d9679f996ffe4319327207dec4c5af3552ab37d Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:43:34 -0800 Subject: [PATCH] [ET-VK] Changing texture access pattern for conv2d pw op to improve performance. This diff changes the texture access pattern for the conv2d pw op so that the flattened global invocation index iterates along the x axis first, then y, and then z, to improve performance. Differential Revision: [D67769100](https://our.internmc.facebook.com/intern/diff/D67769100/) [ghstack-poisoned] --- backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index 2393ed33450..23ad912c11a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -43,13 +43,13 @@ shared u16vec2 pos_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroup * size is only 1x1, making it easier to re-use loaded texels from t_kernel. */ void main() { - const uint16_t out_limits_y_scaled = uint16_t((out_limits.y + TILE_SIZE - 1) / TILE_SIZE); + const uvec2 out_limits_scaled = (out_limits.xy + TILE_SIZE - 1) / TILE_SIZE; const uint shared_mem_stride = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; const u16vec3 gpos = u16vec3( - gl_GlobalInvocationID.x / (out_limits_y_scaled * out_limits.z), - (gl_GlobalInvocationID.x / out_limits.z) % out_limits_y_scaled, - gl_GlobalInvocationID.x % out_limits.z); + gl_GlobalInvocationID.x % out_limits_scaled.x, + (gl_GlobalInvocationID.x / out_limits_scaled.x) % out_limits_scaled.y, + gl_GlobalInvocationID.x / (out_limits_scaled.x * out_limits_scaled.y)); // Output position for TILE_SIZE = 2 // +--------+--------+