diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl index af2c8a73221..23cbb1b6527 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl @@ -35,7 +35,11 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * output at a single output location. */ void main() { - const ivec3 pos = idx_to_ipos_x_wise(gl_GlobalInvocationID.x, out_limits.x, out_limits.y); + const uint div_by_x = gl_GlobalInvocationID.x / out_limits.x; + const ivec3 pos = ivec3( + gl_GlobalInvocationID.x % out_limits.x, + div_by_x % out_limits.y, + div_by_x / out_limits.y); if (any(greaterThanEqual(pos, out_limits))) { return; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl index f49bd7bbf19..11b9992a5ff 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl @@ -47,7 +47,11 @@ void main() { // since work size is calculated by x * ((y + B_Y - 1) / B_Y) * z const ivec2 out_limits_xy_scaled = (out_limits.xy + ivec2(BATCH_SIZE_X, BATCH_SIZE_Y) - 1) / ivec2(BATCH_SIZE_X, BATCH_SIZE_Y); - ivec3 pos = idx_to_ipos_x_wise(gl_GlobalInvocationID.x, out_limits_xy_scaled.x, out_limits_xy_scaled.y); + const uint div_by_x = gl_GlobalInvocationID.x / out_limits_xy_scaled.x; + ivec3 pos = ivec3( + gl_GlobalInvocationID.x % out_limits_xy_scaled.x, + div_by_x % out_limits_xy_scaled.y, + div_by_x / out_limits_xy_scaled.y); // scale pos.xy by batch sizes, because that's the top pixel to be processed pos.x *= BATCH_SIZE_X; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index caffcdbbdc0..b50a892cad5 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -44,7 +44,11 @@ void main() { const ivec2 out_limits_scaled = (out_limits.xy + TILE_SIZE - 1) / TILE_SIZE; const uint shared_mem_stride = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; - const ivec3 gpos = idx_to_ipos_x_wise(gl_GlobalInvocationID.x, out_limits_scaled.x, out_limits_scaled.y); + const uint div_by_x = gl_GlobalInvocationID.x / out_limits_scaled.x; + const ivec3 gpos = ivec3( + gl_GlobalInvocationID.x % out_limits_scaled.x, + div_by_x % out_limits_scaled.y, + div_by_x / out_limits_scaled.y); // Output position for TILE_SIZE = 2 // +--------+--------+ diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h index 1d3a60cb293..0b372ab70a4 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h +++ b/backends/vulkan/runtime/graph/ops/glsl/indexing_utils.h @@ -223,11 +223,6 @@ ivec3 lpos_to_pos(const ivec3 lpos, const ivec4 axis_map) { return pos; } -ivec3 idx_to_ipos_x_wise(uint idx, int size_x, int size_y) { - const uint div_by_x = idx / size_x; - return ivec3(idx % size_x, div_by_x % size_y, div_by_x / size_y); -} - #ifdef USING_BUFFER #define load_texel(buf, idx) buf[idx] #elif defined(USING_TEXTURE2D)