From cdf0fde3e7bfcf07a1d2cb0122475456efb9ad66 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 09:48:39 -0700 Subject: [PATCH 1/9] ++ --- impeller/renderer/backend/metal/compute_pass_mtl.mm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/impeller/renderer/backend/metal/compute_pass_mtl.mm b/impeller/renderer/backend/metal/compute_pass_mtl.mm index 5e461a8d0e764..d0033da0c75cd 100644 --- a/impeller/renderer/backend/metal/compute_pass_mtl.mm +++ b/impeller/renderer/backend/metal/compute_pass_mtl.mm @@ -258,8 +258,10 @@ static bool Bind(ComputePassBindingsCache& pass, // Special case for linear processing. if (height == 1) { - int64_t threadGroups = - std::max(width / maxTotalThreadsPerThreadgroup, 1LL); + int64_t threadGroups = std::max( + static_cast( + std::ceil(width * 1.0 / maxTotalThreadsPerThreadgroup * 1.0)), + 1LL); [encoder dispatchThreadgroups:MTLSizeMake(threadGroups, 1, 1) threadsPerThreadgroup:MTLSizeMake(maxTotalThreadsPerThreadgroup, 1, 1)]; From ab061d6ed97484632964034249e78f734ddd98b0 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 09:57:47 -0700 Subject: [PATCH 2/9] fix vulkan computation --- .../backend/vulkan/compute_pass_vk.cc | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc index bb1651c83d332..79f19111fbc8a 100644 --- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc +++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc @@ -252,14 +252,21 @@ bool ComputePassVK::OnEncodeCommands(const Context& context, int64_t width = grid_size.width; int64_t height = grid_size.height; - while (width > max_wg_size[0]) { - width = std::max(static_cast(1), width / 2); + // Special case for linear processing. + if (height == 1) { + int64_t threadGroups = std::max( + static_cast(std::ceil(width * 1.0 / max_wg_size[0] * 1.0)), + 1LL); + cmd_buffer.dispatch(threadGroups, 1, 1); + } else { + while (width > max_wg_size[0]) { + width = std::max(static_cast(1), width / 2); + } + while (height > max_wg_size[1]) { + height = std::max(static_cast(1), height / 2); + } + cmd_buffer.dispatch(width, height, 1); } - while (height > max_wg_size[1]) { - height = std::max(static_cast(1), height / 2); - } - - cmd_buffer.dispatch(width, height, 1); } } From 8b3fed1281171d6b6ca51d93c71eae0c51ff2f5b Mon Sep 17 00:00:00 2001 From: Jonah Williams Date: Fri, 2 Jun 2023 10:08:48 -0700 Subject: [PATCH 3/9] Update compute_pass_vk.cc --- impeller/renderer/backend/vulkan/compute_pass_vk.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc index 79f19111fbc8a..3777883c3ed4d 100644 --- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc +++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc @@ -256,7 +256,7 @@ bool ComputePassVK::OnEncodeCommands(const Context& context, if (height == 1) { int64_t threadGroups = std::max( static_cast(std::ceil(width * 1.0 / max_wg_size[0] * 1.0)), - 1LL); + 1); cmd_buffer.dispatch(threadGroups, 1, 1); } else { while (width > max_wg_size[0]) { From 8966c2a796973cd7910bf93bcc84bd15fc310636 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 11:45:06 -0700 Subject: [PATCH 4/9] provide vulkan compute shaders with specialization constant for x wg size --- impeller/renderer/BUILD.gn | 1 + .../backend/vulkan/compute_pass_vk.cc | 2 +- impeller/renderer/backend/vulkan/context_vk.h | 3 ++ .../renderer/backend/vulkan/device_holder.h | 1 + .../backend/vulkan/pipeline_library_vk.cc | 27 ++++++++-- impeller/renderer/compute_pipeline_builder.h | 3 +- impeller/renderer/compute_unittests.cc | 54 +++++++++++++++++++ impeller/renderer/prefix_sum_test.comp | 4 +- .../renderer/threadgroup_sizing_test.comp | 18 +++++++ 9 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 impeller/renderer/threadgroup_sizing_test.comp diff --git a/impeller/renderer/BUILD.gn b/impeller/renderer/BUILD.gn index ec516cd56829c..b43c71a3e34b0 100644 --- a/impeller/renderer/BUILD.gn +++ b/impeller/renderer/BUILD.gn @@ -23,6 +23,7 @@ if (impeller_enable_compute) { "stroke.comp", "path_polyline.comp", "prefix_sum_test.comp", + "threadgroup_sizing_test.comp", ] } diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc index 3777883c3ed4d..79f19111fbc8a 100644 --- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc +++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc @@ -256,7 +256,7 @@ bool ComputePassVK::OnEncodeCommands(const Context& context, if (height == 1) { int64_t threadGroups = std::max( static_cast(std::ceil(width * 1.0 / max_wg_size[0] * 1.0)), - 1); + 1LL); cmd_buffer.dispatch(threadGroups, 1, 1); } else { while (width > max_wg_size[0]) { diff --git a/impeller/renderer/backend/vulkan/context_vk.h b/impeller/renderer/backend/vulkan/context_vk.h index 53f51757903d3..c8c8cd03b26d8 100644 --- a/impeller/renderer/backend/vulkan/context_vk.h +++ b/impeller/renderer/backend/vulkan/context_vk.h @@ -136,6 +136,9 @@ class ContextVK final : public Context, struct DeviceHolderImpl : public DeviceHolder { // |DeviceHolder| const vk::Device& GetDevice() const override { return device.get(); } + // |DeviceHolder| + const vk::PhysicalDevice& GetPhysicalDevice() const override { return physical_device; } + vk::UniqueInstance instance; vk::PhysicalDevice physical_device; vk::UniqueDevice device; diff --git a/impeller/renderer/backend/vulkan/device_holder.h b/impeller/renderer/backend/vulkan/device_holder.h index cb9fdee248584..9086666ba13ed 100644 --- a/impeller/renderer/backend/vulkan/device_holder.h +++ b/impeller/renderer/backend/vulkan/device_holder.h @@ -12,6 +12,7 @@ class DeviceHolder { public: virtual ~DeviceHolder() = default; virtual const vk::Device& GetDevice() const = 0; + virtual const vk::PhysicalDevice& GetPhysicalDevice() const = 0; }; } // namespace impeller diff --git a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc index c2e26c74bf254..1a786d2233e4e 100644 --- a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc +++ b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc @@ -357,16 +357,35 @@ std::unique_ptr PipelineLibraryVK::CreateComputePipeline( return nullptr; } + std::shared_ptr strong_device = device_holder_.lock(); + if (!strong_device) { + return nullptr; + } + auto device_properties = strong_device->GetPhysicalDevice().getProperties(); + auto max_wg_size = device_properties.limits.maxComputeWorkGroupSize; + + // Give all compute shaders a specialization constant entry for the + // workgroup/threadgroup size. + vk::SpecializationMapEntry specialization_map_entry[1]; + + uint32_t workgroup_size_x = max_wg_size[0]; + specialization_map_entry[0].constantID = 0; + specialization_map_entry[0].offset = 0; + specialization_map_entry[0].size = sizeof(uint32_t); + + vk::SpecializationInfo specialization_info; + specialization_info.mapEntryCount = 1; + specialization_info.pMapEntries = &specialization_map_entry[0]; + specialization_info.dataSize = sizeof(uint32_t); + specialization_info.pData = &workgroup_size_x; + vk::PipelineShaderStageCreateInfo info; info.setStage(vk::ShaderStageFlagBits::eCompute); info.setPName("main"); info.setModule(ShaderFunctionVK::Cast(entrypoint.get())->GetModule()); + info.setPSpecializationInfo(&specialization_info); pipeline_info.setStage(info); - std::shared_ptr strong_device = device_holder_.lock(); - if (!strong_device) { - return nullptr; - } //---------------------------------------------------------------------------- /// Pipeline Layout a.k.a the descriptor sets and uniforms. diff --git a/impeller/renderer/compute_pipeline_builder.h b/impeller/renderer/compute_pipeline_builder.h index f4fe1b8b9763f..7a7d34cbcedcf 100644 --- a/impeller/renderer/compute_pipeline_builder.h +++ b/impeller/renderer/compute_pipeline_builder.h @@ -45,9 +45,8 @@ struct ComputePipelineBuilder { ComputePipelineDescriptor desc; if (InitializePipelineDescriptorDefaults(context, desc)) { return {std::move(desc)}; - } else { - return std::nullopt; } + return std::nullopt; } [[nodiscard]] static bool InitializePipelineDescriptorDefaults( diff --git a/impeller/renderer/compute_unittests.cc b/impeller/renderer/compute_unittests.cc index 2ebb92c96563b..67517a4ebaafa 100644 --- a/impeller/renderer/compute_unittests.cc +++ b/impeller/renderer/compute_unittests.cc @@ -19,6 +19,7 @@ #include "impeller/renderer/compute_pipeline_builder.h" #include "impeller/renderer/pipeline_library.h" #include "impeller/renderer/prefix_sum_test.comp.h" +#include "impeller/renderer/threadgroup_sizing_test.comp.h" namespace impeller { namespace testing { @@ -176,6 +177,59 @@ TEST_P(ComputeTest, CanComputePrefixSum) { latch.Wait(); } +TEST_P(ComputeTest, 1DThreadgroupSizingIsCorrect) { + using CS = ThreadgroupSizingTestComputeShader; + auto context = GetContext(); + ASSERT_TRUE(context); + ASSERT_TRUE(context->GetCapabilities()->SupportsCompute()); + + using SamplePipelineBuilder = ComputePipelineBuilder; + auto pipeline_desc = + SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context); + ASSERT_TRUE(pipeline_desc.has_value()); + auto compute_pipeline = + context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get(); + ASSERT_TRUE(compute_pipeline); + + auto cmd_buffer = context->CreateCommandBuffer(); + auto pass = cmd_buffer->CreateComputePass(); + ASSERT_TRUE(pass && pass->IsValid()); + + static constexpr size_t kCount = 2048; + + pass->SetGridSize(ISize(kCount, 1)); + pass->SetThreadGroupSize(ISize(kCount, 1)); + + ComputeCommand cmd; + cmd.label = "Compute"; + cmd.pipeline = compute_pipeline; + + auto output_buffer = CreateHostVisibleDeviceBuffer>( + context, "Output Buffer"); + + CS::BindOutputData(cmd, output_buffer->AsBufferView()); + + ASSERT_TRUE(pass->AddCommand(std::move(cmd))); + ASSERT_TRUE(pass->EncodeCommands()); + + fml::AutoResetWaitableEvent latch; + ASSERT_TRUE(cmd_buffer->SubmitCommands( + [&latch, output_buffer](CommandBuffer::Status status) { + EXPECT_EQ(status, CommandBuffer::Status::kCompleted); + + auto view = output_buffer->AsBufferView(); + EXPECT_EQ(view.range.length, sizeof(CS::OutputData)); + + CS::OutputData* output = + reinterpret_cast*>(view.contents); + EXPECT_TRUE(output); + EXPECT_EQ(output->data[kCount - 1], kCount - 1); + latch.Signal(); + })); + + latch.Wait(); +} + TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) { using CS = PrefixSumTestComputeShader; diff --git a/impeller/renderer/prefix_sum_test.comp b/impeller/renderer/prefix_sum_test.comp index 0f8bff231ffa5..fb5c21fca29bf 100644 --- a/impeller/renderer/prefix_sum_test.comp +++ b/impeller/renderer/prefix_sum_test.comp @@ -2,9 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -// TODO(dnfield): This should not need to be so small, -// https://github.com/flutter/flutter/issues/119357 -layout(local_size_x = 256, local_size_y = 1) in; +layout(local_size_x = 0) in; layout(std430) buffer; #include diff --git a/impeller/renderer/threadgroup_sizing_test.comp b/impeller/renderer/threadgroup_sizing_test.comp new file mode 100644 index 0000000000000..a6cfba6c0923b --- /dev/null +++ b/impeller/renderer/threadgroup_sizing_test.comp @@ -0,0 +1,18 @@ +// Copyright 2013 The Flutter Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Size is passed in via specialization constant. +layout (local_size_x_id = 0) in; + +layout(std430) buffer; + +layout(binding = 1) writeonly buffer OutputData { + uint data[]; +} +output_data; + +void main() { + uint ident = gl_GlobalInvocationID.x; + output_data.data[ident] = ident; +} From 93c1827cfbaa6d75a9fe31ca29f30ba4b1e3e6db Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 11:46:02 -0700 Subject: [PATCH 5/9] ++ --- impeller/renderer/backend/vulkan/context_vk.h | 4 +++- impeller/renderer/backend/vulkan/pipeline_library_vk.cc | 1 - impeller/renderer/threadgroup_sizing_test.comp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/impeller/renderer/backend/vulkan/context_vk.h b/impeller/renderer/backend/vulkan/context_vk.h index c8c8cd03b26d8..4b691c8366ec4 100644 --- a/impeller/renderer/backend/vulkan/context_vk.h +++ b/impeller/renderer/backend/vulkan/context_vk.h @@ -137,7 +137,9 @@ class ContextVK final : public Context, // |DeviceHolder| const vk::Device& GetDevice() const override { return device.get(); } // |DeviceHolder| - const vk::PhysicalDevice& GetPhysicalDevice() const override { return physical_device; } + const vk::PhysicalDevice& GetPhysicalDevice() const override { + return physical_device; + } vk::UniqueInstance instance; vk::PhysicalDevice physical_device; diff --git a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc index 1a786d2233e4e..201f88e32a3ab 100644 --- a/impeller/renderer/backend/vulkan/pipeline_library_vk.cc +++ b/impeller/renderer/backend/vulkan/pipeline_library_vk.cc @@ -386,7 +386,6 @@ std::unique_ptr PipelineLibraryVK::CreateComputePipeline( info.setPSpecializationInfo(&specialization_info); pipeline_info.setStage(info); - //---------------------------------------------------------------------------- /// Pipeline Layout a.k.a the descriptor sets and uniforms. /// diff --git a/impeller/renderer/threadgroup_sizing_test.comp b/impeller/renderer/threadgroup_sizing_test.comp index a6cfba6c0923b..3d2e02af507ca 100644 --- a/impeller/renderer/threadgroup_sizing_test.comp +++ b/impeller/renderer/threadgroup_sizing_test.comp @@ -3,7 +3,7 @@ // found in the LICENSE file. // Size is passed in via specialization constant. -layout (local_size_x_id = 0) in; +layout(local_size_x_id = 0) in; layout(std430) buffer; From 0bb2e9f66b5a53eef66a8d5f47ee6ef286aaecb9 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 11:52:24 -0700 Subject: [PATCH 6/9] ++ --- impeller/renderer/prefix_sum_test.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impeller/renderer/prefix_sum_test.comp b/impeller/renderer/prefix_sum_test.comp index fb5c21fca29bf..7cc940fb1251e 100644 --- a/impeller/renderer/prefix_sum_test.comp +++ b/impeller/renderer/prefix_sum_test.comp @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -layout(local_size_x = 0) in; +layout(local_size_x_id = 0) in; layout(std430) buffer; #include From 3cb5394c833e672e2c484db8e10a9a5c2f1773dc Mon Sep 17 00:00:00 2001 From: Jonah Williams Date: Fri, 2 Jun 2023 12:07:30 -0700 Subject: [PATCH 7/9] Update compute_pass_vk.cc --- impeller/renderer/backend/vulkan/compute_pass_vk.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/impeller/renderer/backend/vulkan/compute_pass_vk.cc b/impeller/renderer/backend/vulkan/compute_pass_vk.cc index 79f19111fbc8a..cb1074f32f9d5 100644 --- a/impeller/renderer/backend/vulkan/compute_pass_vk.cc +++ b/impeller/renderer/backend/vulkan/compute_pass_vk.cc @@ -254,9 +254,10 @@ bool ComputePassVK::OnEncodeCommands(const Context& context, // Special case for linear processing. if (height == 1) { + int64_t minimum = 1; int64_t threadGroups = std::max( static_cast(std::ceil(width * 1.0 / max_wg_size[0] * 1.0)), - 1LL); + minimum); cmd_buffer.dispatch(threadGroups, 1, 1); } else { while (width > max_wg_size[0]) { From d8ed07a6cbaf77fd004f13963f7900291a52758f Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 12:29:08 -0700 Subject: [PATCH 8/9] licenses --- ci/licenses_golden/licenses_flutter | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/licenses_golden/licenses_flutter b/ci/licenses_golden/licenses_flutter index 0e582e6c90aa0..f94f7d3046bf5 100644 --- a/ci/licenses_golden/licenses_flutter +++ b/ci/licenses_golden/licenses_flutter @@ -1601,6 +1601,7 @@ ORIGIN: ../../../flutter/impeller/renderer/snapshot.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/stroke.comp + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/surface.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/surface.h + ../../../flutter/LICENSE +ORIGIN: ../../../flutter/impeller/renderer/threadgroup_sizing_test.comp + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_buffer_builder.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_buffer_builder.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/vertex_descriptor.cc + ../../../flutter/LICENSE @@ -4272,6 +4273,7 @@ FILE: ../../../flutter/impeller/renderer/snapshot.h FILE: ../../../flutter/impeller/renderer/stroke.comp FILE: ../../../flutter/impeller/renderer/surface.cc FILE: ../../../flutter/impeller/renderer/surface.h +FILE: ../../../flutter/impeller/renderer/threadgroup_sizing_test.comp FILE: ../../../flutter/impeller/renderer/vertex_buffer_builder.cc FILE: ../../../flutter/impeller/renderer/vertex_buffer_builder.h FILE: ../../../flutter/impeller/renderer/vertex_descriptor.cc From d02e1bb2fa99d5d9becdb28407be420a159090c6 Mon Sep 17 00:00:00 2001 From: jonahwilliams Date: Fri, 2 Jun 2023 13:42:34 -0700 Subject: [PATCH 9/9] maliocdiff --- impeller/tools/malioc.json | 76 ++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index 68afc86f992ed..e99ada0edc0a8 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -13569,9 +13569,9 @@ "load_store" ], "longest_path_cycles": [ - 2.65625, + 2.450000047683716, 0.0, - 2.65625, + 2.450000047683716, 1.0, 72.0, 0.0 @@ -13589,9 +13589,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.9375, + 0.762499988079071, 0.0, - 0.9375, + 0.762499988079071, 0.0, 0.0, 0.0 @@ -13600,9 +13600,9 @@ "load_store" ], "total_cycles": [ - 2.65625, + 2.46875, 0.0, - 2.65625, + 2.46875, 1.0, 72.0, 0.0 @@ -13612,7 +13612,7 @@ "stack_spill_bytes": 0, "thread_occupancy": 100, "uniform_registers_used": 8, - "work_registers_used": 17 + "work_registers_used": 18 } } } @@ -13680,6 +13680,68 @@ } } }, + "flutter/impeller/renderer/threadgroup_sizing_test.comp.vkspv": { + "Mali-G78": { + "core": "Mali-G78", + "filename": "flutter/impeller/renderer/threadgroup_sizing_test.comp.vkspv", + "has_uniform_computation": true, + "type": "Compute", + "variants": { + "Main": { + "fp16_arithmetic": null, + "has_stack_spilling": false, + "performance": { + "longest_path_bound_pipelines": [ + "load_store" + ], + "longest_path_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ], + "pipelines": [ + "arith_total", + "arith_fma", + "arith_cvt", + "arith_sfu", + "load_store", + "texture" + ], + "shortest_path_bound_pipelines": [ + "load_store" + ], + "shortest_path_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ], + "total_bound_pipelines": [ + "load_store" + ], + "total_cycles": [ + 0.03125, + 0.0, + 0.03125, + 0.0, + 1.0, + 0.0 + ] + }, + "shared_storage_used": 0, + "stack_spill_bytes": 0, + "thread_occupancy": 100, + "uniform_registers_used": 2, + "work_registers_used": 4 + } + } + } + }, "flutter/impeller/scene/shaders/gles/skinned.vert.gles": { "Mali-G78": { "core": "Mali-G78",