diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 7c2dbc316a52b..2af4f2b9cf6d2 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -12,6 +12,7 @@ #include "impeller/entity/texture_downsample.frag.h" #include "impeller/entity/texture_fill.frag.h" #include "impeller/entity/texture_fill.vert.h" +#include "impeller/geometry/color.h" #include "impeller/renderer/render_pass.h" #include "impeller/renderer/vertex_buffer_builder.h" @@ -325,7 +326,7 @@ DownsamplePassArgs CalculateDownsamplePassArgs( fml::StatusOr MakeDownsampleSubpass( const ContentContext& renderer, const std::shared_ptr& command_buffer, - std::shared_ptr input_texture, + const std::shared_ptr& input_texture, const SamplerDescriptor& sampler_descriptor, const DownsamplePassArgs& pass_args, Entity::TileMode tile_mode) { @@ -345,7 +346,8 @@ fml::StatusOr MakeDownsampleSubpass( TextureFillVertexShader::FrameInfo frame_info; frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1)); - frame_info.texture_sampler_y_coord_scale = 1.0; + frame_info.texture_sampler_y_coord_scale = + input_texture->GetYCoordScale(); TextureFillFragmentShader::FragInfo frag_info; frag_info.alpha = 1.0; @@ -398,7 +400,8 @@ fml::StatusOr MakeDownsampleSubpass( TextureFillVertexShader::FrameInfo frame_info; frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1)); - frame_info.texture_sampler_y_coord_scale = 1.0; + frame_info.texture_sampler_y_coord_scale = + input_texture->GetYCoordScale(); TextureDownsampleFragmentShader::FragInfo frag_info; frag_info.edge = edge; @@ -447,16 +450,18 @@ fml::StatusOr MakeBlurSubpass( return input_pass; } - std::shared_ptr input_texture = input_pass.GetRenderTargetTexture(); + const std::shared_ptr& input_texture = + input_pass.GetRenderTargetTexture(); // TODO(gaaclarke): This blurs the whole image, but 
because we know the clip // region we could focus on just blurring that. ISize subpass_size = input_texture->GetSize(); ContentContext::SubpassCallback subpass_callback = [&](const ContentContext& renderer, RenderPass& pass) { - GaussianBlurVertexShader::FrameInfo frame_info{ - .mvp = Matrix::MakeOrthographic(ISize(1, 1)), - .texture_sampler_y_coord_scale = 1.0}; + GaussianBlurVertexShader::FrameInfo frame_info; + frame_info.mvp = Matrix::MakeOrthographic(ISize(1, 1)); + frame_info.texture_sampler_y_coord_scale = + input_texture->GetYCoordScale(); HostBuffer& host_buffer = renderer.GetTransientsBuffer(); @@ -481,11 +486,9 @@ fml::StatusOr MakeBlurSubpass( linear_sampler_descriptor)); GaussianBlurVertexShader::BindFrameInfo( pass, host_buffer.EmplaceUniform(frame_info)); - GaussianBlurPipeline::FragmentShader::KernelSamples kernel_samples = - LerpHackKernelSamples(GenerateBlurInfo(blur_info)); - FML_CHECK(kernel_samples.sample_count <= kGaussianBlurMaxKernelSize); GaussianBlurFragmentShader::BindKernelSamples( - pass, host_buffer.EmplaceUniform(kernel_samples)); + pass, host_buffer.EmplaceUniform( + LerpHackKernelSamples(GenerateBlurInfo(blur_info)))); return pass.Draw().ok(); }; if (destination_target.has_value()) { @@ -898,7 +901,7 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) { Scalar tally = 0.0f; for (int i = 0; i < result.sample_count; ++i) { int x = x_offset + (i * parameters.step_size) - parameters.blur_radius; - result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{ + result.samples[i] = KernelSample{ .uv_offset = parameters.blur_uv_offset * x, .coefficient = expf(-0.5f * (x * x) / (parameters.blur_sigma * parameters.blur_sigma)) / @@ -919,25 +922,31 @@ KernelSamples GenerateBlurInfo(BlurParameters parameters) { // between the samples. 
GaussianBlurPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( KernelSamples parameters) { - GaussianBlurPipeline::FragmentShader::KernelSamples result; + GaussianBlurPipeline::FragmentShader::KernelSamples result = {}; result.sample_count = ((parameters.sample_count - 1) / 2) + 1; int32_t middle = result.sample_count / 2; int32_t j = 0; FML_DCHECK(result.sample_count <= kGaussianBlurMaxKernelSize); + static_assert(sizeof(result.sample_data) == + sizeof(std::array)); + for (int i = 0; i < result.sample_count; i++) { if (i == middle) { - result.samples[i] = parameters.samples[j++]; + result.sample_data[i].x = parameters.samples[j].uv_offset.x; + result.sample_data[i].y = parameters.samples[j].uv_offset.y; + result.sample_data[i].z = parameters.samples[j].coefficient; + j++; } else { - GaussianBlurPipeline::FragmentShader::KernelSample left = - parameters.samples[j]; - GaussianBlurPipeline::FragmentShader::KernelSample right = - parameters.samples[j + 1]; - result.samples[i] = GaussianBlurPipeline::FragmentShader::KernelSample{ - .uv_offset = (left.uv_offset * left.coefficient + - right.uv_offset * right.coefficient) / - (left.coefficient + right.coefficient), - .coefficient = left.coefficient + right.coefficient, - }; + KernelSample left = parameters.samples[j]; + KernelSample right = parameters.samples[j + 1]; + + result.sample_data[i].z = left.coefficient + right.coefficient; + + Point uv = (left.uv_offset * left.coefficient + + right.uv_offset * right.coefficient) / + (left.coefficient + right.coefficient); + result.sample_data[i].x = uv.x; + result.sample_data[i].y = uv.y; j += 2; } } diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h index 4408858e08b2c..b24c87f0205d6 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h @@ -9,12 +9,16 @@ #include 
"impeller/entity/contents/content_context.h" #include "impeller/entity/contents/filters/filter_contents.h" #include "impeller/entity/geometry/geometry.h" +#include "impeller/geometry/color.h" namespace impeller { // Comes from gaussian.frag. static constexpr int32_t kGaussianBlurMaxKernelSize = 50; +static_assert(sizeof(GaussianBlurPipeline::FragmentShader::KernelSamples) == + sizeof(Vector4) * kGaussianBlurMaxKernelSize + sizeof(Vector4)); + struct BlurParameters { Point blur_uv_offset; Scalar blur_sigma; @@ -22,6 +26,11 @@ struct BlurParameters { int step_size; }; +struct KernelSample { + Vector2 uv_offset; + float coefficient; +}; + /// A larger mirror of GaussianBlurPipeline::FragmentShader::KernelSamples. /// /// This is a mirror of GaussianBlurPipeline::FragmentShader::KernelSamples that @@ -30,7 +39,7 @@ struct BlurParameters { struct KernelSamples { static constexpr int kMaxKernelSize = kGaussianBlurMaxKernelSize * 2; int sample_count; - GaussianBlurPipeline::FragmentShader::KernelSample samples[kMaxKernelSize]; + KernelSample samples[kMaxKernelSize]; }; KernelSamples GenerateBlurInfo(BlurParameters parameters); diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index 59618db565d14..26449a2573d50 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -9,6 +9,7 @@ #include "impeller/entity/contents/filters/gaussian_blur_filter_contents.h" #include "impeller/entity/contents/texture_contents.h" #include "impeller/entity/entity_playground.h" +#include "impeller/geometry/color.h" #include "impeller/geometry/geometry_asserts.h" #include "impeller/renderer/testing/mocks.h" @@ -51,6 +52,14 @@ fml::StatusOr LowerBoundNewtonianMethod( return x; } +Scalar GetCoefficient(const Vector4& vec) { + return vec.z; +} + +Vector2 
GetUVOffset(const Vector4& vec) { + return vec.xy(); +} + fml::StatusOr CalculateSigmaForBlurRadius( Scalar radius, const Matrix& effect_transform) { @@ -508,27 +517,24 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) { }, }; - GaussianBlurPipeline::FragmentShader::KernelSamples fast_kernel_samples = + GaussianBlurPipeline::FragmentShader::KernelSamples blur_info = LerpHackKernelSamples(kernel_samples); - EXPECT_EQ(fast_kernel_samples.sample_count, 3); + EXPECT_EQ(blur_info.sample_count, 3); - GaussianBlurPipeline::FragmentShader::KernelSample* samples = - kernel_samples.samples; - GaussianBlurPipeline::FragmentShader::KernelSample* fast_samples = - fast_kernel_samples.samples; + KernelSample* samples = kernel_samples.samples; ////////////////////////////////////////////////////////////////////////////// // Check output kernel. - EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333); - EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0); - EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3); - EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0); - EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0); - EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4); - EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333); - EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0); - EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3); + EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[0]), + Point(-1.3333333, 0)); + EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[0]), 0.3); + + EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[1]), Point(0, 0)); + EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[1]), 0.4); + + EXPECT_POINT_NEAR(GetUVOffset(blur_info.sample_data[2]), Point(1.3333333, 0)); + EXPECT_FLOAT_EQ(GetCoefficient(blur_info.sample_data[2]), 0.3); ////////////////////////////////////////////////////////////////////////////// // Check output of fast kernel versus original kernel. 
@@ -549,11 +555,11 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) { } }; Scalar fast_output = - /*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) * - fast_samples[0].coefficient + - /*2nd*/ data[2] * fast_samples[1].coefficient + - /*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) * - fast_samples[2].coefficient; + /*1st*/ lerp(GetUVOffset(blur_info.sample_data[0]), data[0], data[1]) * + GetCoefficient(blur_info.sample_data[0]) + + /*2nd*/ data[2] * GetCoefficient(blur_info.sample_data[1]) + + /*3rd*/ lerp(GetUVOffset(blur_info.sample_data[2]), data[3], data[4]) * + GetCoefficient(blur_info.sample_data[2]); EXPECT_NEAR(original_output, fast_output, 0.01); } @@ -604,9 +610,9 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) { } Scalar fast_output = 0.0; - for (int i = 0; i < fast_kernel_samples.sample_count; ++i) { - auto sample = fast_kernel_samples.samples[i]; - fast_output += sample.coefficient * sampler(sample.uv_offset); + for (int i = 0; i < fast_kernel_samples.sample_count; i++) { + fast_output += GetCoefficient(fast_kernel_samples.sample_data[i]) * + sampler(GetUVOffset(fast_kernel_samples.sample_data[i])); } EXPECT_NEAR(output, fast_output, 0.1); diff --git a/impeller/entity/shaders/filters/gaussian.frag b/impeller/entity/shaders/filters/gaussian.frag index f83a59940896d..a6d58f8e2b0a8 100644 --- a/impeller/entity/shaders/filters/gaussian.frag +++ b/impeller/entity/shaders/filters/gaussian.frag @@ -11,16 +11,13 @@ uniform f16sampler2D texture_sampler; layout(constant_id = 0) const float supports_decal = 1.0; -struct KernelSample { - vec2 uv_offset; - float coefficient; -}; - uniform KernelSamples { - int sample_count; - KernelSample samples[50]; + float sample_count; + + // X, Y are uv offset and Z is Coefficient. W is padding. 
+ vec4 sample_data[50]; } -blur_info; +kernel_samples; f16vec4 Sample(f16sampler2D tex, vec2 coords) { if (supports_decal == 1.0) { @@ -36,11 +33,11 @@ out f16vec4 frag_color; void main() { f16vec4 total_color = f16vec4(0.0hf); - for (int i = 0; i < blur_info.sample_count; ++i) { - float16_t coefficient = float16_t(blur_info.samples[i].coefficient); - total_color += - coefficient * Sample(texture_sampler, - v_texture_coords + blur_info.samples[i].uv_offset); + for (int i = 0; i < int(kernel_samples.sample_count); i++) { + float16_t coefficient = float16_t(kernel_samples.sample_data[i].z); + total_color += coefficient * + Sample(texture_sampler, + v_texture_coords + kernel_samples.sample_data[i].xy); } frag_color = total_color; diff --git a/impeller/geometry/vector.h b/impeller/geometry/vector.h index 31e894cb0a10e..d1358bffef4de 100644 --- a/impeller/geometry/vector.h +++ b/impeller/geometry/vector.h @@ -310,6 +310,8 @@ struct Vector4 { return *this + (v - *this) * t; } + constexpr Vector2 xy() const { return Vector2(x, y); } + std::string ToString() const; }; diff --git a/impeller/renderer/backend/gles/buffer_bindings_gles.cc b/impeller/renderer/backend/gles/buffer_bindings_gles.cc index 4067498173b9e..71d186fb72b30 100644 --- a/impeller/renderer/backend/gles/buffer_bindings_gles.cc +++ b/impeller/renderer/backend/gles/buffer_bindings_gles.cc @@ -279,20 +279,20 @@ bool BufferBindingsGLES::BindUniformBuffer(const ProcTableGLES& gl, auto* buffer_data = reinterpret_cast(buffer_ptr + member.offset); - std::vector array_element_buffer; - if (element_count > 1) { - // When binding uniform arrays, the elements must be contiguous. Copy - // the uniforms to a temp buffer to eliminate any padding needed by the - // other backends. - array_element_buffer.resize(member.size * element_count); + // When binding uniform arrays, the elements must be contiguous. 
Copy + // the uniforms to a temp buffer to eliminate any padding needed by the + // other backends if the array elements have padding. + std::vector array_element_buffer; + if (element_count > 1 && element_stride != member.size) { + array_element_buffer.resize(member.size * element_count); for (size_t element_i = 0; element_i < element_count; element_i++) { std::memcpy(array_element_buffer.data() + element_i * member.size, reinterpret_cast(buffer_data) + element_i * element_stride, member.size); } buffer_data = reinterpret_cast(array_element_buffer.data()); } switch (member.type) { diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json index c6e8eb402db34..65d3b54f4eb3c 100644 --- a/impeller/tools/malioc.json +++ b/impeller/tools/malioc.json @@ -2581,9 +2581,9 @@ "arith_cvt" ], "shortest_path_cycles": [ - 0.109375, + 0.09375, 0.0, - 0.109375, + 0.09375, 0.0, 0.0, 0.0, @@ -2593,11 +2593,11 @@ "load_store" ], "total_cycles": [ - 0.3125, + 0.265625, 0.09375, - 0.3125, + 0.265625, 0.0, - 2.0, + 1.0, 0.25, 0.25 ] @@ -2641,10 +2641,11 @@ 0.0 ], "total_bound_pipelines": [ + "arithmetic", "load_store" ], "total_cycles": [ - 1.6666666269302368, + 2.0, 2.0, 1.0 ]