From 8168690c1f74f9518477d63b62ef9a354becb5db Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 10:33:59 -0800 Subject: [PATCH 1/7] [Impeller] applied the lerp hack to blur (roughly 2x speedup?) --- .../filters/gaussian_blur_filter_contents.cc | 38 ++++++++- .../filters/gaussian_blur_filter_contents.h | 5 ++ ...gaussian_blur_filter_contents_unittests.cc | 81 +++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 0e97225326ee9..0950da7aa5469 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -106,6 +106,7 @@ fml::StatusOr MakeDownsampleSubpass( SetTileMode(&linear_sampler_descriptor, renderer, tile_mode); linear_sampler_descriptor.mag_filter = MinMagFilter::kLinear; linear_sampler_descriptor.min_filter = MinMagFilter::kLinear; + linear_sampler_descriptor.mip_filter = MipFilter::kLinear; TextureFillVertexShader::BindFrameInfo( pass, host_buffer.EmplaceUniform(frame_info)); TextureFillFragmentShader::BindTextureSampler( @@ -167,6 +168,7 @@ fml::StatusOr MakeBlurSubpass( SamplerDescriptor linear_sampler_descriptor = sampler_descriptor; linear_sampler_descriptor.mag_filter = MinMagFilter::kLinear; linear_sampler_descriptor.min_filter = MinMagFilter::kLinear; + linear_sampler_descriptor.mip_filter = MipFilter::kLinear; GaussianBlurFragmentShader::BindTextureSampler( pass, input_texture, renderer.GetContext()->GetSamplerLibrary()->GetSampler( @@ -174,7 +176,8 @@ fml::StatusOr MakeBlurSubpass( GaussianBlurVertexShader::BindFrameInfo( pass, host_buffer.EmplaceUniform(frame_info)); GaussianBlurFragmentShader::BindKernelSamples( - pass, host_buffer.EmplaceUniform(GenerateBlurInfo(blur_info))); + pass, host_buffer.EmplaceUniform( + LerpHackKernelSamples(GenerateBlurInfo(blur_info)))); return 
pass.Draw().ok(); }; if (destination_target.has_value()) { @@ -507,4 +510,37 @@ KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo( return result; } +KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( + KernelPipeline::FragmentShader::KernelSamples parameters) { + KernelPipeline::FragmentShader::KernelSamples result; + result.sample_count = ((parameters.sample_count - 1) / 2) + 1; + int32_t middle = result.sample_count / 2; + int32_t j = 0; + Scalar coefficient_tally = 0.0f; + for (int i = 0; i < result.sample_count; i++) { + if (i == middle) { + result.samples[i] = parameters.samples[j++]; + } else { + KernelPipeline::FragmentShader::KernelSample left = parameters.samples[j]; + KernelPipeline::FragmentShader::KernelSample right = + parameters.samples[j + 1]; + Scalar right_coefficient = right.coefficient / left.coefficient; + result.samples[i] = KernelPipeline::FragmentShader::KernelSample{ + .uv_offset = left.uv_offset.Lerp( + right.uv_offset, right_coefficient / (1.0f + right_coefficient)), + .coefficient = left.coefficient, + }; + j += 2; + } + coefficient_tally += result.samples[i].coefficient; + } + + // Normalize. + for (int i = 0; i < result.sample_count; i++) { + result.samples[i].coefficient /= coefficient_tally; + } + + return result; +} + } // namespace impeller diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h index 458e310541f9a..f28e7a22fdbe6 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.h +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.h @@ -21,6 +21,11 @@ struct BlurParameters { KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo( BlurParameters parameters); +/// This will shrink the size of a kernel by roughly half by sampling between +/// samples and relying on linear interpolation between the samples. 
+KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( + KernelPipeline::FragmentShader::KernelSamples samples); + /// Performs a bidirectional Gaussian blur. /// /// This is accomplished by rendering multiple passes in multiple directions. diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index 369f9cb914c9d..babd983f2b26b 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -487,5 +487,86 @@ TEST(GaussianBlurFilterContentsTest, Coefficients) { } } +TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamples) { + KernelPipeline::FragmentShader::KernelSamples kernel_samples = { + .sample_count = 5, + .samples = + { + { + .uv_offset = Vector2(-2, 0), + .coefficient = 0.1f, + }, + { + .uv_offset = Vector2(-1, 0), + .coefficient = 0.2f, + }, + { + .uv_offset = Vector2(0, 0), + .coefficient = 0.4f, + }, + { + .uv_offset = Vector2(1, 0), + .coefficient = 0.2f, + }, + { + .uv_offset = Vector2(2, 0), + .coefficient = 0.1f, + }, + }, + }; + + KernelPipeline::FragmentShader::KernelSamples fast_kernel_samples = + LerpHackKernelSamples(kernel_samples); + EXPECT_EQ(fast_kernel_samples.sample_count, 3); + + KernelPipeline::FragmentShader::KernelSample* samples = + kernel_samples.samples; + KernelPipeline::FragmentShader::KernelSample* fast_samples = + fast_kernel_samples.samples; + + ////////////////////////////////////////////////////////////////////////////// + // Check output kernel. 
+ + EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333); + EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0); + // 0.1428571429 = 0.1 / (0.1 + 0.4 + 0.2) + EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.1428571429); + EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0); + EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0); + // 0.5714285714 = 0.4 / (0.1 + 0.4 + 0.2) + EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.5714285714); + EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333); + EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0); + // 0.2857142857 = 0.4 / (0.1 + 0.4 + 0.2) + EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.2857142857); + + ////////////////////////////////////////////////////////////////////////////// + // Check output of fast kernel versus original kernel. + + Scalar data[5] = {0.25, 0.5, 0.5, 1.0, 0.2}; + Scalar original_output = + samples[0].coefficient * data[0] + samples[1].coefficient * data[1] + + samples[2].coefficient * data[2] + samples[3].coefficient * data[3] + + samples[4].coefficient * data[4]; + + auto lerp = [](const Point& point, Scalar left, Scalar right) { + Scalar int_part; + Scalar fract = fabsf(modf(point.x, &int_part)); + if (point.x < 0) { + return left * fract + right * (1.0 - fract); + } else { + return left * (1.0 - fract) + right * fract; + } + }; + Scalar fast_output = + /*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) * + fast_samples[0].coefficient + + /*2nd*/ data[2] * fast_samples[1].coefficient + + /*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) * + fast_samples[2].coefficient; + + EXPECT_NEAR(original_output, fast_output, 0.01); +} + } // namespace testing } // namespace impeller From 13b2a318fc5608d218343e04d7177c7aa6e2048b Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 12:54:55 -0800 Subject: [PATCH 2/7] updated docstring --- .../contents/filters/gaussian_blur_filter_contents.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 0950da7aa5469..a4ad416122612 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -510,6 +510,15 @@ KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo( return result; } +// This works by shrinking the kernel size by 2 and relying on lerp to read +// between the samples. +// +// Here is how the math is devised for collapsing 2 samples into 1: +// output = coeff[a] * sample(pos[a]) + coeff[b] * sample(pos[b]) +// output = coeff[a] * (sample(pos[a]) + (coeff[b]/coeff[a]) * sample(pos[b])) +// fract = (coeff[b] / coeff[a]) +// output = coeff[a] * (sample(pos[a]) + fract * sample(pos[b])) +// output = coeff[a] * sample(lerp(pos[a], pos[b], frac / (1 + fract))) KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( KernelPipeline::FragmentShader::KernelSamples parameters) { KernelPipeline::FragmentShader::KernelSamples result; From ea0e72c82dd2a302c43ec49548e57b8cc067a399 Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 14:50:08 -0800 Subject: [PATCH 3/7] wrote up another test that shows quite a bit of rounding off error --- ...gaussian_blur_filter_contents_unittests.cc | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index babd983f2b26b..a4272eeca4bb0 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -487,7 +487,7 @@ TEST(GaussianBlurFilterContentsTest, Coefficients) { } } -TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamples) { +TEST(GaussianBlurFilterContentsTest, 
LerpHackKernelSamplesSimple) { KernelPipeline::FragmentShader::KernelSamples kernel_samples = { .sample_count = 5, .samples = @@ -568,5 +568,60 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamples) { EXPECT_NEAR(original_output, fast_output, 0.01); } +TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) { + Scalar sigma = 10.0f; + int32_t blur_radius = static_cast<int32_t>( + std::ceil(GaussianBlurFilterContents::CalculateBlurRadius(sigma))); + BlurParameters parameters = {.blur_uv_offset = Point(1, 0), + .blur_sigma = sigma, + .blur_radius = blur_radius, + .step_size = 1}; + KernelPipeline::FragmentShader::KernelSamples kernel_samples = + GenerateBlurInfo(parameters); + EXPECT_EQ(kernel_samples.sample_count, 33); + KernelPipeline::FragmentShader::KernelSamples fast_kernel_samples = + LerpHackKernelSamples(kernel_samples); + EXPECT_EQ(fast_kernel_samples.sample_count, 17); + float data[33]; + srand(0); + for (int i = 0; i < 33; i++) { + data[i] = 255.0 * static_cast(rand()) / RAND_MAX; + } + + auto sampler = [data](Point point) -> Scalar { + FML_CHECK(point.y == 0.0f); + FML_CHECK(point.x >= -16); + FML_CHECK(point.x <= 16); + Scalar fint_part; + Scalar fract = fabsf(modf(point.x, &fint_part)); + if (fract == 0) { + int32_t int_part = static_cast<int32_t>(fint_part) + 16; + return data[int_part]; + } else { + int32_t left = static_cast<int32_t>(floor(point.x)) + 16; + int32_t right = static_cast<int32_t>(ceil(point.x)) + 16; + if (point.x < 0) { + return fract * data[left] + (1.0 - fract) * data[right]; + } else { + return (1.0 - fract) * data[left] + fract * data[right]; + } + } + }; + + Scalar output = 0.0; + for (int i = 0; i < kernel_samples.sample_count; ++i) { + auto sample = kernel_samples.samples[i]; + output += sample.coefficient * sampler(sample.uv_offset); + } + + Scalar fast_output = 0.0; + for (int i = 0; i < fast_kernel_samples.sample_count; ++i) { + auto sample = fast_kernel_samples.samples[i]; + fast_output += sample.coefficient * sampler(sample.uv_offset); + }
+ + EXPECT_NEAR(output, fast_output, 0.1); +} + } // namespace testing } // namespace impeller From a658655fa420e0cc7156c668fcd2c1ab87df3bf8 Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 15:57:38 -0800 Subject: [PATCH 4/7] fixed math --- .../filters/gaussian_blur_filter_contents.cc | 15 ++++----------- .../gaussian_blur_filter_contents_unittests.cc | 9 +++------ 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index a4ad416122612..74562ddac6a03 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -512,13 +512,6 @@ KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo( // This works by shrinking the kernel size by 2 and relying on lerp to read // between the samples. -// -// Here is how the math is devised for collapsing 2 samples into 1: -// output = coeff[a] * sample(pos[a]) + coeff[b] * sample(pos[b]) -// output = coeff[a] * (sample(pos[a]) + (coeff[b]/coeff[a]) * sample(pos[b])) -// fract = (coeff[b] / coeff[a]) -// output = coeff[a] * (sample(pos[a]) + fract * sample(pos[b])) -// output = coeff[a] * sample(lerp(pos[a], pos[b], frac / (1 + fract))) KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( KernelPipeline::FragmentShader::KernelSamples parameters) { KernelPipeline::FragmentShader::KernelSamples result; @@ -533,11 +526,11 @@ KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( KernelPipeline::FragmentShader::KernelSample left = parameters.samples[j]; KernelPipeline::FragmentShader::KernelSample right = parameters.samples[j + 1]; - Scalar right_coefficient = right.coefficient / left.coefficient; result.samples[i] = KernelPipeline::FragmentShader::KernelSample{ - .uv_offset = left.uv_offset.Lerp( - right.uv_offset, right_coefficient / (1.0f + 
right_coefficient)), - .coefficient = left.coefficient, + .uv_offset = (left.uv_offset * left.coefficient + + right.uv_offset * right.coefficient) / + (left.coefficient + right.coefficient), + .coefficient = left.coefficient + right.coefficient, }; j += 2; } diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index a4272eeca4bb0..e6e5b9b129aed 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -529,16 +529,13 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) { EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333); EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0); - // 0.1428571429 = 0.1 / (0.1 + 0.4 + 0.2) - EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.1428571429); + EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3); EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0); EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0); - // 0.5714285714 = 0.4 / (0.1 + 0.4 + 0.2) - EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.5714285714); + EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4); EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333); EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0); - // 0.2857142857 = 0.4 / (0.1 + 0.4 + 0.2) - EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.2857142857); + EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3); ////////////////////////////////////////////////////////////////////////////// // Check output of fast kernel versus original kernel. 
From f3e23d2a50d3cf343a40b64eedc8eab28a99a7e2 Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 16:36:16 -0800 Subject: [PATCH 5/7] removed old stuff that isn't needed anymore --- .../contents/filters/gaussian_blur_filter_contents.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc index 74562ddac6a03..9df1ed5ec6961 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents.cc @@ -106,7 +106,6 @@ fml::StatusOr MakeDownsampleSubpass( SetTileMode(&linear_sampler_descriptor, renderer, tile_mode); linear_sampler_descriptor.mag_filter = MinMagFilter::kLinear; linear_sampler_descriptor.min_filter = MinMagFilter::kLinear; - linear_sampler_descriptor.mip_filter = MipFilter::kLinear; TextureFillVertexShader::BindFrameInfo( pass, host_buffer.EmplaceUniform(frame_info)); TextureFillFragmentShader::BindTextureSampler( @@ -168,7 +167,6 @@ fml::StatusOr MakeBlurSubpass( SamplerDescriptor linear_sampler_descriptor = sampler_descriptor; linear_sampler_descriptor.mag_filter = MinMagFilter::kLinear; linear_sampler_descriptor.min_filter = MinMagFilter::kLinear; - linear_sampler_descriptor.mip_filter = MipFilter::kLinear; GaussianBlurFragmentShader::BindTextureSampler( pass, input_texture, renderer.GetContext()->GetSamplerLibrary()->GetSampler( @@ -518,7 +516,6 @@ KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( result.sample_count = ((parameters.sample_count - 1) / 2) + 1; int32_t middle = result.sample_count / 2; int32_t j = 0; - Scalar coefficient_tally = 0.0f; for (int i = 0; i < result.sample_count; i++) { if (i == middle) { result.samples[i] = parameters.samples[j++]; @@ -534,12 +531,6 @@ KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples( }; j += 2; } - coefficient_tally += 
result.samples[i].coefficient; - } - - // Normalize. - for (int i = 0; i < result.sample_count; i++) { - result.samples[i].coefficient /= coefficient_tally; } return result; From 13d2f95c5158f32388d9d39f241d80641ac23439 Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Tue, 20 Feb 2024 16:41:01 -0800 Subject: [PATCH 6/7] tidy --- .../contents/filters/gaussian_blur_filter_contents_unittests.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index e6e5b9b129aed..d9cb6b9053f02 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -582,7 +582,7 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) { float data[33]; srand(0); for (int i = 0; i < 33; i++) { - data[i] = 255.0 * static_cast(rand()) / RAND_MAX; + data[i] = 255.0 * static_cast(arc4random()) / RAND_MAX; } auto sampler = [data](Point point) -> Scalar { From 7527b88854ea2fcdee922745699ddb88faaa3a7c Mon Sep 17 00:00:00 2001 From: Aaron Clarke Date: Wed, 21 Feb 2024 08:17:54 -0800 Subject: [PATCH 7/7] tidy --- .../filters/gaussian_blur_filter_contents_unittests.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc index d9cb6b9053f02..7a7de2d62d79f 100644 --- a/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc +++ b/impeller/entity/contents/filters/gaussian_blur_filter_contents_unittests.cc @@ -11,6 +11,12 @@ #include "impeller/geometry/geometry_asserts.h" #include "impeller/renderer/testing/mocks.h" +#if FML_OS_MACOSX +#define IMPELLER_RAND arc4random +#else +#define IMPELLER_RAND rand +#endif + namespace 
impeller { namespace testing { @@ -582,7 +588,7 @@ TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) { float data[33]; srand(0); for (int i = 0; i < 33; i++) { - data[i] = 255.0 * static_cast(arc4random()) / RAND_MAX; + data[i] = 255.0 * static_cast(IMPELLER_RAND()) / RAND_MAX; } auto sampler = [data](Point point) -> Scalar {