From 0543f9379d5b55437239656963b755f0ee91044e Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Sun, 10 Aug 2025 13:38:35 +0800 Subject: [PATCH 1/3] Add failing test case. --- test/protocol/http/accept_encoding.rb | 171 ++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 test/protocol/http/accept_encoding.rb diff --git a/test/protocol/http/accept_encoding.rb b/test/protocol/http/accept_encoding.rb new file mode 100644 index 0000000..8422c45 --- /dev/null +++ b/test/protocol/http/accept_encoding.rb @@ -0,0 +1,171 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2019-2025, by Samuel Williams. + +require "protocol/http/accept_encoding" + +describe Protocol::HTTP::AcceptEncoding do + let(:delegate) do + ->(request) { + Protocol::HTTP::Response[200, Protocol::HTTP::Headers["content-type" => "text/plain"], ["Hello World!"]] + } + end + + let(:middleware) { Protocol::HTTP::AcceptEncoding.new(delegate) } + + with "known encodings" do + it "can decode gzip responses" do + # Mock a response with gzip encoding + gzip_delegate = ->(request) { + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/plain", + "content-encoding" => "gzip" + ], + ["Hello World!"] + ] + } + + gzip_middleware = Protocol::HTTP::AcceptEncoding.new(gzip_delegate) + request = Protocol::HTTP::Request["GET", "/"] + response = gzip_middleware.call(request) + + expect(response.headers).not.to have_keys("content-encoding") + expect(response.body).to be_a(Protocol::HTTP::Body::Inflate) + end + end + + with "unknown encodings" do + it "preserves unknown content-encoding headers" do + # Mock a response with brotli encoding (not in DEFAULT_WRAPPERS) + br_delegate = ->(request) { + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/plain", + "content-encoding" => "br" + ], + ["Hello World!"] # This would actually be brotli-encoded in reality + ] + } + + br_middleware = Protocol::HTTP::AcceptEncoding.new(br_delegate) + request = Protocol::HTTP::Request["GET", "/"] + response = br_middleware.call(request) + + # The bug: this currently fails because content-encoding gets removed + # when the middleware encounters an unknown encoding + expect(response.headers).to have_keys("content-encoding") + expect(response.headers["content-encoding"]).to be == ["br"] + # The body should remain untouched since we can't decode it + expect(response.body).not.to be_a(Protocol::HTTP::Body::Inflate) + end + + it "preserves mixed known and unknown encodings" do + # Mock a response with multiple encodings where some are unknown + mixed_delegate = ->(request) { + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/plain", + "content-encoding" => "gzip, br" # gzip is known, br is unknown + ], + ["Hello World!"] + ] + } + + mixed_middleware = Protocol::HTTP::AcceptEncoding.new(mixed_delegate) + request = Protocol::HTTP::Request["GET", "/"] + response = mixed_middleware.call(request) + + # The bug: this currently fails because the entire content-encoding + # header gets removed when ANY unknown encoding is present + expect(response.headers).to have_keys("content-encoding") + expect(response.headers["content-encoding"]).to be == ["gzip", "br"] + # The body should remain untouched since we can't decode the br part + expect(response.body).not.to be_a(Protocol::HTTP::Body::Inflate) + end + + it "handles case-insensitive encoding names" do + # Mock a response with uppercase encoding name + uppercase_delegate = ->(request) { + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/plain", + "content-encoding" => "GZIP" + ], + ["Hello World!"] + ] + } + + uppercase_middleware = Protocol::HTTP::AcceptEncoding.new(uppercase_delegate) + request = Protocol::HTTP::Request["GET", "/"] + response = uppercase_middleware.call(request) + + # This might also be a bug - encoding names should be case-insensitive + # but the current implementation uses exact string matching + expect(response.headers).not.to have_keys("content-encoding") + expect(response.body).to be_a(Protocol::HTTP::Body::Inflate) + end + end + + with "issue #86 - transparent proxy scenario" do + it "preserves unknown content-encoding when acting as transparent proxy" do + # This test simulates the exact scenario described in issue #86 + # where a transparent proxy fetches content with brotli encoding + # but the AcceptEncoding middleware doesn't know about brotli + + # Mock upstream server that returns brotli-encoded content + upstream_delegate = ->(request) { + # Simulate a server responding with brotli encoding + # when the request has accept-encoding: gzip + expect(request.headers["accept-encoding"]).to be == ["gzip"] + + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/html", + "content-encoding" => "br" # Server chose brotli + ], + [""] # This would be actual brotli data + ] + } + + # Proxy middleware that only knows about gzip + proxy_middleware = Protocol::HTTP::AcceptEncoding.new(upstream_delegate) + + # Client request that accepts both gzip and brotli + request = Protocol::HTTP::Request["GET", "/some/resource"] + response = proxy_middleware.call(request) + + # BUG: The content-encoding header should be preserved + # so the client knows the content is still brotli-encoded + expect(response.headers).to have_keys("content-encoding") + expect(response.headers["content-encoding"]).to be == ["br"] + + # The body should remain untouched since proxy can't decode brotli + expect(response.body).not.to be_a(Protocol::HTTP::Body::Inflate) + expect(response.read).to be == "" + end + end + + with "empty or identity encodings" do + it "handles identity encoding correctly" do + identity_delegate = ->(request) { + Protocol::HTTP::Response[200, + Protocol::HTTP::Headers[ + "content-type" => "text/plain", + "content-encoding" => "identity" + ], + ["Hello World!"] + ] + } + + identity_middleware = Protocol::HTTP::AcceptEncoding.new(identity_delegate) + request = Protocol::HTTP::Request["GET", "/"] + response = identity_middleware.call(request) + + # Identity encoding means no encoding, so header should be removed + expect(response.headers).not.to have_keys("content-encoding") + expect(response.body).not.to be_a(Protocol::HTTP::Body::Inflate) + end + end +end From cfccf9105a662fc08a7f01b544c043d429be2f4b Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Sun, 10 Aug 2025 13:52:20 +0800 Subject: [PATCH 2/3] Fix. --- lib/protocol/http/accept_encoding.rb | 32 +++++++++++++++++++++------ test/protocol/http/accept_encoding.rb | 3 --- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/protocol/http/accept_encoding.rb b/lib/protocol/http/accept_encoding.rb index c594dc6..d289761 100644 --- a/lib/protocol/http/accept_encoding.rb +++ b/lib/protocol/http/accept_encoding.rb @@ -21,6 +21,7 @@ class AcceptEncoding < Middleware # The default wrappers to use for decoding content. DEFAULT_WRAPPERS = { "gzip" => Body::Inflate.method(:for), + "identity" => ->(body) { body }, # Identity means no encoding # There is no point including this: # 'identity' => ->(body){body}, @@ -46,15 +47,32 @@ def call(request) response = super - if body = response.body and !body.empty? and content_encoding = response.headers.delete(CONTENT_ENCODING) - # We want to unwrap all encodings - content_encoding.reverse_each do |name| - if wrapper = @wrappers[name] - body = wrapper.call(body) + if body = response.body and !body.empty? + if content_encoding = response.headers[CONTENT_ENCODING] + # Process encodings from the end (last applied first) + # Remove encodings as we successfully decode them + while name = content_encoding.last + # Look up wrapper with case-insensitive matching + wrapper = @wrappers[name] || @wrappers[name.downcase] + + if wrapper + body = wrapper.call(body) + # Remove the encoding we just processed: + content_encoding.pop + else + # Unknown encoding - stop processing here: + break + end + end + + # Update the response body: + response.body = body + + # Remove the content-encoding header if we decoded all encodings: + if content_encoding.empty? + response.headers.delete(CONTENT_ENCODING) end end - - response.body = body end return response diff --git a/test/protocol/http/accept_encoding.rb b/test/protocol/http/accept_encoding.rb index 8422c45..2db3bc3 100644 --- a/test/protocol/http/accept_encoding.rb +++ b/test/protocol/http/accept_encoding.rb @@ -117,9 +117,6 @@ # Mock upstream server that returns brotli-encoded content upstream_delegate = ->(request) { # Simulate a server responding with brotli encoding - # when the request has accept-encoding: gzip - expect(request.headers["accept-encoding"]).to be == ["gzip"] - Protocol::HTTP::Response[200, Protocol::HTTP::Headers[ "content-type" => "text/html", From 1c67f61bb4f66590b6978eebe363e5a98bc8513c Mon Sep 17 00:00:00 2001 From: Samuel Williams Date: Sun, 10 Aug 2025 13:58:45 +0800 Subject: [PATCH 3/3] Documentation. --- lib/protocol/http/accept_encoding.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/protocol/http/accept_encoding.rb b/lib/protocol/http/accept_encoding.rb index d289761..92bb73d 100644 --- a/lib/protocol/http/accept_encoding.rb +++ b/lib/protocol/http/accept_encoding.rb @@ -49,11 +49,10 @@ def call(request) if body = response.body and !body.empty? if content_encoding = response.headers[CONTENT_ENCODING] - # Process encodings from the end (last applied first) - # Remove encodings as we successfully decode them + # Process encodings in reverse order and remove them when they are decoded: while name = content_encoding.last - # Look up wrapper with case-insensitive matching - wrapper = @wrappers[name] || @wrappers[name.downcase] + # Look up wrapper with case-insensitive matching: + wrapper = @wrappers[name.downcase] if wrapper body = wrapper.call(body)