Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b4719e1
Chunked encoding decoder
WyriHaximus Jul 4, 2016
91cb3a5
Renamed to more suitable class name
WyriHaximus Jul 4, 2016
fb14e64
Removed test debug
WyriHaximus Jul 4, 2016
d6d2c1e
Removed assert true as it wasn't useful to the test
WyriHaximus Jul 4, 2016
dd04416
Swap stream when transfer encoding is chunked
WyriHaximus Jul 4, 2016
897c4a1
Removed chunked transfer encoding from todo
WyriHaximus Jul 4, 2016
b51cdb4
Response test to ensure chunked encoding decoder is used when it should
WyriHaximus Jul 5, 2016
e1a3708
Lower case checking as suggested by @jsor https://github.com/reactphp…
WyriHaximus Jul 6, 2016
bfba037
Chunked encoding extensions as mentioned by @jsor https://github.com/…
WyriHaximus Jul 7, 2016
a3b9c80
Don't emit empty data chunks
WyriHaximus Jul 7, 2016
4ca65b3
Pass the extra's from the stream through the response
WyriHaximus Jul 8, 2016
0eab14b
Changed emitted extra to be more explicit as suggested by @jsor at ht…
WyriHaximus Aug 3, 2016
f9067ce
chunkedExtension => chunkExtension
WyriHaximus Aug 3, 2016
85b0c93
Store CRLF position into a variable, suggested by @clue at https://gi…
WyriHaximus Aug 3, 2016
03d271a
Downgraded to a ReadableStreamInterface from the duplex as suggested …
WyriHaximus Aug 3, 2016
18f044a
Removed write stream `end` event forward as suggested by @clue at htt…
WyriHaximus Aug 3, 2016
cd4ba40
Removed chunk extensions as suggested by @clue as they don't have a u…
WyriHaximus Aug 3, 2016
537de43
Enure chunk length header is in as suggested by @clue at https://gith…
WyriHaximus Aug 4, 2016
42201c8
Ensure CRLF in the middle of bodies don't cause unexpected behavior b…
WyriHaximus Aug 4, 2016
48763cf
Removed surplus arguments via @clue https://github.com/reactphp/http-…
WyriHaximus Aug 9, 2016
b32a188
Marking method internal via @clue https://github.com/reactphp/http-cl…
WyriHaximus Aug 9, 2016
6e1ebcc
Removed dead end method via @clue https://github.com/reactphp/http-cl…
WyriHaximus Aug 9, 2016
69ab9c6
Implement ReadableStreamInterface and added missing methods for it vi…
WyriHaximus Aug 9, 2016
d4eda4d
Removed transfer encoding header as suggested by @joelwurtz at https:…
WyriHaximus Aug 15, 2016
804902f
No need to unset when we're not using it from there on
WyriHaximus Aug 15, 2016
29e283b
Implement chunked encoding overflow vulnerability prevention suggeste…
WyriHaximus Aug 16, 2016
0b43386
Removed accidental committed debug code
WyriHaximus Aug 16, 2016
07f649a
Close decoder stream after emitting error via @clue https://github.co…
WyriHaximus Aug 17, 2016
3ca85d6
Implemented max content length limit, by default set at 32KB, va @clu…
WyriHaximus Aug 17, 2016
7f303d8
Don't iterate the buffer when it is smaller then three bytes (removed…
WyriHaximus Aug 17, 2016
01ecdcb
Made it smaller then two instead of three, as two can still still be …
WyriHaximus Aug 17, 2016
37dfdf9
Renamed $maxBufferLength to $maxChunkLength
WyriHaximus Aug 20, 2016
61fb8a3
Removed maximum chunk length as data is flowing through and not buffe…
WyriHaximus Aug 20, 2016
e4bf1ea
Limit chunk header length to 1024 bytes https://github.com/reactphp/h…
WyriHaximus Aug 20, 2016
69e3335
Unnecessary "
WyriHaximus Aug 20, 2016
6aa86db
When the underlying stream ends emit an error when we have unprocesse…
WyriHaximus Aug 20, 2016
eab3c71
In case a substr fails reinitialize buffer as empty string
WyriHaximus Aug 20, 2016
34ee759
Added note about chunked encoded response to readme
WyriHaximus Aug 20, 2016
4f95569
Use write instead of end to let the stream detect the end properly
WyriHaximus Aug 21, 2016
09cde22
End reading from this stream when the end of the chunked encoding has…
WyriHaximus Aug 21, 2016
d7d0473
Close stream regardless https://github.com/reactphp/http-client/pull/…
WyriHaximus Aug 23, 2016
95277ff
Prevent CRLF DOS https://github.com/reactphp/http-client/pull/58#disc…
WyriHaximus Aug 23, 2016
9d4896f
Ignore chunked encoding trailers https://github.com/reactphp/http-cli…
WyriHaximus Aug 23, 2016
8540b4e
Mark ChunkedStreamDecoder as internal
WyriHaximus Sep 13, 2016
415d790
Gave names to exception test data
WyriHaximus Sep 13, 2016
49e9e9e
Gave names to non-exception test data
WyriHaximus Sep 13, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,5 @@ $loop->run();
## TODO

* gzip content encoding
* chunked transfer encoding
* keep-alive connections
* following redirections
105 changes: 105 additions & 0 deletions src/ChunkedStreamDecoder.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
<?php

namespace React\HttpClient;

use Evenement\EventEmitterTrait;
use React\Stream\DuplexStreamInterface;
use React\Stream\Util;

class ChunkedStreamDecoder
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably implement ReadableStreamInterface? (e.g. https://github.com/clue/php-utf8-react/blob/master/src/Sequencer.php#L13)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should implement ReadableStreamInterface?

{
const CRLF = "\r\n";

use EventEmitterTrait;

/**
* @var string
*/
protected $buffer = '';

/**
* @var int
*/
protected $remainingLength = 0;

/**
* @var bool
*/
protected $nextChunkIsLength = true;

/**
* @var DuplexStreamInterface
*/
protected $stream;

/**
* @param DuplexStreamInterface $stream
*/
public function __construct(DuplexStreamInterface $stream)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably expect a ReadableStreamInterface instead? (e.g. https://github.com/clue/php-utf8-react/blob/master/src/Sequencer.php#L21)

For the reference: The DuplexStreamInterface extends the ReadableStreamInterface, so this should not affect how it works. However, semantically this class only deals with the readable side of a stream, so it makes sense to support read-only streams as well.

{
$this->stream = $stream;
$this->stream->on('data', array($this, 'handleData'));
Util::forwardEvents($this->stream, $this, [
'error',
'end',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The end event comes from a writable streams, this should probably not be handled here afaict (see also above).

]);
}

public function handleData($data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/** @internal */ (just in case so we can change this in the future without introducing a BC break)

{
$this->buffer .= $data;

do {
$this->iterateBuffer();
} while (strlen($this->buffer) > 0 && strpos($this->buffer, static::CRLF) !== false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it could cause an infinite loop if an incomplete chunk includes a CRLF in its body. We should probably add a test for this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turned out it didn't, but added extra safeguards to be sure. Also added extra unit tests to ensure it is behaving as expected

}

protected function iterateBuffer()
{
if ($this->nextChunkIsLength) {
$this->nextChunkIsLength = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block does not currently check the chunk header is actually complete. Should add a test for an incomplete chunk header without a trailing CRLF?

$this->remainingLength = hexdec(substr($this->buffer, 0, strpos($this->buffer, static::CRLF)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should test for chunk extensions here.

If chunk extensions are provided, the chunk size is terminated by a semicolon and followed by the parameters, each also delimited by semicolons. Each parameter is encoded as an extension name followed by an optional equal sign and value. These parameters could be used for a running message digest or digital signature, or to indicate an estimated transfer progress, for instance.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, shouldn't ba hard to deal with. But what do we do with the values? Emit them while emitting data? Or emit them separately? Or ignore them? Personally I like to emit them with them separately to announce a new chunk

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If i read it correctly, node ignores it: https://github.com/nodejs/http-parser/blob/master/http_parser.c#L1989
I'd vote for ignoring it for now. If requested it could be emitted as a third parameter of the data event.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Think you're right, who uses this. All the issues I'm reading about it seem to suggest no one really uses it

$this->buffer = substr($this->buffer, strpos($this->buffer, static::CRLF) + 2);
}

if ($this->remainingLength > 0) {
$chunkLength = $this->getChunkLength();
$this->emit('data', array(
substr($this->buffer, 0, $chunkLength),
$this
));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sold on the idea we should emit the chunk extension as part of the data message.
Do we have any specific use case for this right now?

Personally, I would rather remove this for now (reduces complexity). We can still think about introducing this once we see an actual need and we can figure out a decent API without introducing a BC break.

Also, in my projects I only emit the actual data as the other event attributes have always been unreliable across the React ecosystem and it's very easy to use a closure to add arbitrary references to the event handler anyway (e.g. https://github.com/clue/php-utf8-react/blob/master/src/Sequencer.php#L111)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very valid point, especially since there is no use case. I'll remove it

$this->remainingLength -= $chunkLength;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a design decision here: Do we really want to emit incomplete chunks or do we honor the chunk header and then only emit complete chunks?

I'm currently undecided on this, but would likely have suggested to honor the header.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reasoning behind not honoring the header is to keep as less data floating around somewhere in a buffer and handing it over to who ever is using the client as soon as possible.

I don't have any strong preference regarding this. Can write up a benchmark to see which method performs best in various situations, but not sure if that is worth the effort 😄 .

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point: If the header says the next chunk is 2 GB in size, we should probably not allocate a 2 GB buffer :)

Let's keep this as-is, but perhaps consider adding some documentation and/or tests? 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add docs and tests 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests have been added by the way

$this->buffer = substr($this->buffer, $chunkLength);
return;
}

$this->nextChunkIsLength = true;
$this->buffer = substr($this->buffer, 2);
}

protected function getChunkLength()
{
$bufferLength = strlen($this->buffer);

if ($bufferLength >= $this->remainingLength) {
return $this->remainingLength;
}

return $bufferLength;
}

public function end($data = null)
{
$this->stream->end($data);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The end() method is not implemented on ReadableStreamInterface (can safely be removed).

}

public function pause()
{
$this->stream->pause();
}

public function resume()
{
$this->stream->resume();
}
}
10 changes: 7 additions & 3 deletions src/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,13 @@ public function __construct(DuplexStreamInterface $stream, $protocol, $version,
$this->reasonPhrase = $reasonPhrase;
$this->headers = $headers;

$stream->on('data', array($this, 'handleData'));
$stream->on('error', array($this, 'handleError'));
$stream->on('end', array($this, 'handleEnd'));
if (isset($this->headers['transfer-encoding']) && $this->headers['transfer-encoding'] == 'chunked') {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor nitpicking: I'd like to see strict === comparision here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The headers should be normalized to lowercase keys and values before checking.

$normalizedHeaders = array_change_key_case($headers, CASE_LOWER);

if (isset($normalizedHeaders['transfer-encoding']) && strtolower($normalizedHeaders['transfer-encoding']) === 'chunked') {
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that is something I've thought about. Currently thinking about making lowercase the default for the entire package and releasing that in 0.5.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense for response headers, request headers should be used as passed in by the user (imho).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes absolutely, was only talking about the response headers, sorry for the confusion.

$this->stream = new ChunkedStreamDecoder($stream);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, this introduces a BC break.

How can a consumer of this library tell if the Transfer-Encoding has already been processed?

Should we raise to v0.5.0?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe remove this header on the request if it has been processed ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like a solid way to go, what do you think @clue?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also what I've had in mind, let's give it a try 👍

Documentation and/or tests would be much appreciated 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do 👍


$this->stream->on('data', array($this, 'handleData'));
$this->stream->on('error', array($this, 'handleError'));
$this->stream->on('end', array($this, 'handleEnd'));
}

public function getProtocol()
Expand Down
40 changes: 40 additions & 0 deletions tests/DecodeChunkedStreamTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

namespace React\Tests\HttpClient;

use React\HttpClient\ChunkedStreamDecoder;
use React\Stream\ThroughStream;

class DecodeChunkedStreamTest extends TestCase
{
public function provideChunkedEncoding()
{
return [
[["4\r\nWiki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\nWiki\r\n", "5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\nWiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\nWiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n", "\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n", "\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne\r\n", " in\r\n", "\r\nchunks.\r\n0\r\n\r\n"]],
[["4\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne\r\n", " in\r\n", "\r\nchunks.\r\n", "0\r\n\r\n"]],
];
}

/**
* @test
* @dataProvider provideChunkedEncoding
*/
public function testChunkedEncoding(array $strings)
{
$stream = new ThroughStream();
$response = new ChunkedStreamDecoder($stream);
$buffer = '';
$response->on('data', function ($data) use (&$buffer) {
$buffer .= $data;
});
foreach ($strings as $string) {
$stream->write($string);
}
$this->assertSame("Wikipedia in\r\n\r\nchunks.", $buffer);
}
}
28 changes: 28 additions & 0 deletions tests/ResponseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace React\Tests\HttpClient;

use React\HttpClient\Response;
use React\Stream\ThroughStream;

class ResponseTest extends TestCase
{
Expand Down Expand Up @@ -73,5 +74,32 @@ public function closedResponseShouldNotBeResumedOrPaused()
$response->resume();
$response->pause();
}

/** @test */
public function chunkedEncodingResponse()
{
$stream = new ThroughStream();
$response = new Response(
$stream,
'http',
'1.0',
'200',
'ok',
[
'content-type' => 'text/plain',
'transfer-encoding' => 'chunked',
]
);

$buffer = '';
$response->on('data', function ($data) use (&$buffer) {
$buffer.= $data;
});
$this->assertSame('', $buffer);
$stream->write("4\r\n");
$this->assertSame('', $buffer);
$stream->write("Wiki\r\n");
$this->assertSame('Wiki', $buffer);
}
}