From 4ba0849a2337c5efd4252007c554f832d95947b7 Mon Sep 17 00:00:00 2001 From: Simon Frings Date: Wed, 2 Mar 2022 15:03:47 +0100 Subject: [PATCH] Update documentation and examples to match version in early access --- .gitignore | 3 + README.md | 418 +++++++++++++++++++++++++++++++++- examples/benchmark-insert.php | 58 +++++ examples/benchmark-read.php | 37 +++ examples/insert.php | 15 ++ examples/query-stream.php | 31 +++ examples/query.php | 25 ++ examples/search.php | 19 ++ 8 files changed, 597 insertions(+), 9 deletions(-) create mode 100644 .gitignore create mode 100644 examples/benchmark-insert.php create mode 100644 examples/benchmark-read.php create mode 100644 examples/insert.php create mode 100644 examples/query-stream.php create mode 100644 examples/query.php create mode 100644 examples/search.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..843ed49 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/composer.lock +/examples/.env +/vendor/ diff --git a/README.md b/README.md index 7f82dd5..1c6b80e 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,41 @@ It is written in pure PHP and does not require any extensions. 
**Table of contents** +* [Support us](#support-us) * [Quickstart example](#quickstart-example) +* [Usage](#usage) + * [Database API](#database-api) + * [Promises](#promises) + * [Cancellation](#cancellation) + * [Timeouts](#timeouts) + * [Blocking](#blocking) + * [Streaming](#streaming) +* [API](#api) + * [ClickHouseClient](#clickhouseclient) + * [query()](#query) + * [queryStream()](#querystream) + * [insert()](#insert) + * [insertStream()](#insertstream) + * [withSession()](#withsession) * [Install](#install) +* [Tests](#tests) * [License](#license) +## Support us + +[![A clue·access project](https://raw.githubusercontent.com/clue-access/clue-access/main/clue-access.png)](https://github.com/clue-access/clue-access) + +*This project is currently under active development, +you're looking at a temporary placeholder repository.* + +The code is available in early access to my sponsors here: https://github.com/clue-access/reactphp-clickhouse + +Do you sponsor me on GitHub? Thank you for supporting sustainable open-source, you're awesome! ❤️ Have fun with the code! 🎉 + +Seeing a 404 (Not Found)? Sounds like you're not in the early access group. Consider becoming a [sponsor on GitHub](https://github.com/sponsors/clue) for early access. Check out [clue·access](https://github.com/clue-access/clue-access) for more details. + +This way, more people get a chance to take a look at the code before the public release. + ## Quickstart example Once [installed](#install), you can use the following code to connect to your @@ -47,26 +78,395 @@ $client->query('SELECT id, name FROM users')->then(function (Clue\React\ClickHou }); ``` +See also the [examples](examples). 
+ +## Usage + +### Database API + +Most importantly, this project provides a [`ClickHouseClient`](#clickhouseclient) object that offers +several methods that allow you to access your ClickHouse database: + +```php +$db = new Clue\React\ClickHouse\ClickHouseClient('http://localhost:8123/'); + +$db->query($query); +$db->insert($table, $data); +``` + +Each of the above methods supports async operation and either *fulfills* with +its result or *rejects* with an `Exception`. +Please see the following chapter about [promises](#promises) for more details. + +### Promises + +Sending queries is async (non-blocking), so you can actually send multiple queries in parallel. +ClickHouse will respond to each query with a result, but the order is not guaranteed. +Sending queries uses a [Promise](https://github.com/reactphp/promise)-based interface +that makes it easy to react to when a query is completed (i.e. either successfully fulfilled or rejected with an error). + +```php +$db->query('SELECT COUNT(*) FROM users')->then( + function (Clue\React\ClickHouse\Result $result) { + // results received + }, + function (Exception $e) { + // an error occurred while executing the query + } +); +``` + +If this looks strange to you, you can also use the more traditional [blocking API](#blocking). + +### Cancellation + +The returned Promise is implemented in such a way that it can be cancelled +when it is still pending. +Cancelling a pending promise will reject its value with an Exception and +clean up any underlying resources. + +```php +$promise = $db->query('SELECT COUNT(*) FROM users'); + +Loop::addTimer(2.0, function () use ($promise) { + $promise->cancel(); +}); +``` + +### Timeouts + +This library uses a very efficient HTTP implementation, so most queries +should usually be completed in mere milliseconds. However, when sending queries +over an unreliable network (the internet), there are a number of things +that can go wrong and may cause the request to fail after a time.
As such, +timeouts are handled by the underlying HTTP library and this library respects +PHP's `default_socket_timeout` setting (default 60s) as a timeout for sending the +outgoing query and waiting for a successful result and will otherwise +cancel the pending request and reject its value with an `Exception`. + +Note that this timeout value covers creating the underlying transport connection, +sending the request, waiting for the database to process the request +and receiving the full response. To use a custom timeout value, you can +pass the timeout to the [underlying `Browser`](https://github.com/reactphp/http#timeouts) +like this: + +```php +$browser = new React\Http\Browser(); +$browser = $browser->withTimeout(10.0); + +$db = new Clue\React\ClickHouse\ClickHouseClient($url, $browser); + +$db->query('SELECT COUNT(*) AS count FROM users')->then(function (Clue\React\ClickHouse\Result $result) { + // results received within 10 seconds maximum + echo 'Number of users: ' . $result->data[0]['count'] . PHP_EOL; +}); +``` + +Similarly, you can use a negative timeout value to not apply a timeout at all +or use a `null` value to restore the default handling. Note that the underlying +connection may still impose a different timeout value. See also the underlying +[timeouts documentation](https://github.com/reactphp/http#timeouts) for more details. + +### Blocking + +As stated above, this library provides you a powerful, async API by default. + +If, however, you want to integrate this into your traditional, blocking environment, +you should look into also using [clue/reactphp-block](https://github.com/clue/reactphp-block).
+ +The resulting blocking code could look something like this: + +```php +use Clue\React\Block; +use React\EventLoop\Loop; + +$db = new Clue\React\ClickHouse\ClickHouseClient('http://localhost:8123/'); + +$promise = $db->query('SELECT COUNT(*) FROM users'); + +try { + $result = Block\await($promise, Loop::get()); + // results received +} catch (Exception $e) { + // an error occurred while executing the query +} +``` + +Similarly, you can also process multiple queries concurrently and await an array of results: + +```php +$promises = [ + $db->query('SELECT COUNT(*) FROM users'), + $db->query('SELECT name, email FROM users ORDER BY name LIMIT 10') +]; + +$results = Block\awaitAll($promises, Loop::get()); +``` + +Please refer to [clue/reactphp-block](https://github.com/clue/reactphp-block#readme) for more details. + +### Streaming + +The following API endpoint exposes the result set as an object containing all rows: + +```php +$db->query($query); +``` + +Keep in mind that this means the whole result set has to be kept in memory. +This is easy to get started and works reasonably well for smaller result sets. + +For bigger result sets it's usually a better idea to use a streaming approach, +where only small chunks have to be kept in memory. +This works for (any number of) rows of arbitrary sizes. + +The [`ClickHouseClient::queryStream()`](#querystream) method complements the default +Promise-based [`ClickHouseClient::query()`](#query) API and returns an instance implementing +[`ReadableStreamInterface`](https://github.com/reactphp/stream#readablestreaminterface) instead: + +```php +$stream = $db->queryStream('SELECT name, email FROM users'); + +$stream->on('data', function (array $row) { + echo $row['name'] . ': ' . $row['email'] . PHP_EOL; +}); + +$stream->on('error', function (Exception $error) { + echo 'Error: ' . $error->getMessage() . PHP_EOL; +}); + +$stream->on('close', function () { + echo '[DONE]' .
PHP_EOL; +}); +``` + +The [`ClickHouseClient::insertStream()`](#insertstream) method complements the default +Promise-based [`ClickHouseClient::insert()`](#insert) API and returns an instance implementing +[`WritableStreamInterface`](https://github.com/reactphp/stream#writablestreaminterface) instead: + +```php +$stream = $db->insertStream('users'); + +$stream->write(['name' => 'Alice', 'email' => 'alice@example.com']); +$stream->end(['name' => 'Bob', 'email' => 'bob@example.com']); + +$stream->on('error', function (Exception $error) { + echo 'Error: ' . $error->getMessage() . PHP_EOL; +}); + +$stream->on('close', function () { + echo '[CLOSED]' . PHP_EOL; +}); +``` + +## API + +### ClickHouseClient + +The `ClickHouseClient` is responsible for communicating with your ClickHouse database +and for sending your database queries and exposing results from the database. + +Its constructor simply requires the URL to your ClickHouse database: + +```php +$db = new Clue\React\ClickHouse\ClickHouseClient('http://localhost:8123/'); +``` + +This class takes an optional `Browser|null $browser` parameter that can be used to +pass the browser instance to use for this object. 
+If you need custom connector settings (DNS resolution, TLS parameters, timeouts, +proxy servers etc.), you can explicitly pass a custom instance of the +[`ConnectorInterface`](https://github.com/reactphp/socket#connectorinterface) +to the [`Browser`](https://github.com/reactphp/http#browser) instance +and pass it as an additional argument to the `ClickHouseClient` like this: + +```php +$connector = new React\Socket\Connector([ + 'dns' => '127.0.0.1', + 'tcp' => [ + 'bindto' => '192.168.10.1:0' + ], + 'tls' => [ + 'verify_peer' => false, + 'verify_peer_name' => false + ] +]); + +$browser = new React\Http\Browser($connector); +$db = new Clue\React\ClickHouse\ClickHouseClient('http://localhost:8123/', $browser); +``` + +#### query() + +The `query(string $sql, array $params = []): PromiseInterface` method can be used to +perform an async query. + +```php +$db->query('SELECT name, email FROM users')->then(function (Clue\React\ClickHouse\Result $result) { + echo count($result->data) . ' row(s) in set' . PHP_EOL; + foreach ($result->data as $user) { + echo $user['name'] . ': ' . $user['email'] . PHP_EOL; + } +}, function (Exception $error) { + // the query was not executed successfully + echo 'Error: ' . $error->getMessage() . PHP_EOL; +}); +``` + +You can optionally pass an array of `$params` that will be bound to the +query like this: + +```php +$promise = $db->query( + 'SELECT name, email FROM users WHERE name LIKE {search:String}', + ['search' => '%a%'] +); +``` + +#### queryStream() + +The `queryStream(string $sql, array $params = []): ReadableStreamInterface` method can be used to +perform a streaming query. + +```php +$stream = $db->queryStream('SELECT name, email FROM users'); + +$stream->on('data', function (array $row) { + echo $row['name'] . ': ' . $row['email'] . PHP_EOL; +}); + +$stream->on('error', function (Exception $e) { + echo 'Error: ' . $e->getMessage() . 
PHP_EOL; +}); +``` + +You can optionally pass an array of `$params` that will be bound to the +query like this: + +```php +$stream = $db->queryStream( + 'SELECT name, email FROM users WHERE name LIKE {search:String}', + ['search' => '%a%'] +); +``` + +#### insert() + +The `insert(string $table, array $data): PromiseInterface` method can be used to +insert a new row. + +```php +$db->insert('users', [ + 'name' => 'Alice', + 'email' => 'alice@example.com' +]); +``` + +#### insertStream() + +The `insertStream(string $table): WritableStreamInterface` method can be used to +insert any number of rows from a stream. + +```php +$stream = $db->insertStream('users'); + +$stream->write([ + 'name' => 'Alice', + 'email' => 'alice@example.com' +]); +$stream->end(); +``` + +#### withSession() + +The `withSession(?string = null): self` method can be used to +assign a session identifier to use for all subsequent queries. + +```php +$db = $db->withSession(); +``` + +Optionally, you can pass an explicit session identifier to use. +If you do not pass an explicit session identifier, a random session +identifier will be used. + +```php +$db = $db->withSession('imports'); +``` + +You can unset the session identifier by passing an empty string. The new +client will no longer use a session identifier for any subsequent queries. + +```php +$db = $db->withSession(''); +``` + ## Install -[![A clue·access project](https://raw.githubusercontent.com/clue-access/clue-access/main/clue-access.png)](https://github.com/clue-access/clue-access) +The recommended way to install this library is [through Composer](https://getcomposer.org/). +[New to Composer?](https://getcomposer.org/doc/00-intro.md) -*This project is currently under active development, -you're looking at a temporary placeholder repository.* +This project does not yet follow [SemVer](https://semver.org/).
+This will install the latest supported version: -The code is available in early access to my sponsors here: https://github.com/clue-access/reactphp-clickhouse +While in [early access](#support-us), you first have to manually change your +`composer.json` to include these lines to access the supporters-only repository: -Do you sponsor me on GitHub? Thank you for supporting sustainable open-source, you're awesome! ❤️ Have fun with the code! 🎉 +```json +{ + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/clue-access/reactphp-clickhouse" + } + ] +} +``` -Seeing a 404 (Not Found)? Sounds like you're not in the early access group. Consider becoming a [sponsor on GitHub](https://github.com/sponsors/clue) for early access. Check out [clue·access](https://github.com/clue-access/clue-access) for more details. +Then install this package as usual: -This way, more people get a chance to take a look at the code before the public release. +```bash +$ composer require clue/reactphp-clickhouse:dev-main +``` + +This project aims to run on any platform and thus does not require any PHP +extensions and supports running on PHP 7.0 through current PHP 8+. + +## Tests + +To run the test suite, you first need to clone this repo and then install all +dependencies [through Composer](https://getcomposer.org/): + +```bash +$ composer install +``` + +To run the test suite, go to the project root and run: + +```bash +$ vendor/bin/phpunit +``` + +The test suite contains both unit tests and functional integration tests. +The functional tests require access to a running ClickHouse database server +instance and will be skipped by default. 
+If you want to also run the functional tests, you need to supply *your* ClickHouse +database credentials in an environment variable like this: + +```bash +$ URL=http://localhost:8123 vendor/bin/phpunit +``` + +You can run a temporary ClickHouse database server in a Docker container like this: + +``` +$ docker run -it --rm --net=host yandex/clickhouse-server +``` -Rock on 🤘 ## License -This project will be released under the permissive [MIT license](LICENSE). +This project is released under the permissive [MIT license](LICENSE). > Did you know that I offer custom development services and issuing invoices for sponsorships of releases and for contributions? Contact me (@clue) for details. diff --git a/examples/benchmark-insert.php b/examples/benchmark-insert.php new file mode 100644 index 0000000..7d1c375 --- /dev/null +++ b/examples/benchmark-insert.php @@ -0,0 +1,58 @@ +insertStream('foos'); + +$count = 0; +$fill = function () use (&$count, $n, $stream, &$fill) { + $continue = true; + while ($count < $n && $continue === true) { + $continue = $stream->write(['bar' => 'now ' . mt_rand()]); + ++$count; + } + + if ($count < $n) { + //echo 'stop after ' . $count.'/'.$n . PHP_EOL; + $stream->once('drain', function () use ($n, $fill) { + //echo 'continue' . PHP_EOL; + Loop::futureTick($fill); + }); + } else { + //echo 'done' . $count . PHP_EOL; + $stream->end(); + } +}; +$fill(); + +$stream->on('error', function (Exception $e) { + echo 'Error: ' . $e->getMessage() . 
PHP_EOL; +}); + +$start = microtime(true); +$report = Loop::addPeriodicTimer(0.05, function () use (&$count, $start) { + printf("\r%d records in %0.3fs...", $count, microtime(true) - $start); +}); + +$stream->on('close', function () use (&$count, $report, $start) { + $now = microtime(true); + Loop::cancelTimer($report); + + printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start)); +}); diff --git a/examples/benchmark-read.php b/examples/benchmark-read.php new file mode 100644 index 0000000..e0f40e5 --- /dev/null +++ b/examples/benchmark-read.php @@ -0,0 +1,37 @@ + 30" + +use React\EventLoop\Loop; + +require __DIR__ . '/../vendor/autoload.php'; + +if (extension_loaded('xdebug')) { + echo 'NOTICE: The "xdebug" extension is loaded, this has a major impact on performance.' . PHP_EOL; +} + +$db = new Clue\React\ClickHouse\ClickHouseClient('http://localhost:8123/'); + +$stream = $db->queryStream($argv[1] ?? 'SELECT toUInt8(number) FROM system.numbers LIMIT 1000000'); + +$count = 0; +$stream->on('data', function () use (&$count) { + ++$count; +}); +$stream->on('error', function (Exception $e) { + echo 'Error: ' . $e->getMessage() . PHP_EOL; +}); + +$start = microtime(true); +$report = Loop::addPeriodicTimer(0.05, function () use (&$count, $start) { + printf("\r%d records in %0.3fs...", $count, microtime(true) - $start); +}); + +$stream->on('close', function () use (&$count, $report, $start) { + $now = microtime(true); + Loop::cancelTimer($report); + + printf("\r%d records in %0.3fs => %d records/s\n", $count, $now - $start, $count / ($now - $start)); +}); + diff --git a/examples/insert.php b/examples/insert.php new file mode 100644 index 0000000..d1f3557 --- /dev/null +++ b/examples/insert.php @@ -0,0 +1,15 @@ +query('CREATE TABLE IF NOT EXISTS foos (bar String) ENGINE = Memory'); +// $promise->then(null, 'printf'); + +$value = isset($argv[1]) ? 
$argv[1] : date(DATE_ATOM); +$db->insert('foos', ['bar' => $value])->then(function () use ($value) { + echo 'Inserted: ' . $value . PHP_EOL; +}, function (Exception $e) { + echo 'Error: ' . $e->getMessage() . PHP_EOL; +}); diff --git a/examples/query-stream.php b/examples/query-stream.php new file mode 100644 index 0000000..2111646 --- /dev/null +++ b/examples/query-stream.php @@ -0,0 +1,31 @@ +queryStream($query); + +$first = true; +$stream->on('data', function (array $row) use (&$first){ + if ($first) { + echo implode("\t", array_keys($row)) . PHP_EOL; + $first = false; + } + echo implode("\t", $row) . PHP_EOL; +}); + +$stream->on('error', function (Exception $e) { + echo 'Error: ' . $e->getMessage() . PHP_EOL; +}); + + $stream->on('close', function (){ + echo 'CLOSED' . PHP_EOL; +}); diff --git a/examples/query.php b/examples/query.php new file mode 100644 index 0000000..a2f8595 --- /dev/null +++ b/examples/query.php @@ -0,0 +1,25 @@ +query($query)->then(function (Clue\React\ClickHouse\Result $result) { + echo 'Found ' . count($result->data) . ' data: ' . PHP_EOL; + echo implode("\t", array_column($result->meta, "name")) . PHP_EOL; + foreach ($result->data as $row) { + echo implode("\t", $row) . PHP_EOL; + } +}, function (Exception $e) { + echo 'Error: ' . $e->getMessage() . PHP_EOL; +}); diff --git a/examples/search.php b/examples/search.php new file mode 100644 index 0000000..9bea4fd --- /dev/null +++ b/examples/search.php @@ -0,0 +1,19 @@ +query( + 'SELECT * FROM foos WHERE bar LIKE {search:String}', + ['search' => '%' . $search . '%'] +)->then(function (Clue\React\ClickHouse\Result $result) { + echo 'Found ' . count($result->data) . ' rows: ' . PHP_EOL; + echo implode("\t", array_column($result->meta, 'name')) . PHP_EOL; + foreach ($result->data as $row) { + echo implode("\t", $row) . PHP_EOL; + } +}, function (Exception $e) { + echo 'Error: ' . $e->getMessage() . PHP_EOL; +});