Skip to content

Commit 0e074fd

Browse files
Support selecting partial columns from an external Kafka stream (#307)
Co-authored-by: Haohang <[email protected]>
1 parent ab4c8af commit 0e074fd

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

src/Storages/ExternalStream/Kafka/KafkaSource.cpp

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ KafkaSource::KafkaSource(
4343
, max_block_size(max_block_size_)
4444
, log(log_)
4545
, header(header_)
46+
, non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized())
4647
, consume_ctx(kafka->topic(), shard, offset)
4748
, read_buffer("", 0)
4849
, virtual_time_columns_calc(header.columns(), nullptr)
@@ -178,24 +179,29 @@ void KafkaSource::parseRaw(const rd_kafka_message_t * kmessage)
178179
void KafkaSource::parseFormat(const rd_kafka_message_t * kmessage)
179180
{
180181
assert(format_executor);
182+
assert(convert_non_virtual_to_physical_action);
181183

182184
ReadBufferFromMemory buffer(static_cast<const char *>(kmessage->payload), kmessage->len);
183185
auto new_rows = format_executor->execute(buffer);
184186
if (!new_rows)
185187
return;
186188

189+
auto result_block = non_virtual_header.cloneWithColumns(format_executor->getResultColumns());
190+
convert_non_virtual_to_physical_action->execute(result_block);
191+
192+
MutableColumns new_data(result_block.mutateColumns());
193+
187194
if (!request_virtual_columns)
188195
{
189196
if (!current_batch.empty())
190197
{
191198
/// Merge all data in the current batch into the same chunk to avoid too many small chunks
192-
auto new_data(format_executor->getResultColumns());
193199
for (size_t pos = 0; pos < current_batch.size(); ++pos)
194200
current_batch[pos]->insertRangeFrom(*new_data[pos], 0, new_rows);
195201
}
196202
else
197203
{
198-
current_batch = format_executor->getResultColumns();
204+
current_batch = std::move(new_data);
199205
}
200206
}
201207
else
@@ -206,7 +212,6 @@ void KafkaSource::parseFormat(const rd_kafka_message_t * kmessage)
206212
assert(current_batch.size() == virtual_time_columns_calc.size());
207213

208214
/// slower path
209-
auto new_data(format_executor->getResultColumns());
210215
for (size_t i = 0, j = 0, n = virtual_time_columns_calc.size(); i < n; ++i)
211216
{
212217
if (!virtual_time_columns_calc[i])
@@ -224,7 +229,6 @@ void KafkaSource::parseFormat(const rd_kafka_message_t * kmessage)
224229
else
225230
{
226231
/// slower path
227-
auto new_data(format_executor->getResultColumns());
228232
for (size_t i = 0, j = 0, n = virtual_time_columns_calc.size(); i < n; ++i)
229233
{
230234
if (!virtual_time_columns_calc[i])
@@ -272,10 +276,17 @@ void KafkaSource::initFormatExecutor(const Kafka * kafka)
272276
if (!data_format.empty())
273277
{
274278
auto input_format
275-
= FormatFactory::instance().getInputFormat(data_format, read_buffer, physical_header, query_context, max_block_size);
279+
= FormatFactory::instance().getInputFormat(data_format, read_buffer, non_virtual_header, query_context, max_block_size);
276280

277281
format_executor = std::make_unique<StreamingFormatExecutor>(
278-
physical_header, std::move(input_format), [](const MutableColumns &, Exception &) -> size_t { return 0; });
282+
non_virtual_header, std::move(input_format), [](const MutableColumns &, Exception &) -> size_t { return 0; });
283+
284+
auto converting_dag = ActionsDAG::makeConvertingActions(
285+
non_virtual_header.cloneEmpty().getColumnsWithTypeAndName(),
286+
physical_header.cloneEmpty().getColumnsWithTypeAndName(),
287+
ActionsDAG::MatchColumnsMode::Name);
288+
289+
convert_non_virtual_to_physical_action = std::make_shared<ExpressionActions>(std::move(converting_dag));
279290
}
280291
}
281292

src/Storages/ExternalStream/Kafka/KafkaSource.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,12 @@ class KafkaSource final : public ISource
6262
Poco::Logger * log;
6363

6464
Block header;
65+
const Block non_virtual_header;
6566
Block physical_header;
6667
Chunk header_chunk;
6768

69+
std::shared_ptr<ExpressionActions> convert_non_virtual_to_physical_action = nullptr;
70+
6871
klog::KafkaWALSimpleConsumerPtr consumer;
6972
klog::KafkaWALContext consume_ctx;
7073

0 commit comments

Comments
 (0)