88#pragma once
99#include < executorch/backends/qualcomm/aot/ir/qcir_utils.h>
1010#include < executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
11- #include < executorch/backends/qualcomm/qc_binary_info_generated.h>
1211#include < executorch/backends/qualcomm/qc_compiler_spec_generated.h>
1312#include < executorch/backends/qualcomm/runtime/Logging.h>
1413#include < executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
1514#include < executorch/backends/qualcomm/runtime/QnnManager.h>
15+ #include < executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
1616#include < pybind11/numpy.h>
1717#include < pybind11/pybind11.h>
1818#include < pybind11/stl.h>
@@ -50,46 +50,64 @@ class PyQnnManager {
5050 qnn_executorch_options, qnn_executorch_context_binary_);
5151 }
5252
53- // used for loading multiple graphs in qcir
53+ // used during stage 2 of multi-graph mode
5454 explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
5555 : qnn_executorch_option_ptr_(buffer) {
5656 auto qnn_executorch_options = GetQnnExecuTorchOptions (
5757 qnn_executorch_option_ptr_.cast <std::string_view>().data ());
5858
5959 // merge multiple qcirs into one context with multiple graphs
6060
61- // this makes it easier to do subtraction for offsets
61+ // We start retrieving tensor from offsets = 0.
6262 std::vector<uint32_t > offsets (1 , 0 );
63- std::vector<const flatbuffers::Vector64<uint8_t >*> tensor_data;
64- fb_opt_.max_size = FLATBUFFERS_MAX_64_BUFFER_SIZE;
63+ std::vector<uint8_t > tensor_data;
64+ std::vector<uint8_t *> tensor_ptr;
65+ std::vector<uint64_t > tensor_size;
66+ uint64_t total_tensor_size = 0 ;
6567 for (size_t i = 0 ; i < qcirs.size (); ++i) {
6668 py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
67- flatbuffers::Verifier verifier_binary_info (
68- static_cast <const uint8_t * const >(info.ptr ),
69- info.size * info.itemsize ,
70- fb_opt_);
71- if (!qnn_delegate::VerifyBinaryInfoBuffer (verifier_binary_info)) {
72- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify binary info" );
73- return ;
74- }
75- auto binary_info = qnn_delegate::GetBinaryInfo (info.ptr );
76- tensor_data.push_back (binary_info->tensor_data ());
77-
78- flatbuffers::Verifier verifier_qcir (
79- binary_info->context_data ()->Data (),
80- binary_info->context_data ()->size ());
81- if (!qcir::VerifyContextBuffer (verifier_qcir)) {
82- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify qcir format" );
69+
70+ uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
71+ QnnQcirCustomProtocol qnn_qcir_custom_protocol;
72+ auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
73+ qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
74+ qcir_custom_buffer_ptr);
75+
76+ if (status != Error::Ok) {
77+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
8378 return ;
8479 }
85- offsets.push_back (offsets.back () + binary_info->tensor_data ()->size ());
80+
81+ tensor_ptr.push_back (static_cast <uint8_t *>(qcir_tensor_ptr));
82+ tensor_size.push_back (qcir_tensor_size);
83+ total_tensor_size += qcir_tensor_size;
84+ offsets.push_back (offsets.back () + qcir_tensor_size);
85+ }
86+
87+ tensor_data.resize (total_tensor_size);
88+
89+ // store multiple graphs tensor in a contiguous memory space
90+ for (size_t i = 0 ; i < tensor_ptr.size (); ++i) {
91+ std::memcpy (
92+ tensor_data.data () + offsets[i], tensor_ptr[i], tensor_size[i]);
8693 }
8794
8895 std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
8996 for (size_t i = 0 ; i < qcirs.size (); ++i) {
9097 py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
91- auto binary_info = qnn_delegate::GetBinaryInfo (info.ptr );
92- auto context = qcir::GetContext (binary_info->context_data ()->Data ());
98+
99+ uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
100+ QnnQcirCustomProtocol qnn_qcir_custom_protocol;
101+ auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
102+ qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
103+ qcir_custom_buffer_ptr);
104+
105+ if (status != Error::Ok) {
106+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
107+ return ;
108+ }
109+
110+ auto context = qcir::GetContext (qcir_fbs_ptr);
93111 for (const auto & graph : *context->graphs ()) {
94112 std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
95113 for (const auto tensor : *graph->tensors ()) {
@@ -138,7 +156,9 @@ class PyQnnManager {
138156 QnnExecuTorchContextBinary qcir_bin (
139157 {builder_.GetBufferPointer (), builder_.GetSize ()});
140158
141- qnn_executorch_context_binary_ = MakeBinaryInfo (qcir_bin, tensor_data);
159+ // Init QnnQcirCustomProtocol binary
160+ qnn_executorch_context_binary_ =
161+ MakeQcirCustomBinaryInfo (qcir_bin, tensor_data);
142162 qnn_manager_ = std::make_shared<QnnManager>(
143163 qnn_executorch_options, qnn_executorch_context_binary_);
144164 }
@@ -152,7 +172,7 @@ class PyQnnManager {
152172 return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
153173 }
154174
155- // this method is specific for compiling multi-graphs
175+ // this method is specific for stage 2 of compiling multi-graphs
156176 py::array_t <char > Compile () {
157177 if (qnn_manager_->CompileQcir () != Error::Ok) {
158178 QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
@@ -271,7 +291,13 @@ class PyQnnManager {
271291
272292 QnnExecuTorchContextBinary qcir_binary (
273293 {builder_.GetBufferPointer (), builder_.GetSize ()});
274- binary_info = MakeBinaryInfo (qcir_binary, tensor_data);
294+
295+ custom_qcir_protocol_buffer_ =
296+ QnnQcirCustomProtocol (qcir_binary.nbytes , tensor_data.size ());
297+ custom_qcir_protocol_buffer_.BuildQcirCustomBuffer (
298+ qcir_binary, tensor_data);
299+ std::tie (binary_info.buffer , binary_info.nbytes ) =
300+ custom_qcir_protocol_buffer_.GetCustomProtocolBuffer ();
275301 } else {
276302 if (qnn_manager_->Compile (graph_name, op_wrappers) !=
277303 executorch::runtime::Error::Ok) {
@@ -338,101 +364,41 @@ class PyQnnManager {
338364 return qnn_manager_->GetSpillFillBufferSize ();
339365 }
340366
// Packs a qcir context binary together with its tensor data into the
// QnnQcirCustomProtocol buffer held by this object and returns a
// {pointer, size} view of that buffer.
//
// ctx_bin:     context binary whose `nbytes` sizes the protocol buffer and
//              which is passed on to BuildQcirCustomBuffer.
// tensor_data: tensor bytes passed on to BuildQcirCustomBuffer; its size()
//              is also used to size the protocol buffer.
//
// NOTE(review): the returned pointer comes from the member
// custom_qcir_protocol_buffer_, which is reassigned on every call; presumably
// a previously returned view becomes invalid after the next call -- confirm
// against QnnQcirCustomProtocol.
367+ QnnExecuTorchContextBinary MakeQcirCustomBinaryInfo (
368+ const QnnExecuTorchContextBinary& ctx_bin,
369+ const std::vector<uint8_t >& tensor_data) {
// Replace the member with a fresh protocol object sized for this payload.
370+ custom_qcir_protocol_buffer_ =
371+ QnnQcirCustomProtocol (ctx_bin.nbytes , tensor_data.size ());
372+ custom_qcir_protocol_buffer_.BuildQcirCustomBuffer (ctx_bin, tensor_data);
// Hand back the buffer's address and byte count as a context binary.
373+ auto [ptr, size] = custom_qcir_protocol_buffer_.GetCustomProtocolBuffer ();
374+ return {ptr, size};
375+ }
376+
// Wraps the bytes of `ctx_bin` in a custom-protocol buffer and returns a copy
// of that buffer as a py::array_t<char>.
//
// In this diff view, lines marked `-` are the previous implementation (built
// via the MakeBinaryInfo(binary, tensor_data) overload with empty tensor data)
// and lines marked `+` are the replacement built with QnnContextCustomProtocol.
//
// ctx_bin: Python bytes object read through the buffer protocol.
// Returns: a newly allocated char array holding a copy of the protocol buffer.
341377 py::array_t <char > MakeBinaryInfo (const py::bytes& ctx_bin) {
342378 py::buffer_info info (py::buffer (ctx_bin).request ());
// Byte length is element count times element size of the Python buffer.
343379 QnnExecuTorchContextBinary binary (
344380 {info.ptr , static_cast <uint64_t >(info.size * info.itemsize )});
345- std::vector<uint8_t > tensor_data;
346- auto binary_info = MakeBinaryInfo (binary, tensor_data);
347- auto result = py::array_t <char >(binary_info.nbytes );
381+
// The protocol object is a local, so the result must be copied out before it
// goes out of scope (done by the memcpy below).
382+ auto qnn_context_custom_protocol = QnnContextCustomProtocol (binary.nbytes );
383+ qnn_context_custom_protocol.BuildContextCustomBuffer (binary);
384+ auto [custom_buffer_ptr, custom_buffer_size] =
385+ qnn_context_custom_protocol.GetCustomProtocolBuffer ();
386+
387+ auto result = py::array_t <char >(custom_buffer_size);
348388 auto result_buffer = result.request ();
349- std::memcpy (result_buffer.ptr , binary_info. buffer , binary_info. nbytes );
389+ std::memcpy (result_buffer.ptr , custom_buffer_ptr, custom_buffer_size );
350390 return result;
351391 }
352392
353393 private:
354- std::string signature () {
355- return std::to_string (
356- std::chrono::high_resolution_clock::now ().time_since_epoch ().count ());
357- };
358-
359- QnnExecuTorchContextBinary MakeBinaryInfo (
360- const QnnExecuTorchContextBinary& ctx_bin,
361- const std::vector<const flatbuffers::Vector64<uint8_t >*>& tensor_data) {
362- // the build order matters, 64 bit data is required to be shipped first
363- // add context data
364- builder64_.Reset ();
365- auto offset_context = builder64_.CreateVector <
366- uint8_t ,
367- flatbuffers::Offset64,
368- flatbuffers::Vector64>(
369- static_cast <const uint8_t *>(ctx_bin.buffer ), ctx_bin.nbytes );
370- // add tensor data
371- // this is a little bit tricky but have smallest memory footprint in AoT
372- size_t buffer_size = 0 ;
373- for (auto & td : tensor_data) {
374- buffer_size += td->size ();
375- }
376- builder64_.StartVector <
377- uint8_t ,
378- flatbuffers::Offset64,
379- flatbuffers::Vector64<uint8_t >::size_type>(buffer_size);
380- for (int i = tensor_data.size () - 1 ; i >= 0 ; --i) {
381- builder64_.PushBytes (tensor_data[i]->Data (), tensor_data[i]->size ());
382- }
383- auto offset_tensor = flatbuffers::Offset64<flatbuffers::Vector64<uint8_t >>(
384- builder64_.EndVector <
385- flatbuffers::Vector64<uint8_t >::size_type,
386- flatbuffers::Offset64<flatbuffers::Vector64<uint8_t >>::offset_type>(
387- buffer_size));
388- // add signature to binary for cache reuse in runtime
389- auto offset_signature = builder64_.CreateString (signature ().c_str ());
390- // build binary info
391- auto binary_info = qnn_delegate::CreateBinaryInfo (
392- builder64_, offset_signature, offset_context, offset_tensor);
393- builder64_.Finish (binary_info);
394-
395- return QnnExecuTorchContextBinary (
396- {builder64_.GetBufferPointer (), builder64_.GetSize ()});
397- }
398-
399- QnnExecuTorchContextBinary MakeBinaryInfo (
400- const QnnExecuTorchContextBinary& ctx_bin,
401- const std::vector<uint8_t >& tensor_data) {
402- // the build order matters, 64 bit data is required to be shipped first
403- // add context data
404- builder64_.Reset ();
405-
406- auto offset_context = builder64_.CreateVector <
407- uint8_t ,
408- flatbuffers::Offset64,
409- flatbuffers::Vector64>(
410- static_cast <const uint8_t *>(ctx_bin.buffer ), ctx_bin.nbytes );
411- // add tensor data
412- auto offset_tensor = builder64_.CreateVector <
413- uint8_t ,
414- flatbuffers::Offset64,
415- flatbuffers::Vector64>(
416- static_cast <const uint8_t *>(tensor_data.data ()), tensor_data.size ());
417- // add signature to binary for cache reuse in runtime
418- auto offset_signature = builder64_.CreateString (signature ().c_str ());
419- // build binary info
420- auto binary_info = qnn_delegate::CreateBinaryInfo (
421- builder64_, offset_signature, offset_context, offset_tensor);
422- builder64_.Finish (binary_info);
423-
424- return QnnExecuTorchContextBinary (
425- {builder64_.GetBufferPointer (), builder64_.GetSize ()});
426- }
427-
428394 // Store the bytes object instead of a raw pointer so that this module will
429395 // keep the bytes alive.
430396 const py::bytes qnn_executorch_option_ptr_;
431397 QnnExecuTorchContextBinary qnn_executorch_context_binary_;
432398 std::shared_ptr<QnnManager> qnn_manager_;
433- flatbuffers::FlatBufferBuilder64 builder64_;
399+ QnnQcirCustomProtocol custom_qcir_protocol_buffer_;
400+ QnnContextCustomProtocol custom_context_custom_buffer_;
434401 flatbuffers::FlatBufferBuilder builder_;
435- flatbuffers::Verifier::Options fb_opt_;
436402};
437403} // namespace qnn
438404} // namespace backends
0 commit comments