Expose method name as part of backend init context (#6622)

cccclai · facebook-github-bot · commit 1c6409d9fa8a · 2024-11-12T12:25:38.000-08:00
Summary: Provide the method name to backend so they can load the corresponding method name accordingly. The most immediate need is that the qnn context binary can include two methods, one for prefill and one for decode. Since we don't allow backend access multi methods at the moment, we do it in a hacky way via following ## AOT: ``` class LLama_transformer(): def prefill() def decode() ``` Then we will have two custom ops from two to_backends ops, and both will have two context binary ``` QAT (prefill) -> to_backend(...) => prefill.qcir flatbuffers QAT (decode) -> to_backend(...) => decode.qcir flatbuffers => graph prefill( custom_op_prefill() -> context_binary (two graphs) ) graph decode() custom_op_decode() -> context_binary (two graphs) ) ``` Since two context binary from these two customs ops will be exactly the same and they can be deduplicate during emit via these two lines https://github.com/pytorch/executorch/blob/d4a9ca01eb5bb786ecbfbcd8302253eb7797e8bb/exir/emit/_emitter.py#L136 and here https://github.com/pytorch/executorch/blob/d4a9ca01eb5bb786ecbfbcd8302253eb7797e8bb/exir/emit/_emitter.py#L1065-L1066 ``` .pte instrucions [ "prefill" [instructions: call_delegate(prefill_input)] "decode": [instructions: call_delegate(decode_input)] "delegate_payload:: Dict[bytes, index]) ] ``` ## Runtime After we expose the method name via this change, the backend can access the method name, and load the same method as the top level method ``` Result<DelegateHandle*> QNNBackend::init( BackendInitContext& context, FreeableBuffer* processed, ArrayRef<CompileSpec> compile_specs) { const char* method_name = context.get_method_name() // for example, "prefill" handle = qnn_backend.load(method_name) return handle } ``` This is to unblock sharing weight between prefill and decode for using htp backend. Reviewed By: dbort Differential Revision: D65386597
diff --git a/runtime/backend/backend_execution_context.h b/runtime/backend/backend_execution_context.h
@@ -21,8 +21,11 @@ class BackendExecutionContext final {
  public:
   BackendExecutionContext(
       EventTracer* event_tracer = nullptr,
-      MemoryAllocator* temp_allocator = nullptr)
-      : event_tracer_(event_tracer), temp_allocator_(temp_allocator) {}
+      MemoryAllocator* temp_allocator = nullptr,
+      const char* method_name = nullptr)
+      : event_tracer_(event_tracer),
+        temp_allocator_(temp_allocator),
+        method_name_(method_name) {}
 
   /**
    * Returns a pointer to an instance of EventTracer to do profiling/debugging
@@ -52,9 +55,17 @@ class BackendExecutionContext final {
     return temp_allocator_;
   }
 
+  /**
+   * Get the name of the executing method from the ExecuTorch runtime.
+   */
+  const char* get_method_name() const {
+    return method_name_;
+  }
+
  private:
   EventTracer* event_tracer_ = nullptr;
   MemoryAllocator* temp_allocator_ = nullptr;
+  const char* method_name_ = nullptr;
 };
 
 } // namespace runtime
diff --git a/runtime/backend/backend_init_context.h b/runtime/backend/backend_init_context.h
@@ -18,8 +18,10 @@ namespace runtime {
  */
 class BackendInitContext final {
  public:
-  explicit BackendInitContext(MemoryAllocator* runtime_allocator)
-      : runtime_allocator_(runtime_allocator) {}
+  explicit BackendInitContext(
+      MemoryAllocator* runtime_allocator,
+      const char* method_name = nullptr)
+      : runtime_allocator_(runtime_allocator), method_name_(method_name) {}
 
   /** Get the runtime allocator passed from Method. It's the same runtime
    * executor used by the standard executor runtime and the life span is the
@@ -29,8 +31,20 @@ class BackendInitContext final {
     return runtime_allocator_;
   }
 
+  /** Get the loaded method name from ExecuTorch runtime. Usually it's
+   * "forward", however, if there are multiple methods in the .pte file, it can
+   * be different. One example is that we may have prefill and decode methods in
+   * the same .pte file. In this case, when client loads "prefill" method, the
+   * `get_method_name` function will return "prefill", when client loads
+   * "decode" method, the `get_method_name` function will return "decode".
+   */
+  const char* get_method_name() const {
+    return method_name_;
+  }
+
  private:
   MemoryAllocator* runtime_allocator_ = nullptr;
+  const char* method_name_ = nullptr;
 };
 
 } // namespace runtime
diff --git a/runtime/executor/method.cpp b/runtime/executor/method.cpp
@@ -626,7 +626,9 @@ Error Method::init(executorch_flatbuffer::ExecutionPlan* s_plan) {
 
     for (size_t i = 0; i < n_delegate; ++i) {
       const auto& delegate = *delegates->Get(i);
-      BackendInitContext backend_init_context(method_allocator);
+      BackendInitContext backend_init_context(
+          method_allocator,
+          /*method_name=*/serialization_plan_->name()->c_str());
       Error err = BackendDelegate::Init(
           delegate, program_, backend_init_context, &delegates_[i]);
       if (err != Error::Ok) {
@@ -1097,8 +1099,9 @@ Error Method::execute_instruction() {
           n_delegate_,
           step_state_.instr_idx);
       BackendExecutionContext backend_execution_context(
-          /*event_tracer*/ event_tracer_,
-          /*temp_allocator*/ temp_allocator_);
+          /*event_tracer=*/event_tracer_,
+          /*temp_allocator=*/temp_allocator_,
+          /*method_name=*/serialization_plan_->name()->c_str());
       err = delegates_[delegate_idx].Execute(
           backend_execution_context,
           chain.argument_lists_[step_state_.instr_idx].data());
diff --git a/runtime/executor/test/backend_integration_test.cpp b/runtime/executor/test/backend_integration_test.cpp
@@ -95,7 +95,7 @@ class StubBackend final : public BackendInterface {
   }
 
   Error execute(
-      ET_UNUSED BackendExecutionContext& context,
+      BackendExecutionContext& context,
       DelegateHandle* handle,
       EValue** args) const override {
     if (execute_fn_) {
@@ -530,6 +530,48 @@ TEST_P(BackendIntegrationTest, SegmentInfoIsPassedIntoDataLoader) {
   EXPECT_EQ(backend_load_was_called, using_segments());
 }
 
+TEST_P(BackendIntegrationTest, GetMethodNameDuringInitSuccess) {
+  Result<FileDataLoader> loader = FileDataLoader::from(program_path());
+  ASSERT_EQ(loader.error(), Error::Ok);
+  const void* processed_data = nullptr;
+  StubBackend::singleton().install_init(
+      [&](FreeableBuffer* processed,
+          ET_UNUSED ArrayRef<CompileSpec> compile_specs,
+          ET_UNUSED BackendInitContext& backend_init_context)
+          -> Result<DelegateHandle*> {
+        auto method_name = backend_init_context.get_method_name();
+        // Ensure that we can get the method name during init via context
+        EXPECT_STREQ(method_name, "forward");
+        processed_data = processed->data();
+        return nullptr;
+      });
+  Result<Program> program = Program::load(&loader.get());
+  ManagedMemoryManager mmm(kDefaultNonConstMemBytes, kDefaultRuntimeMemBytes);
+  Result<Method> method = program->load_method("forward", &mmm.get());
+  EXPECT_TRUE(method.ok());
+  ASSERT_EQ(program.error(), Error::Ok);
+}
+
+TEST_P(BackendIntegrationTest, GetMethodNameDuringExecuteSuccess) {
+  Result<FileDataLoader> loader = FileDataLoader::from(program_path());
+  ASSERT_EQ(loader.error(), Error::Ok);
+  StubBackend::singleton().install_execute(
+      [&](BackendExecutionContext& backend_execution_context,
+          ET_UNUSED DelegateHandle* handle,
+          ET_UNUSED EValue** args) -> Error {
+        // Ensure that we can get the method name during execution via context
+        auto method_name = backend_execution_context.get_method_name();
+        EXPECT_STREQ(method_name, "forward");
+        return Error::Ok;
+      });
+  Result<Program> program = Program::load(&loader.get());
+  ManagedMemoryManager mmm(kDefaultNonConstMemBytes, kDefaultRuntimeMemBytes);
+  Result<Method> method = program->load_method("forward", &mmm.get());
+  EXPECT_TRUE(method.ok());
+  Error err = method->execute();
+  ASSERT_EQ(err, Error::Ok);
+}
+
 // TODO: Add more tests for the runtime-to-backend interface. E.g.:
 // - Errors during init() or execute() result in runtime init/execution failures
 // - Correct values are passed to init()/execute()