Skip to content

Commit e35e981

Browse files
kristofer-jonsson-armcarlescufi
authored and committed
samples: Add a TFLU Ethos-U sample program
Add a sample program that demonstrates how to run inferences on Arm Ethos-U. Signed-off-by: Kristofer Jonsson <[email protected]> Signed-off-by: Fredrik Knutsson <[email protected]> Signed-off-by: Carles Cufi <[email protected]>
1 parent 0a02a7a commit e35e981

File tree

12 files changed

+5826
-0
lines changed

12 files changed

+5826
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2021-2022 Arm Limited and/or its affiliates <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

cmake_minimum_required(VERSION 3.20.0)

# Locate the Zephyr build system; ZEPHYR_BASE must point at the Zephyr tree.
find_package(Zephyr HINTS $ENV{ZEPHYR_BASE})

project(tflm_ethosu_app)

# Headers for the bundled model data (keyword spotting CNN, int8).
target_include_directories(app PRIVATE src/models/keyword_spotting_cnn_small_int8)

target_sources(app PRIVATE src/main.cpp src/inference_process.cpp)

# Custom linker snippet that places the tflm_* data sections.
zephyr_linker_sources(SECTIONS linker.ld)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Copyright 2022 Arm Limited and/or its affiliates <[email protected]>
# SPDX-License-Identifier: Apache-2.0

# Invisible, always-on helper symbol: force-selects TAINT_BLOBS so the
# build is marked as containing binary blobs (the sample ships model data).
config TFLM_ETHOSU_TAINT_BLOBS
	bool
	default y
	select TAINT_BLOBS

source "Kconfig.zephyr"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
.. _tflm_ethosu:
2+
3+
Arm(R) Ethos(TM)-U Tensorflow Lite for Microcontrollers test application
4+
########################################################################
5+
6+
A sample application that demonstrates how to run an inference using the TFLM
7+
framework and the Arm Ethos-U NPU.
8+
9+
The sample application runs a model that has been downloaded from the
10+
`Arm model zoo <https://github.com/ARM-software/ML-zoo>`_. This model has then
11+
been optimized using the
12+
`Vela compiler <https://git.mlplatform.org/ml/ethos-u/ethos-u-vela.git>`_.
13+
14+
Vela takes a tflite file as input and produces another tflite file as output,
15+
where the operators supported by Ethos-U have been replaced by an Ethos-U custom
16+
operator. In an ideal case the complete network would be replaced by a single
17+
Ethos-U custom operator.
18+
19+
Building and running
20+
********************
21+
22+
This application can be built and run on any Arm Ethos-U capable platform, for
23+
example Corstone(TM)-300. A reference implementation of Corstone-300 can be
24+
downloaded either as a FPGA bitfile for the
25+
`MPS3 FPGA prototyping board <https://developer.arm.com/tools-and-software/development-boards/fpga-prototyping-boards/mps3>`_,
26+
or as a
27+
`Fixed Virtual Platform <https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps>`_
28+
that can be emulated on a host machine.
29+
30+
Assuming that the Corstone-300 FVP has been downloaded, installed and added to
31+
the ``PATH`` variable, then building and testing can be done with the following
32+
commands.
33+
34+
.. code-block:: bash
35+
36+
$ west build -b mps3_an547 zephyr/samples/tflm_ethosu
37+
$ FVP_Corstone_SSE-300_Ethos-U55 build/zephyr/zephyr.elf
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
 * Copyright 2022 Arm Limited and/or its affiliates <[email protected]>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/* Only emit the section when the devicetree has an enabled "ddr4" node. */
#if DT_NODE_HAS_STATUS(DT_NODELABEL(ddr4), okay)
GROUP_START(DDR4)

/* Collect the TFLM buffers (model, arena, input, output) into DDR4,
 * 16-byte aligned — presumably an Ethos-U/DMA alignment requirement;
 * confirm against the driver documentation.
 */
SECTION_DATA_PROLOGUE(_DDR4_SECTION_NAME,,SUBALIGN(16))
{
	. = ALIGN(16);
	*(tflm_model tflm_arena tflm_input tflm_output)
} GROUP_LINK_IN(DDR4)

GROUP_END(DDR4)
#endif
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Application default configuration
# Include TFLM based on CMSIS-NN optimization and Ethos-U acceleration
CONFIG_CPLUSPLUS=y
CONFIG_LIB_CPLUSPLUS=y
CONFIG_NEWLIB_LIBC=y
CONFIG_TENSORFLOW_LITE_MICRO=y
CONFIG_ARM_ETHOS_U=y
# Kernel heap used by the sample (bytes)
CONFIG_HEAP_MEM_POOL_SIZE=16384
CONFIG_LOG=y
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Twister metadata for the Arm Ethos-U TFLM sample.
sample:
  description: Demonstration of the Arm Ethos-U NPU
  name: Arm Ethos-U NPU sample
tests:
  sample.drivers.tflm_ethosu:
    tags: NPU
    # Only build on platforms whose devicetree enables an Ethos-U node.
    filter: dt_compat_enabled("arm,ethos-u")
    build_only: true
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2019-2022 Arm Limited and/or its affiliates <[email protected]>
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#include "inference_process.hpp"
8+
9+
#include <tensorflow/lite/micro/all_ops_resolver.h>
10+
#include <tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h>
11+
#include <tensorflow/lite/micro/micro_error_reporter.h>
12+
#include <tensorflow/lite/micro/micro_interpreter.h>
13+
#include <tensorflow/lite/micro/micro_profiler.h>
14+
#include <tensorflow/lite/schema/schema_generated.h>
15+
16+
#include <cmsis_compiler.h>
17+
#include <inttypes.h>
18+
#include <zephyr/kernel.h>
19+
20+
using namespace std;
21+
22+
namespace
23+
{
24+
bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst)
25+
{
26+
if (dst.data == nullptr) {
27+
return false;
28+
}
29+
30+
if (src.bytes > dst.size) {
31+
printk("Tensor size mismatch (bytes): actual=%d, expected%d.\n", src.bytes,
32+
dst.size);
33+
return true;
34+
}
35+
36+
copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
37+
dst.size = src.bytes;
38+
39+
return false;
40+
}
41+
42+
} /* namespace */
43+
44+
namespace InferenceProcess
45+
{
46+
/* Wrap an existing buffer; DataPtr does not take ownership of the memory. */
DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size)
{
}
49+
50+
/*
 * Invalidate the D-cache lines covering [data, data + size) so subsequent
 * CPU reads fetch what is actually in memory (written by another bus
 * master, presumably the NPU — confirm). No-op when no D-cache is present.
 */
void DataPtr::invalidate()
{
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
	SCB_InvalidateDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
56+
57+
/*
 * Write back (clean) the cached contents of [data, data + size) to memory
 * so other bus masters observe the CPU's writes. No-op when no D-cache is
 * present.
 */
void DataPtr::clean()
{
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
	SCB_CleanDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
63+
64+
/* Default-construct an empty job; all members get their default values. */
InferenceJob::InferenceJob()
{
}
67+
68+
/* Construct a named job, copying the model descriptor and buffer lists. */
InferenceJob::InferenceJob(const string &_name, const DataPtr &_networkModel,
			   const vector<DataPtr> &_input, const vector<DataPtr> &_output,
			   const vector<DataPtr> &_expectedOutput)
	: name(_name), networkModel(_networkModel), input(_input), output(_output),
	  expectedOutput(_expectedOutput)
{
}
75+
76+
void InferenceJob::invalidate()
77+
{
78+
networkModel.invalidate();
79+
80+
for (auto &it : input) {
81+
it.invalidate();
82+
}
83+
84+
for (auto &it : output) {
85+
it.invalidate();
86+
}
87+
88+
for (auto &it : expectedOutput) {
89+
it.invalidate();
90+
}
91+
}
92+
93+
void InferenceJob::clean()
94+
{
95+
networkModel.clean();
96+
97+
for (auto &it : input) {
98+
it.clean();
99+
}
100+
101+
for (auto &it : output) {
102+
it.clean();
103+
}
104+
105+
for (auto &it : expectedOutput) {
106+
it.clean();
107+
}
108+
}
109+
110+
/*
 * Run a single inference job through the TFLM interpreter.
 *
 * Verifies the model schema version, allocates tensors from the arena,
 * copies the job's input buffers into the network, invokes the interpreter,
 * then copies the outputs back and (optionally) compares them byte-by-byte
 * against the job's expected reference data.
 *
 * Return: false on success, true on any failure (details via printk).
 */
bool InferenceProcess::runJob(InferenceJob &job)
{
	/* Get model handle and verify that the version is correct */
	const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
	if (model->version() != TFLITE_SCHEMA_VERSION) {
		printk("Model schema version unsupported: version=%" PRIu32 ", supported=%d.\n",
		       model->version(), TFLITE_SCHEMA_VERSION);
		return true;
	}

	/* Create the TFL micro interpreter */
	tflite::AllOpsResolver resolver;
	tflite::MicroErrorReporter errorReporter;

	tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize,
					     &errorReporter);

	/* Allocate tensors from the arena */
	TfLiteStatus allocate_status = interpreter.AllocateTensors();
	if (allocate_status != kTfLiteOk) {
		printk("Failed to allocate tensors for inference. job=%p\n", &job);
		return true;
	}

	if (job.input.size() != interpreter.inputs_size()) {
		printk("Number of job and network inputs do not match. input=%zu, network=%zu\n",
		       job.input.size(), interpreter.inputs_size());
		return true;
	}

	/* Copy input data into the network's input tensors */
	for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
		const DataPtr &input = job.input[i];
		const TfLiteTensor *tensor = interpreter.input(i);

		if (input.size != tensor->bytes) {
			/* tensor->bytes is a size_t: print with %zu (was %u) */
			printk("Input tensor size mismatch. index=%zu, input=%zu, network=%zu\n",
			       i, input.size, tensor->bytes);
			return true;
		}

		copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size,
		     tensor->data.uint8);
	}

	/* Run the inference */
	TfLiteStatus invoke_status = interpreter.Invoke();
	if (invoke_status != kTfLiteOk) {
		printk("Invoke failed for inference. job=%s\n", job.name.c_str());
		return true;
	}

	/* Copy output data back to the job's buffers, if any were supplied */
	if (job.output.size() > 0) {
		if (interpreter.outputs_size() != job.output.size()) {
			/* outputs_size() is a size_t: print with %zu (was %u) */
			printk("Number of job and network outputs do not match. job=%zu, network=%zu\n",
			       job.output.size(), interpreter.outputs_size());
			return true;
		}

		for (size_t i = 0; i < interpreter.outputs_size(); ++i) {
			if (copyOutput(*interpreter.output(i), job.output[i])) {
				return true;
			}
		}
	}

	/* Compare the produced outputs against the expected reference data */
	if (job.expectedOutput.size() > 0) {
		if (job.expectedOutput.size() != interpreter.outputs_size()) {
			printk("Number of job and network expected outputs do not match. job=%zu, network=%zu\n",
			       job.expectedOutput.size(), interpreter.outputs_size());
			return true;
		}

		for (size_t i = 0; i < interpreter.outputs_size(); ++i) {
			const DataPtr &expected = job.expectedOutput[i];
			const TfLiteTensor *output = interpreter.output(i);

			if (expected.size != output->bytes) {
				printk("Expected output tensor size mismatch. index=%zu, expected=%zu, network=%zu\n",
				       i, expected.size, output->bytes);
				return true;
			}

			/* Byte-wise comparison; report the first mismatch */
			for (size_t j = 0; j < output->bytes; ++j) {
				if (output->data.uint8[j] !=
				    static_cast<uint8_t *>(expected.data)[j]) {
					printk("Expected output tensor data mismatch. index=%zu, offset=%zu, expected=%02x, network=%02x\n",
					       i, j, static_cast<uint8_t *>(expected.data)[j],
					       output->data.uint8[j]);
					return true;
				}
			}
		}
	}

	return false;
}
208+
209+
} /* namespace InferenceProcess */
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2019-2022 Arm Limited and/or its affiliates <[email protected]>
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#pragma once
8+
9+
#include <array>
10+
#include <queue>
11+
#include <stdlib.h>
12+
#include <string>
13+
#include <vector>
14+
15+
namespace InferenceProcess
16+
{
17+
/* Non-owning descriptor of a contiguous memory buffer. */
struct DataPtr {
	void *data;  /* Start of the buffer (not owned) */
	size_t size; /* Size of the buffer in bytes */

	DataPtr(void *data = nullptr, size_t size = 0);

	/* D-cache maintenance over [data, data + size); no-op without D-cache */
	void invalidate();
	void clean();
};
26+
27+
/* Everything needed to run one inference: a label, the network model and
 * the input/output/reference buffers. */
struct InferenceJob {
	std::string name;                    /* Label used in log messages */
	DataPtr networkModel;                /* Serialized tflite model data */
	std::vector<DataPtr> input;          /* One buffer per network input */
	std::vector<DataPtr> output;         /* One buffer per network output */
	std::vector<DataPtr> expectedOutput; /* Optional reference output data */

	InferenceJob();
	InferenceJob(const std::string &name, const DataPtr &networkModel,
		     const std::vector<DataPtr> &input, const std::vector<DataPtr> &output,
		     const std::vector<DataPtr> &expectedOutput);

	/* Cache maintenance for the model and every buffer in the job */
	void invalidate();
	void clean();
};
42+
43+
/* Runs InferenceJobs through the TFLM interpreter using a caller-provided
 * tensor arena. */
class InferenceProcess {
public:
	/* The arena memory is borrowed, not owned; it must stay valid for
	 * the lifetime of this object. */
	InferenceProcess(uint8_t *_tensorArena, size_t _tensorArenaSize)
		: tensorArena(_tensorArena), tensorArenaSize(_tensorArenaSize)
	{
	}

	/* Run a single job. Returns false on success, true on failure. */
	bool runJob(InferenceJob &job);

private:
	uint8_t *tensorArena;         /* Scratch memory for the interpreter */
	const size_t tensorArenaSize; /* Arena size in bytes */
};
56+
} /* namespace InferenceProcess */

0 commit comments

Comments
 (0)