Skip to content

Commit e35e981

Browse files
kristofer-jonsson-armcarlescufi
authored and committed
samples: Add a TFLU Ethos-U sample program
Add a sample program that demonstrates how to run inferences on Arm Ethos-U. Signed-off-by: Kristofer Jonsson <[email protected]> Signed-off-by: Fredrik Knutsson <[email protected]> Signed-off-by: Carles Cufi <[email protected]>
1 parent 0a02a7a commit e35e981

File tree

12 files changed

+5826
-0
lines changed

12 files changed

+5826
-0
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2021-2022 Arm Limited and/or its affiliates <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

cmake_minimum_required(VERSION 3.20.0)

# Locate the Zephyr build system; ZEPHYR_BASE must point at the Zephyr tree.
find_package(Zephyr HINTS $ENV{ZEPHYR_BASE})

project(tflm_ethosu_app)

# Headers for the bundled model data (keyword spotting CNN, int8).
target_include_directories(app PRIVATE src/models/keyword_spotting_cnn_small_int8)

target_sources(app PRIVATE src/main.cpp src/inference_process.cpp)

# Custom linker snippet that places the tflm_* data sections.
zephyr_linker_sources(SECTIONS linker.ld)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Copyright 2022 Arm Limited and/or its affiliates <[email protected]>
# SPDX-License-Identifier: Apache-2.0

# Invisible, always-on helper symbol: force-selects TAINT_BLOBS so the
# build is marked as containing binary blobs (the sample ships model data).
config TFLM_ETHOSU_TAINT_BLOBS
	bool
	default y
	select TAINT_BLOBS

source "Kconfig.zephyr"
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
.. _tflm_ethosu:
2+
3+
Arm(R) Ethos(TM)-U Tensorflow Lite for Microcontrollers test application
4+
########################################################################
5+
6+
A sample application that demonstrates how to run an inference using the TFLM
7+
framework and the Arm Ethos-U NPU.
8+
9+
The sample application runs a model that has been downloaded from the
10+
`Arm model zoo <https://github.com/ARM-software/ML-zoo>`_. This model has then
11+
been optimized using the
12+
`Vela compiler <https://git.mlplatform.org/ml/ethos-u/ethos-u-vela.git>`_.
13+
14+
Vela takes a tflite file as input and produces another tflite file as output,
15+
where the operators supported by Ethos-U have been replaced by an Ethos-U custom
16+
operator. In an ideal case the complete network would be replaced by a single
17+
Ethos-U custom operator.
18+
19+
Building and running
20+
********************
21+
22+
This application can be built and run on any Arm Ethos-U capable platform, for
23+
example Corstone(TM)-300. A reference implementation of Corstone-300 can be
24+
downloaded either as a FPGA bitfile for the
25+
`MPS3 FPGA prototyping board <https://developer.arm.com/tools-and-software/development-boards/fpga-prototyping-boards/mps3>`_,
26+
or as a
27+
`Fixed Virtual Platform <https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps>`_
28+
that can be emulated on a host machine.
29+
30+
Assuming that the Corstone-300 FVP has been downloaded, installed and added to
31+
the ``PATH`` variable, then building and testing can be done with the following
32+
commands.
33+
34+
.. code-block:: bash
35+
36+
$ west build -b mps3_an547 zephyr/samples/tflm_ethosu
37+
$ FVP_Corstone_SSE-300_Ethos-U55 build/zephyr/zephyr.elf
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
 * Copyright 2022 Arm Limited and/or its affiliates <[email protected]>
 *
 * SPDX-License-Identifier: Apache-2.0
 */

/* Only emit the section when the devicetree has an enabled "ddr4" node. */
#if DT_NODE_HAS_STATUS(DT_NODELABEL(ddr4), okay)
GROUP_START(DDR4)

/* Collect the TFLM buffers (model, arena, input, output) into DDR4,
 * 16-byte aligned — presumably an Ethos-U/DMA alignment requirement;
 * confirm against the driver documentation.
 */
SECTION_DATA_PROLOGUE(_DDR4_SECTION_NAME,,SUBALIGN(16))
{
	. = ALIGN(16);
	*(tflm_model tflm_arena tflm_input tflm_output)
} GROUP_LINK_IN(DDR4)

GROUP_END(DDR4)
#endif
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Application default configuration
# Include TFLM based on CMSIS-NN optimization and Ethos-U acceleration
CONFIG_CPLUSPLUS=y
CONFIG_LIB_CPLUSPLUS=y
CONFIG_NEWLIB_LIBC=y
CONFIG_TENSORFLOW_LITE_MICRO=y
CONFIG_ARM_ETHOS_U=y
# Kernel heap used by the sample (bytes)
CONFIG_HEAP_MEM_POOL_SIZE=16384
CONFIG_LOG=y
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Twister metadata for the Arm Ethos-U TFLM sample.
sample:
  description: Demonstration of the Arm Ethos-U NPU
  name: Arm Ethos-U NPU sample
tests:
  sample.drivers.tflm_ethosu:
    tags: NPU
    # Only build on platforms whose devicetree enables an Ethos-U node.
    filter: dt_compat_enabled("arm,ethos-u")
    build_only: true
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
/*
2+
* Copyright 2019-2022 Arm Limited and/or its affiliates <[email protected]>
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#include "inference_process.hpp"
8+
9+
#include <tensorflow/lite/micro/all_ops_resolver.h>
10+
#include <tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h>
11+
#include <tensorflow/lite/micro/micro_error_reporter.h>
12+
#include <tensorflow/lite/micro/micro_interpreter.h>
13+
#include <tensorflow/lite/micro/micro_profiler.h>
14+
#include <tensorflow/lite/schema/schema_generated.h>
15+
16+
#include <cmsis_compiler.h>
17+
#include <inttypes.h>
18+
#include <zephyr/kernel.h>
19+
20+
using namespace std;
21+
22+
namespace
23+
{
24+
bool copyOutput(const TfLiteTensor &src, InferenceProcess::DataPtr &dst)
25+
{
26+
if (dst.data == nullptr) {
27+
return false;
28+
}
29+
30+
if (src.bytes > dst.size) {
31+
printk("Tensor size mismatch (bytes): actual=%d, expected%d.\n", src.bytes,
32+
dst.size);
33+
return true;
34+
}
35+
36+
copy(src.data.uint8, src.data.uint8 + src.bytes, static_cast<uint8_t *>(dst.data));
37+
dst.size = src.bytes;
38+
39+
return false;
40+
}
41+
42+
} /* namespace */
43+
44+
namespace InferenceProcess
45+
{
46+
/* Wrap an existing buffer; DataPtr does not take ownership of the memory. */
DataPtr::DataPtr(void *_data, size_t _size) : data(_data), size(_size)
{
}
49+
50+
/*
 * Invalidate the D-cache lines covering [data, data + size) so subsequent
 * CPU reads fetch what is actually in memory (written by another bus
 * master, presumably the NPU — confirm). No-op when no D-cache is present.
 */
void DataPtr::invalidate()
{
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
	SCB_InvalidateDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
56+
57+
/*
 * Write back (clean) the cached contents of [data, data + size) to memory
 * so other bus masters observe the CPU's writes. No-op when no D-cache is
 * present.
 */
void DataPtr::clean()
{
#if defined(__DCACHE_PRESENT) && (__DCACHE_PRESENT == 1U)
	SCB_CleanDCache_by_Addr(reinterpret_cast<uint32_t *>(data), size);
#endif
}
63+
64+
/* Default-construct an empty job; all members get their default values. */
InferenceJob::InferenceJob()
{
}
67+
68+
/* Construct a named job, copying the model descriptor and buffer lists. */
InferenceJob::InferenceJob(const string &_name, const DataPtr &_networkModel,
			   const vector<DataPtr> &_input, const vector<DataPtr> &_output,
			   const vector<DataPtr> &_expectedOutput)
	: name(_name), networkModel(_networkModel), input(_input), output(_output),
	  expectedOutput(_expectedOutput)
{
}
75+
76+
void InferenceJob::invalidate()
77+
{
78+
networkModel.invalidate();
79+
80+
for (auto &it : input) {
81+
it.invalidate();
82+
}
83+
84+
for (auto &it : output) {
85+
it.invalidate();
86+
}
87+
88+
for (auto &it : expectedOutput) {
89+
it.invalidate();
90+
}
91+
}
92+
93+
void InferenceJob::clean()
94+
{
95+
networkModel.clean();
96+
97+
for (auto &it : input) {
98+
it.clean();
99+
}
100+
101+
for (auto &it : output) {
102+
it.clean();
103+
}
104+
105+
for (auto &it : expectedOutput) {
106+
it.clean();
107+
}
108+
}
109+
110+
/*
 * Run a single inference job through the TFLM interpreter.
 *
 * Verifies the model schema version, allocates tensors from the arena,
 * copies the job's input buffers into the network, invokes the interpreter,
 * then copies the outputs back and (optionally) compares them byte-by-byte
 * against the job's expected reference data.
 *
 * Return: false on success, true on any failure (details via printk).
 */
bool InferenceProcess::runJob(InferenceJob &job)
{
	/* Get model handle and verify that the version is correct */
	const tflite::Model *model = ::tflite::GetModel(job.networkModel.data);
	if (model->version() != TFLITE_SCHEMA_VERSION) {
		printk("Model schema version unsupported: version=%" PRIu32 ", supported=%d.\n",
		       model->version(), TFLITE_SCHEMA_VERSION);
		return true;
	}

	/* Create the TFL micro interpreter */
	tflite::AllOpsResolver resolver;
	tflite::MicroErrorReporter errorReporter;

	tflite::MicroInterpreter interpreter(model, resolver, tensorArena, tensorArenaSize,
					     &errorReporter);

	/* Allocate tensors from the arena */
	TfLiteStatus allocate_status = interpreter.AllocateTensors();
	if (allocate_status != kTfLiteOk) {
		printk("Failed to allocate tensors for inference. job=%p\n", &job);
		return true;
	}

	if (job.input.size() != interpreter.inputs_size()) {
		printk("Number of job and network inputs do not match. input=%zu, network=%zu\n",
		       job.input.size(), interpreter.inputs_size());
		return true;
	}

	/* Copy input data into the network's input tensors */
	for (size_t i = 0; i < interpreter.inputs_size(); ++i) {
		const DataPtr &input = job.input[i];
		const TfLiteTensor *tensor = interpreter.input(i);

		if (input.size != tensor->bytes) {
			/* tensor->bytes is a size_t: print with %zu (was %u) */
			printk("Input tensor size mismatch. index=%zu, input=%zu, network=%zu\n",
			       i, input.size, tensor->bytes);
			return true;
		}

		copy(static_cast<char *>(input.data), static_cast<char *>(input.data) + input.size,
		     tensor->data.uint8);
	}

	/* Run the inference */
	TfLiteStatus invoke_status = interpreter.Invoke();
	if (invoke_status != kTfLiteOk) {
		printk("Invoke failed for inference. job=%s\n", job.name.c_str());
		return true;
	}

	/* Copy output data back to the job's buffers, if any were supplied */
	if (job.output.size() > 0) {
		if (interpreter.outputs_size() != job.output.size()) {
			/* outputs_size() is a size_t: print with %zu (was %u) */
			printk("Number of job and network outputs do not match. job=%zu, network=%zu\n",
			       job.output.size(), interpreter.outputs_size());
			return true;
		}

		for (size_t i = 0; i < interpreter.outputs_size(); ++i) {
			if (copyOutput(*interpreter.output(i), job.output[i])) {
				return true;
			}
		}
	}

	/* Compare the produced outputs against the expected reference data */
	if (job.expectedOutput.size() > 0) {
		if (job.expectedOutput.size() != interpreter.outputs_size()) {
			printk("Number of job and network expected outputs do not match. job=%zu, network=%zu\n",
			       job.expectedOutput.size(), interpreter.outputs_size());
			return true;
		}

		for (size_t i = 0; i < interpreter.outputs_size(); ++i) {
			const DataPtr &expected = job.expectedOutput[i];
			const TfLiteTensor *output = interpreter.output(i);

			if (expected.size != output->bytes) {
				printk("Expected output tensor size mismatch. index=%zu, expected=%zu, network=%zu\n",
				       i, expected.size, output->bytes);
				return true;
			}

			/* Byte-wise comparison; report the first mismatch */
			for (size_t j = 0; j < output->bytes; ++j) {
				if (output->data.uint8[j] !=
				    static_cast<uint8_t *>(expected.data)[j]) {
					printk("Expected output tensor data mismatch. index=%zu, offset=%zu, expected=%02x, network=%02x\n",
					       i, j, static_cast<uint8_t *>(expected.data)[j],
					       output->data.uint8[j]);
					return true;
				}
			}
		}
	}

	return false;
}
208+
209+
} /* namespace InferenceProcess */
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2019-2022 Arm Limited and/or its affiliates <[email protected]>
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#pragma once
8+
9+
#include <array>
10+
#include <queue>
11+
#include <stdlib.h>
12+
#include <string>
13+
#include <vector>
14+
15+
namespace InferenceProcess
16+
{
17+
/* Non-owning descriptor of a contiguous memory buffer. */
struct DataPtr {
	void *data;  /* Start of the buffer (not owned) */
	size_t size; /* Size of the buffer in bytes */

	DataPtr(void *data = nullptr, size_t size = 0);

	/* D-cache maintenance over [data, data + size); no-op without D-cache */
	void invalidate();
	void clean();
};
26+
27+
/* Everything needed to run one inference: a label, the network model and
 * the input/output/reference buffers. */
struct InferenceJob {
	std::string name;                    /* Label used in log messages */
	DataPtr networkModel;                /* Serialized tflite model data */
	std::vector<DataPtr> input;          /* One buffer per network input */
	std::vector<DataPtr> output;         /* One buffer per network output */
	std::vector<DataPtr> expectedOutput; /* Optional reference output data */

	InferenceJob();
	InferenceJob(const std::string &name, const DataPtr &networkModel,
		     const std::vector<DataPtr> &input, const std::vector<DataPtr> &output,
		     const std::vector<DataPtr> &expectedOutput);

	/* Cache maintenance for the model and every buffer in the job */
	void invalidate();
	void clean();
};
42+
43+
/* Runs InferenceJobs through the TFLM interpreter using a caller-provided
 * tensor arena. */
class InferenceProcess {
public:
	/* The arena memory is borrowed, not owned; it must stay valid for
	 * the lifetime of this object. */
	InferenceProcess(uint8_t *_tensorArena, size_t _tensorArenaSize)
		: tensorArena(_tensorArena), tensorArenaSize(_tensorArenaSize)
	{
	}

	/* Run a single job. Returns false on success, true on failure. */
	bool runJob(InferenceJob &job);

private:
	uint8_t *tensorArena;         /* Scratch memory for the interpreter */
	const size_t tensorArenaSize; /* Arena size in bytes */
};
56+
} /* namespace InferenceProcess */

0 commit comments

Comments
 (0)