
Commit 71fba59

Fail per-channel quantized FullyConnected layers (#2602)
The TFLiteConverter recently switched to per-channel quantization for all Dense/FullyConnected layers. TFLite-Micro does not yet support this and was applying incorrect quantization parameters to FullyConnected layers in newly converted models, which produces invalid output. While we intend to add per-channel quantization support for FullyConnected, this PR adds a runtime check that fails per-channel quantized FullyConnected layers until the individual kernels can support them.

If you encounter this runtime error, you can disable the new converter behavior by setting `TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers = True` (see https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674).

BUG=b/324385802
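For reference, a minimal conversion sketch showing where the workaround flag is set. This is illustrative, not code from this commit: `keras_model` and `representative_dataset_gen` are assumed to be an already-built tf.keras model and a calibration-data generator; only the `_experimental_disable_per_channel_quantization_for_dense_layers` flag itself comes from the commit message.

import tensorflow as tf

def convert_with_per_tensor_dense(keras_model, representative_dataset_gen):
  # Standard int8 post-training quantization setup.
  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  converter.representative_dataset = representative_dataset_gen
  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
  # Workaround: keep Dense/FullyConnected weights per-tensor quantized so the
  # converted model still runs on TFLite-Micro until per-channel support lands.
  converter._experimental_disable_per_channel_quantization_for_dense_layers = True
  return converter.convert()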
1 parent be11bd7 commit 71fba59

File tree: 3 files changed (+23, −3 lines)

tensorflow/lite/micro/kernels/fully_connected_common.cc

Lines changed: 18 additions & 0 deletions
@@ -57,6 +57,24 @@ TfLiteStatus CalculateOpDataFullyConnected(
     TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
     const TfLiteTensor* bias, TfLiteTensor* output,
     OpDataFullyConnected* data) {
+  // TODO(b/324385802): Support per-channel quantization for FullyConnected.
+  // If you have hit this failure message, you will need to disable this
+  // behavior. This can be done by setting the following flag to true:
+  // TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers
+  // https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674
+  if (filter->quantization.type == kTfLiteAffineQuantization &&
+      filter->quantization.params != nullptr) {
+    TfLiteAffineQuantization* affine_quantization =
+        reinterpret_cast<TfLiteAffineQuantization*>(
+            filter->quantization.params);
+    TF_LITE_ENSURE(context, affine_quantization->scale);
+    TF_LITE_ENSURE_MSG(
+        context, affine_quantization->scale->size == 1,
+        "FullyConnected per-channel quantization not yet supported. Please set "
+        "converter._experimental_disable_per_channel_quantization_for_dense_"
+        "layers = True.");
+  }
+
   if (data_type != kTfLiteFloat32) {
     double real_multiplier = 0.0;
     TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
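
For context (not part of the diff above): a per-tensor quantized filter carries a single scale, while a per-channel quantized filter carries one scale per output channel, which is what the `scale->size == 1` check relies on. A hypothetical helper expressing the same test:

#include "tensorflow/lite/c/common.h"  // TfLiteTensor, TfLiteAffineQuantization

// Hypothetical helper, not from this commit: reports whether a filter tensor
// uses per-channel affine quantization (more than one scale entry).
inline bool IsPerChannelQuantized(const TfLiteTensor* filter) {
  if (filter->quantization.type != kTfLiteAffineQuantization ||
      filter->quantization.params == nullptr) {
    return false;
  }
  const auto* affine = static_cast<const TfLiteAffineQuantization*>(
      filter->quantization.params);
  return affine->scale != nullptr && affine->scale->size > 1;
}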

tensorflow/lite/micro/kernels/testdata/lstm_test_data.h

Lines changed: 2 additions & 2 deletions
@@ -390,9 +390,9 @@ class LstmNodeContent {
   int state_size_[3] = {2, batch_size, state_dimension};

   // see lstm_shared.h for tensor names, the last tensor is the output tensor
-  TfLiteTensor tensors_[24 + 1];
+  TfLiteTensor tensors_[24 + 1] = {};
   // Use for internel kernel testing
-  TfLiteEvalTensor eval_tensors_[24 + 1];
+  TfLiteEvalTensor eval_tensors_[24 + 1] = {};
   // indices for the tensors inside the node (required by kernel runner)
   int input_tensor_indices_[1 + 24] = {};
   // single output (last in the tensors array)

tensorflow/lite/micro/tools/requantize_flatbuffer_test.py

Lines changed: 3 additions & 1 deletion
@@ -24,7 +24,7 @@
 from tflite_micro.tensorflow.lite.tools import flatbuffer_utils


-#TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
+# TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
 def create_simple_fc_model():
   '''Create a simple model with two fully connected(fc) layers'''
   model = tf.keras.models.Sequential([
@@ -60,6 +60,8 @@ def convert_tfl_converter(keras_model,
       EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
   ]
   converter.representative_dataset = representative_dataset_gen
+  # TODO(b/324385802): Support per-channel quantization for FullyConnected.
+  converter._experimental_disable_per_channel_quantization_for_dense_layers = True
   return converter.convert()
