
Commit 71fba59

Fail per-channel quantized FullyConnected layers (#2602)
The TFLiteConverter recently switched to per-channel quantization for all Dense/FullyConnected layers. TFLite-Micro does not yet support this and was applying incorrect quantization parameters to FullyConnected layers in newly converted models, which produces invalid output. While we intend to add per-channel quantization support for FullyConnected, this PR adds a runtime check that fails per-channel quantized FullyConnected layers until the individual kernels can support them.

If you encounter this runtime error, you can disable the new converter behavior by setting `TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers = True` (see https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674).

BUG=b/324385802
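For reference, a minimal conversion sketch showing where the workaround flag is set. This is illustrative, not code from this commit: `keras_model` and `representative_dataset_gen` are assumed to be an already-built tf.keras model and a calibration-data generator; only the `_experimental_disable_per_channel_quantization_for_dense_layers` flag itself comes from the commit message.

import tensorflow as tf

def convert_with_per_tensor_dense(keras_model, representative_dataset_gen):
  # Standard int8 post-training quantization setup.
  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  converter.representative_dataset = representative_dataset_gen
  converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
  # Workaround: keep Dense/FullyConnected weights per-tensor quantized so the
  # converted model still runs on TFLite-Micro until per-channel support lands.
  converter._experimental_disable_per_channel_quantization_for_dense_layers = True
  return converter.convert()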
1 parent be11bd7 commit 71fba59

File tree: 3 files changed (+23, −3 lines)

tensorflow/lite/micro/kernels/fully_connected_common.cc

Lines changed: 18 additions & 0 deletions
@@ -57,6 +57,24 @@ TfLiteStatus CalculateOpDataFullyConnected(
     TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter,
     const TfLiteTensor* bias, TfLiteTensor* output,
     OpDataFullyConnected* data) {
+  // TODO(b/324385802): Support per-channel quantization for FullyConnected.
+  // If you have hit this failure message, you will need to disable this
+  // behavior. This can be done by setting the following flag to true:
+  // TfLiteConverter._experimental_disable_per_channel_quantization_for_dense_layers
+  // https://github.com/tensorflow/tensorflow/blob/377f47694fa790e98db6665b9adecde00b5e0d68/tensorflow/lite/python/lite.py#L674
+  if (filter->quantization.type == kTfLiteAffineQuantization &&
+      filter->quantization.params != nullptr) {
+    TfLiteAffineQuantization* affine_quantization =
+        reinterpret_cast<TfLiteAffineQuantization*>(
+            filter->quantization.params);
+    TF_LITE_ENSURE(context, affine_quantization->scale);
+    TF_LITE_ENSURE_MSG(
+        context, affine_quantization->scale->size == 1,
+        "FullyConnected per-channel quantization not yet supported. Please set "
+        "converter._experimental_disable_per_channel_quantization_for_dense_"
+        "layers = True.");
+  }
+
   if (data_type != kTfLiteFloat32) {
     double real_multiplier = 0.0;
     TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
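
For context (not part of the diff above): a per-tensor quantized filter carries a single scale, while a per-channel quantized filter carries one scale per output channel, which is what the `scale->size == 1` check relies on. A hypothetical helper expressing the same test:

#include "tensorflow/lite/c/common.h"  // TfLiteTensor, TfLiteAffineQuantization

// Hypothetical helper, not from this commit: reports whether a filter tensor
// uses per-channel affine quantization (more than one scale entry).
inline bool IsPerChannelQuantized(const TfLiteTensor* filter) {
  if (filter->quantization.type != kTfLiteAffineQuantization ||
      filter->quantization.params == nullptr) {
    return false;
  }
  const auto* affine = static_cast<const TfLiteAffineQuantization*>(
      filter->quantization.params);
  return affine->scale != nullptr && affine->scale->size > 1;
}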

tensorflow/lite/micro/kernels/testdata/lstm_test_data.h

Lines changed: 2 additions & 2 deletions
@@ -390,9 +390,9 @@ class LstmNodeContent {
   int state_size_[3] = {2, batch_size, state_dimension};

   // see lstm_shared.h for tensor names, the last tensor is the output tensor
-  TfLiteTensor tensors_[24 + 1];
+  TfLiteTensor tensors_[24 + 1] = {};
   // Use for internel kernel testing
-  TfLiteEvalTensor eval_tensors_[24 + 1];
+  TfLiteEvalTensor eval_tensors_[24 + 1] = {};
   // indices for the tensors inside the node (required by kernel runner)
   int input_tensor_indices_[1 + 24] = {};
   // single output (last in the tensors array)

tensorflow/lite/micro/tools/requantize_flatbuffer_test.py

Lines changed: 3 additions & 1 deletion
@@ -24,7 +24,7 @@
 from tflite_micro.tensorflow.lite.tools import flatbuffer_utils


-#TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
+# TODO(b/248061370): replace the keras model creation process with flatbuffer manipulation to speed up test
 def create_simple_fc_model():
   '''Create a simple model with two fully connected(fc) layers'''
   model = tf.keras.models.Sequential([
@@ -60,6 +60,8 @@ def convert_tfl_converter(keras_model,
       EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
   ]
   converter.representative_dataset = representative_dataset_gen
+  # TODO(b/324385802): Support per-channel quantization for FullyConnected.
+  converter._experimental_disable_per_channel_quantization_for_dense_layers = True
   return converter.convert()
