Added f8E4M3FN and f8E5M2 support to existing QuantizedTypes (17.x) (#37)

AndreiFilipIntel · sramasit · commit 7961ba81dc56 · 2025-10-06T17:26:54.000-07:00
* Added an integral type check.
* Changed to the f8e_m_ naming format, removed the isF8 helper method, and fixed formatting.
* Added negative tests and removed a duplicate test.
* Added the missing end-of-file newline in parse-uniform-invalid.mlir.
* Changed to the f8E5M2/f8E4M3FN naming format and enabled default type parsing.
* Updated the error message checks in parse-any-invalid.mlir.
diff --git a/mlir/include/mlir/Dialect/Quant/IR/QuantTypes.h b/mlir/include/mlir/Dialect/Quant/IR/QuantTypes.h
@@ -82,6 +82,18 @@ class QuantizedType : public Type {
     return llvm::maxUIntN(integralWidth);
   }
 
+  static constexpr int64_t getDefaultMaximumForF8E4M3FN() { return 448; }
+
+  static constexpr int64_t getDefaultMinimumForF8E4M3FN() {
+    return -getDefaultMaximumForF8E4M3FN();
+  }
+
+  static constexpr int64_t getDefaultMaximumForF8E5M2() { return 57344; }
+
+  static constexpr int64_t getDefaultMinimumForF8E5M2() {
+    return -getDefaultMaximumForF8E5M2();
+  }
+
   /// Gets the original expressed type that this quantized type approximates.
   /// Note that this presumes that the quantized type was always derived from
   /// a floating point type, which in the broadest definition, is not true (i.e.
diff --git a/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp b/mlir/lib/Dialect/Quant/IR/QuantTypes.cpp
@@ -49,28 +49,38 @@ QuantizedType::verifyInvariants(function_ref<InFlightDiagnostic()> emitError,
                                 unsigned flags, Type storageType,
                                 Type expressedType, int64_t storageTypeMin,
                                 int64_t storageTypeMax) {
-  // Verify that the storage type is integral.
-  // This restriction may be lifted at some point in favor of using bf16
-  // or f16 as exact representations on hardware where that is advantageous.
-  auto intStorageType = llvm::dyn_cast<IntegerType>(storageType);
-  if (!intStorageType)
-    return emitError() << "storage type must be integral";
-  unsigned integralWidth = intStorageType.getWidth();
-
-  // Verify storage width.
-  if (integralWidth == 0 || integralWidth > MaxStorageBits)
-    return emitError() << "illegal storage type size: " << integralWidth;
-
-  // Verify storageTypeMin and storageTypeMax.
+                                 
   bool isSigned =
       (flags & QuantizationFlags::Signed) == QuantizationFlags::Signed;
-  int64_t defaultIntegerMin =
-      getDefaultMinimumForInteger(isSigned, integralWidth);
-  int64_t defaultIntegerMax =
-      getDefaultMaximumForInteger(isSigned, integralWidth);
-  if (storageTypeMax - storageTypeMin <= 0 ||
-      storageTypeMin < defaultIntegerMin ||
-      storageTypeMax > defaultIntegerMax) {
+
+  // Verify the storage width of integral storage types.
+  if (storageType.isa<IntegerType>()) {
+    unsigned integralWidth =
+        llvm::dyn_cast<IntegerType>(storageType).getWidth();
+
+    if (integralWidth == 0 || integralWidth > MaxStorageBits)
+      return emitError() << "illegal storage type size: " << integralWidth;
+  }
+
+  int64_t defaultMin, defaultMax;
+  if (storageType.isa<IntegerType>()) {
+    const auto width = llvm::dyn_cast<IntegerType>(storageType).getWidth();
+    defaultMin = QuantizedType::getDefaultMinimumForInteger(isSigned, width);
+    defaultMax = QuantizedType::getDefaultMaximumForInteger(isSigned, width);
+  } else if (storageType.isa<Float8E5M2Type>()) {
+    defaultMin = QuantizedType::getDefaultMinimumForF8E5M2();
+    defaultMax = QuantizedType::getDefaultMaximumForF8E5M2();
+  } else if (storageType.isa<Float8E4M3FNType>()) {
+    defaultMin = QuantizedType::getDefaultMinimumForF8E4M3FN();
+    defaultMax = QuantizedType::getDefaultMaximumForF8E4M3FN();
+  } else {
+    return emitError() << "illegal storage type, supported types are: integral "
+                          "types, Float8E4M3FNType and Float8E5M2Type ";
+  }
+
+  // Verify storageTypeMin and storageTypeMax.
+  if (storageTypeMax - storageTypeMin <= 0 || storageTypeMin < defaultMin ||
+      storageTypeMax > defaultMax) {
     return emitError() << "illegal storage min and storage max: ("
                        << storageTypeMin << ":" << storageTypeMax << ")";
   }
diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
@@ -21,9 +21,9 @@
 using namespace mlir;
 using namespace quant;
 
-static IntegerType parseStorageType(DialectAsmParser &parser, bool &isSigned) {
+static Type parseStorageType(DialectAsmParser &parser, bool &isSigned) {
   auto typeLoc = parser.getCurrentLocation();
-  IntegerType type;
+  Type type;
 
   // Parse storage type (alpha_ident, integer_literal).
   StringRef identifier;
@@ -32,20 +32,32 @@ static IntegerType parseStorageType(DialectAsmParser &parser, bool &isSigned) {
   if (result.has_value()) {
     if (!succeeded(*result))
       return nullptr;
-    isSigned = !type.isUnsigned();
-    storageTypeWidth = type.getWidth();
-  } else if (succeeded(parser.parseKeyword(&identifier))) {
-    // Otherwise, this must be an unsigned integer (`u` integer-literal).
-    if (!identifier.consume_front("u")) {
-      parser.emitError(typeLoc, "illegal storage type prefix");
+    if (auto intType = llvm::dyn_cast<IntegerType>(type)) {
+      isSigned = !intType.isUnsigned();
+      storageTypeWidth = intType.getWidth();
+    } else if (llvm::dyn_cast<Float8E5M2Type>(type) ||
+               llvm::dyn_cast<Float8E4M3FNType>(type)) {
+      storageTypeWidth = 8;
+      isSigned = true;
+    } else {
+      parser.emitError(typeLoc, "illegal quantized storage type alias");
       return nullptr;
     }
-    if (identifier.getAsInteger(10, storageTypeWidth)) {
-      parser.emitError(typeLoc, "expected storage type width");
+  } else if (succeeded(parser.parseKeyword(&identifier))) {
+    // Otherwise, this must be an unsigned integer (`u` integer-literal).
+    if (identifier.consume_front("u")) {
+      if (identifier.getAsInteger(10, storageTypeWidth)) {
+        parser.emitError(typeLoc, "expected storage type width");
+        return nullptr;
+      }
+      isSigned = false;
+      type = parser.getBuilder().getIntegerType(storageTypeWidth);
+
+    } else {
+      parser.emitError(typeLoc, "illegal quantized storage type alias");
       return nullptr;
     }
-    isSigned = false;
-    type = parser.getBuilder().getIntegerType(storageTypeWidth);
+
   } else {
     return nullptr;
   }
@@ -60,35 +72,56 @@ static IntegerType parseStorageType(DialectAsmParser &parser, bool &isSigned) {
   return type;
 }
 
-static ParseResult parseStorageRange(DialectAsmParser &parser,
-                                     IntegerType storageType, bool isSigned,
-                                     int64_t &storageTypeMin,
+/// Validates explicit storage bounds against the storage type's defaults;
+/// emits an error at the location of the first bound found out of range.
+static ParseResult
+checkStorageRange(DialectAsmParser &parser, int64_t storageTypeMin,
+                  int64_t storageTypeMax, int64_t defaultStorageTypeMin,
+                  int64_t defaultStorageTypeMax, SMLoc minLoc, SMLoc maxLoc) {
+  if (storageTypeMin < defaultStorageTypeMin)
+    return parser.emitError(minLoc, "illegal storage type minimum: ")
+           << storageTypeMin;
+  if (storageTypeMax > defaultStorageTypeMax)
+    return parser.emitError(maxLoc, "illegal storage type maximum: ")
+           << storageTypeMax;
+  return success();
+}
+
+static ParseResult parseStorageRange(DialectAsmParser &parser, Type storageType,
+                                     bool isSigned, int64_t &storageTypeMin,
                                      int64_t &storageTypeMax) {
-  int64_t defaultIntegerMin = QuantizedType::getDefaultMinimumForInteger(
-      isSigned, storageType.getWidth());
-  int64_t defaultIntegerMax = QuantizedType::getDefaultMaximumForInteger(
-      isSigned, storageType.getWidth());
+  int64_t defaultMin, defaultMax;
+  if (storageType.isa<IntegerType>()) {
+    const auto width = llvm::dyn_cast<IntegerType>(storageType).getWidth();
+    defaultMin = QuantizedType::getDefaultMinimumForInteger(isSigned, width);
+    defaultMax = QuantizedType::getDefaultMaximumForInteger(isSigned, width);
+  } else if (storageType.isa<Float8E5M2Type>()) {
+    defaultMin = QuantizedType::getDefaultMinimumForF8E5M2();
+    defaultMax = QuantizedType::getDefaultMaximumForF8E5M2();
+  } else if (storageType.isa<Float8E4M3FNType>()) {
+    defaultMin = QuantizedType::getDefaultMinimumForF8E4M3FN();
+    defaultMax = QuantizedType::getDefaultMaximumForF8E4M3FN();
+  } else {
+    defaultMin = std::numeric_limits<int64_t>::max();
+    defaultMax = std::numeric_limits<int64_t>::min();
+  }
+
   if (failed(parser.parseOptionalLess())) {
-    storageTypeMin = defaultIntegerMin;
-    storageTypeMax = defaultIntegerMax;
+    storageTypeMin = defaultMin;
+    storageTypeMax = defaultMax;
     return success();
   }
 
   // Explicit storage min and storage max.
+  // F8 min and max values are integers, so parseInteger() is used.
   SMLoc minLoc = parser.getCurrentLocation(), maxLoc;
   if (parser.parseInteger(storageTypeMin) || parser.parseColon() ||
       parser.getCurrentLocation(&maxLoc) ||
       parser.parseInteger(storageTypeMax) || parser.parseGreater())
     return failure();
-  if (storageTypeMin < defaultIntegerMin) {
-    return parser.emitError(minLoc, "illegal storage type minimum: ")
-           << storageTypeMin;
-  }
-  if (storageTypeMax > defaultIntegerMax) {
-    return parser.emitError(maxLoc, "illegal storage type maximum: ")
-           << storageTypeMax;
-  }
-  return success();
+
+  return checkStorageRange(parser, storageTypeMin, storageTypeMax, defaultMin,
+                           defaultMax, minLoc, maxLoc);
 }
 
 static FloatType parseExpressedTypeAndRange(DialectAsmParser &parser,
@@ -118,7 +151,7 @@ static FloatType parseExpressedTypeAndRange(DialectAsmParser &parser,
 ///   storage-type ::= (`i` | `u`) integer-literal
 ///   expressed-type-spec ::= `:` `f` integer-literal
 static Type parseAnyType(DialectAsmParser &parser) {
-  IntegerType storageType;
+  Type storageType;
   FloatType expressedType;
   unsigned typeFlags = 0;
   int64_t storageTypeMin;
@@ -192,7 +225,7 @@ static ParseResult parseQuantParams(DialectAsmParser &parser, double &scale,
 ///   scale-zero ::= float-literal `:` integer-literal
 ///   scale-zero-list ::= `{` scale-zero (`,` scale-zero)* `}`
 static Type parseUniformType(DialectAsmParser &parser) {
-  IntegerType storageType;
+  Type storageType;
   FloatType expressedType;
   unsigned typeFlags = 0;
   int64_t storageTypeMin;
@@ -339,14 +372,37 @@ static void printStorageType(QuantizedType type, DialectAsmPrinter &out) {
   // storage type
   unsigned storageWidth = type.getStorageTypeIntegralWidth();
   bool isSigned = type.isSigned();
-  if (isSigned) {
+  if (type.getStorageType().isa<Float8E5M2Type>()) {
+    out << "f8E5M2";
+  } else if (type.getStorageType().isa<Float8E4M3FNType>()) {
+    out << "f8E4M3FN";
+  } else if (isSigned) {
     out << "i" << storageWidth;
   } else {
     out << "u" << storageWidth;
   }
 
   // storageTypeMin and storageTypeMax if not default.
-  if (type.hasStorageTypeBounds()) {
+  int64_t defaultMin =
+      type.getStorageType().isa<IntegerType>()
+          ? QuantizedType::getDefaultMinimumForInteger(isSigned, storageWidth)
+      : type.getStorageType().isa<Float8E5M2Type>()
+          ? QuantizedType::getDefaultMinimumForF8E5M2()
+      : type.getStorageType().isa<Float8E4M3FNType>()
+          ? QuantizedType::getDefaultMinimumForF8E4M3FN()
+          : std::numeric_limits<int64_t>::max();
+
+  int64_t defaultMax =
+      type.getStorageType().isa<IntegerType>()
+          ? QuantizedType::getDefaultMaximumForInteger(isSigned, storageWidth)
+      : type.getStorageType().isa<Float8E5M2Type>()
+          ? QuantizedType::getDefaultMaximumForF8E5M2()
+      : type.getStorageType().isa<Float8E4M3FNType>()
+          ? QuantizedType::getDefaultMaximumForF8E4M3FN()
+          : std::numeric_limits<int64_t>::min();
+
+  if (defaultMin != type.getStorageTypeMin() ||
+      defaultMax != type.getStorageTypeMax()) {
     out << "<" << type.getStorageTypeMin() << ":" << type.getStorageTypeMax()
         << ">";
   }
diff --git a/mlir/test/Dialect/Quant/parse-any-invalid.mlir b/mlir/test/Dialect/Quant/parse-any-invalid.mlir
@@ -17,12 +17,12 @@
 
 // -----
 // Unrecognized storage type: illegal prefix
-// expected-error@+1 {{illegal storage type prefix}}
+// expected-error@+1 {{illegal quantized storage type alias}}
 !qalias = !quant.any<int8<-4:3>:f32>
 
 // -----
 // Unrecognized storage type: no width
-// expected-error@+1 {{illegal storage type prefix}}
+// expected-error@+1 {{illegal quantized storage type alias}}
 !qalias = !quant.any<i<-4:3>:f32>
 
 // -----
diff --git a/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir b/mlir/test/Dialect/Quant/parse-uniform-invalid.mlir
@@ -37,12 +37,12 @@
 
 // -----
 // Unrecognized storage type: illegal prefix
-// expected-error@+1 {{illegal storage type prefix}}
+// expected-error@+1 {{illegal quantized storage type alias}}
 !qalias = !quant.uniform<int8<-4:3>:f32, 0.99872:127>
 
 // -----
 // Unrecognized storage type: no width
-// expected-error@+1 {{illegal storage type prefix}}
+// expected-error@+1 {{illegal quantized storage type alias}}
 !qalias = !quant.uniform<i<-4:3>:f32, 0.99872:127>
 
 // -----
@@ -52,7 +52,7 @@
 
 // -----
 // Unrecognized storage type: storage size < 0
-// expected-error@+1 {{illegal storage type prefix}}
+// expected-error@+1 {{illegal quantized storage type alias}}
 !qalias = !quant.uniform<i-1<-4:3>:f32, 0.99872:127>
 
 // -----
@@ -80,6 +80,26 @@
 // expected-error@+1 {{illegal storage type minimum: -9}}
 !qalias = !quant.uniform<i4<-9:1>:f32, 0.99872:127>
 
+// -----
+// Illegal storage min/max: max > defaultMax
+// expected-error@+1 {{illegal storage type maximum: 60000}}
+!qalias = !quant.uniform<f8E5M2<-57344:60000>:f32, 0.99872:127>
+
+// -----
+// Illegal storage min/max: min < defaultMin
+// expected-error@+1 {{illegal storage type minimum: -60000}}
+!qalias = !quant.uniform<f8E5M2<-60000:57344>:f32, 0.99872:127>
+
+// -----
+// Illegal storage min/max: max > defaultMax
+// expected-error@+1 {{illegal storage type maximum: 500}}
+!qalias = !quant.uniform<f8E4M3FN<-448:500>:f32, 0.99872:127>
+
+// -----
+// Illegal storage min/max: min < defaultMin
+// expected-error@+1 {{illegal storage type minimum: -500}}
+!qalias = !quant.uniform<f8E4M3FN<-500:448>:f32, 0.99872:127>
+
 // -----
 // Illegal uniform params: invalid scale
 // expected-error@+1 {{expected floating point literal}}
diff --git a/mlir/test/Dialect/Quant/parse-uniform.mlir b/mlir/test/Dialect/Quant/parse-uniform.mlir
@@ -19,6 +19,33 @@ func.func @parse() -> !qalias {
   return %0 : !qalias
 }
 
+// -----
+// Default min/max value optimization for integers.
+// CHECK: !quant.uniform<i8:f32, 9.987200e-01:127>
+!qalias = !quant.uniform<i8<-128:127>:f32, 0.99872:127  >
+func.func @parse() -> !qalias {
+  %0 = "foo"() : () -> !qalias
+  return %0 : !qalias
+}
+
+// -----
+// Default min/max value optimization for f8E5M2.
+// CHECK: !quant.uniform<f8E5M2:f32, 9.987200e-01:127>
+!qalias = !quant.uniform<f8E5M2<-57344:57344>:f32, 0.99872:127  >
+func.func @parse() -> !qalias {
+  %0 = "foo"() : () -> !qalias
+  return %0 : !qalias
+}
+
+// -----
+// Default min/max value optimization for f8E4M3FN.
+// CHECK: !quant.uniform<f8E4M3FN:f32, 9.987200e-01:127>
+!qalias = !quant.uniform<f8E4M3FN<-448:448>:f32, 0.99872:127  >
+func.func @parse() -> !qalias {
+  %0 = "foo"() : () -> !qalias
+  return %0 : !qalias
+}
+
 // -----
 // Required per-layer params specified:
 //   [unsigned] storageType, expressedType, scale
@@ -47,6 +74,24 @@ func.func @parse() -> !qalias {
   return %0 : !qalias
 }
 
+// -----
+// Storage type: f8E5M2
+// CHECK: !quant.uniform<f8E5M2:f32, 2.000000e+02>
+!qalias = !quant.uniform<f8E5M2:f32, 2.0e+2>
+func.func @parse() -> !qalias {
+  %0 = "foo"() : () -> !qalias
+  return %0 : !qalias
+}
+
+// -----
+// Storage type: f8E4M3FN
+// CHECK: !quant.uniform<f8E4M3FN:f32, 2.000000e+02>
+!qalias = !quant.uniform<f8E4M3FN:f32, 2.0e+2>
+func.func @parse() -> !qalias {
+  %0 = "foo"() : () -> !qalias
+  return %0 : !qalias
+}
+
 // -----
 // Storage type: i16
 // CHECK: !quant.uniform<i16:f32, 2.000000e+02>