Move and rename GranularityType -> Granularity

andrewor14 · andrewor14 · commit fd69fc4b1942 · 2024-10-08T12:44:31.000-07:00
Summary: Move GranularityType to quant_primitives.py to be consistent with other similar fields like MappingType and ZeroPointDomain. Test Plan: CI ghstack-source-id: e9c8552 Pull Request resolved: #1038
diff --git a/torchao/prototype/awq/core.py b/torchao/prototype/awq/core.py
@@ -9,18 +9,19 @@
 from torchao.dtypes import to_affine_quantized_intx
 from torchao.quantization.quant_primitives import (
     MappingType,
+    Granularity,
     ZeroPointDomain,
 )
 from torchao.quantization.observer import (
-    AffineQuantizedObserverBase, GranularityType
+    AffineQuantizedObserverBase,
 )
 
 
 class AWQObserver(AffineQuantizedObserverBase):
     def __init__(self,
         weight: torch.Tensor,
         bias: torch.Tensor,
-        quantization_granularity: GranularityType,
+        quantization_granularity: Granularity,
         mapping_type: MappingType,
         target_dtype: torch.dtype,
         n_validation_examples: int,
@@ -40,7 +41,7 @@ def __init__(self,
         Args:
             weight: The weight tensor to be observed.
             bias: The bias tensor to be observed.
-            quantization_granularity: Granularity type which specifies how many weights share the same scale/zero point
+            quantization_granularity: Granularity which specifies how many weights share the same scale/zero point
             input_dtype: The data type of the input tensor.
             mapping_type: Always set to asymmetric 
             target_dtype: The target data type of the quantized tensor
@@ -153,4 +154,4 @@ def from_float(cls, float_linear: torch.nn.Linear, act_obs: AWQObserver):
         observed_linear = cls(float_linear.in_features, float_linear.out_features, act_obs, False, device=float_linear.weight.device, dtype=float_linear.weight.dtype)
         observed_linear.weight = float_linear.weight
         observed_linear.bias = float_linear.bias
-        return observed_linear
+        return observed_linear
diff --git a/torchao/quantization/observer.py b/torchao/quantization/observer.py
@@ -3,87 +3,19 @@
     _get_reduction_params,
     choose_qparams_affine_with_min_max,
     MappingType,
+    Granularity,
     ZeroPointDomain,
 )
 from torchao.utils import TORCH_VERSION_AT_LEAST_2_5
 
 from abc import ABCMeta, abstractmethod
-from dataclasses import dataclass
 from typing import Tuple, Optional, Any
 from functools import partial
 import logging
 
 logger = logging.getLogger(__name__)
 
 
-@dataclass(frozen=True)
-class GranularityType:
-    """
-    Base class for representing the granularity of quantization.
-
-    This class serves as a parent for specific granularity types used in 
-    quantization operations, such as per-tensor or per-axis quantization.
-    """
-    pass
-
-@dataclass(frozen=True)
-class PerTensor(GranularityType):
-    """
-    Represents per-tensor granularity in quantization.
-
-    This granularity type calcualtes the quantization parameters
-    based off the entire tensor.
-    """
-    pass
-
-@dataclass(frozen=True)
-class PerAxis(GranularityType):
-    """
-    Represents per-axis granularity in quantization.
-
-    This granularity type calcualtes different quantization parameters
-    along a specified axis of the tensor.
-
-    For example if the input tensor is shape [8, 16] and axis=0, then
-    the quantization parameters are calculated for each row of the tensor.
-    Giving a total of 8 quantization parameters.
-
-
-    Attributes:
-        axis (int): The axis along which reduction is performed.
-    """
-    axis: int
-
-@dataclass(frozen=True)
-
-class PerGroup(GranularityType):
-    """
-    Represents per-channel group granularity in quantization.
-
-    This granularity type calcualtes different quantization parameters
-    for each group of <group_size> elements.
-
-    For example if the input tensor is shape [8, 16], and the group size is 4, then
-    the input tensor is reshaped to [64, 4]
-    quantization parameters are calculated for each group of 4 elements,
-    giving a total of 64 quantization parameters.
-
-    Attributes:
-        group_size (int): The size of each quantization group
-
-    """
-    group_size: int
-
-class PerRow(GranularityType):
-    """
-    Represents row-wise granularity in quantization.
-
-    This is a special case of per-axis quantization and is unique to Float8 matmuls
-    where the input is quantized with a block_size of (1, ..., input.shape[-1]). And the weight
-    is quantized with a block_size of (1, weight.shape[1]).
-    """
-    pass
-
 # borrowed from torch.ao.quantization.observer
 class _PartialWrapper:
     def __init__(self, p):
@@ -120,23 +52,23 @@ def _with_args(cls_or_self, *args, **kwargs):
 
 
 def get_block_size(
-    input_shape: Tuple[int, ...], granularity_type: GranularityType
+    input_shape: Tuple[int, ...], granularity: Granularity
 ) -> Tuple[int, ...]:
     """Get the block size based on the input shape and granularity type.
 
     Args:
         input_shape: The input tensor shape possibly more than 2 dimensions
-        granularity_type: The granularity type of the quantization
+        granularity: The granularity type of the quantization
     """
-    if isinstance(granularity_type, PerTensor):
+    if isinstance(granularity, PerTensor):
         return input_shape
-    elif isinstance(granularity_type, PerAxis):
+    elif isinstance(granularity, PerAxis):
         block_size = list(input_shape)
-        block_size[granularity_type.axis] = 1
+        block_size[granularity.axis] = 1
         return tuple(block_size)
-    elif isinstance(granularity_type, PerRow):
+    elif isinstance(granularity, PerRow):
         return (1,) * (len(input_shape) - 1) + (input_shape[-1],)
-    raise ValueError(f"Unsupported GranularityType: {granularity_type}")
+    raise ValueError(f"Unsupported Granularity: {granularity}")
 
 
 ABC: Any = ABCMeta("ABC", (object,), {})  # compatible with Python 2 *and* 3:
@@ -146,7 +78,7 @@ class AffineQuantizedObserverBase(ABC, torch.nn.Module):
     """Observer module for affine quantization (https://github.com/pytorch/ao/tree/main/torchao/quantization#affine-quantization)
 
     Args:
-      `granularity_type` and `block_size`: The granularity of the quantization,
+      `granularity` and `block_size`: The granularity of the quantization,
         must specify at least one, if both are specified `block_size` takes precedence
         Current supported granularity type are `PerTensor` and `PerAxis`
       other args: please see `:class:torchao.dtypes.AffineQuantizedTensor`
@@ -158,7 +90,7 @@ def __init__(
         self,
         mapping_type: MappingType,
         target_dtype: torch.dtype,
-        granularity_type: GranularityType,
+        granularity: Granularity,
         quant_min: Optional[int] = None,
         quant_max: Optional[int] = None,
         eps: Optional[float] = None,
@@ -168,11 +100,11 @@ def __init__(
         zero_point_domain: Optional[ZeroPointDomain] = ZeroPointDomain.INT,
     ):
         super().__init__()
-        assert granularity_type is not None, "granularity_type is None"
+        assert granularity is not None, "granularity is None"
 
         self.mapping_type = mapping_type
         self.target_dtype = target_dtype
-        self.granularity_type = granularity_type
+        self.granularity = granularity
         self.quant_min = quant_min
         self.quant_max = quant_max
         self.eps = eps
@@ -202,8 +134,8 @@ def forward(self, input: torch.Tensor):
             return input
 
         input_detached = input.detach()
-        assert self.granularity_type is not None, "granularity_type is None"
-        block_size = get_block_size(input_detached.shape, self.granularity_type)
+        assert self.granularity is not None, "granularity is None"
+        block_size = get_block_size(input_detached.shape, self.granularity)
 
         shape_for_reduction, reduction_dims = _get_reduction_params(
             block_size, input_detached.size()
diff --git a/torchao/quantization/quant_primitives.py b/torchao/quantization/quant_primitives.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
+from dataclasses import dataclass
 from enum import Enum, auto
 from typing import List, Optional, Tuple, Dict, Callable, Union
 import torch, math
@@ -64,6 +65,74 @@ class ZeroPointDomain(Enum):
     INT = auto()
     FLOAT = auto()
 
+@dataclass(frozen=True)
+class Granularity:
+    """
+    Base class for representing the granularity of quantization.
+
+    This class serves as a parent for specific granularity types used in 
+    quantization operations, such as per-tensor or per-axis quantization.
+    """
+    pass
+
+@dataclass(frozen=True)
+class PerTensor(Granularity):
+    """
+    Represents per-tensor granularity in quantization.
+
+    This granularity type calculates the quantization parameters
+    based off the entire tensor.
+    """
+    pass
+
+@dataclass(frozen=True)
+class PerAxis(Granularity):
+    """
+    Represents per-axis granularity in quantization.
+
+    This granularity type calculates different quantization parameters
+    along a specified axis of the tensor.
+
+    For example if the input tensor is shape [8, 16] and axis=0, then
+    the quantization parameters are calculated for each row of the tensor,
+    giving a total of 8 quantization parameters.
+
+
+    Attributes:
+        axis (int): The axis along which reduction is performed.
+    """
+    axis: int
+
+@dataclass(frozen=True)
+
+class PerGroup(Granularity):
+    """
+    Represents per-channel group granularity in quantization.
+
+    This granularity type calculates different quantization parameters
+    for each group of <group_size> elements.
+
+    For example if the input tensor is shape [8, 16], and the group size is 4, then
+    the input tensor is reshaped to [32, 4] and
+    quantization parameters are calculated for each group of 4 elements,
+    giving a total of 32 quantization parameters.
+
+    Attributes:
+        group_size (int): The size of each quantization group
+
+    """
+    group_size: int
+
+class PerRow(Granularity):
+    """
+    Represents row-wise granularity in quantization.
+
+    This is a special case of per-axis quantization and is unique to Float8 matmuls
+    where the input is quantized with a block_size of (1, ..., input.shape[-1]) and the weight
+    is quantized with a block_size of (1, weight.shape[1]).
+    """
+    pass
+
 if TORCH_VERSION_AT_LEAST_2_5:
     torch.serialization.add_safe_globals([MappingType, ZeroPointDomain])