Closed
Changes from all commits
119 commits
9e900d5
decouple criterias from pruner
WeiweiZhang1 Nov 23, 2022
9cd602b
remove development code
WeiweiZhang1 Nov 24, 2022
e7d76ef
add docstring, reorg progressive pruner
WeiweiZhang1 Nov 30, 2022
847f75a
fix typo
WeiweiZhang1 Dec 1, 2022
1868294
add yolov5 example
WeiweiZhang1 Dec 1, 2022
a93ecb5
modify docstring and formatting errors, add copyright description
WeiweiZhang1 Dec 1, 2022
e4eca40
rm useless function, bugfix
WeiweiZhang1 Dec 2, 2022
c698b02
bugfixs2
WeiweiZhang1 Dec 2, 2022
c4568d8
fix typos
WeiweiZhang1 Dec 5, 2022
1372b78
add dictionary
WeiweiZhang1 Dec 5, 2022
12a27e6
fix typos about criterion
WeiweiZhang1 Dec 5, 2022
c9970ff
modify pruning config, add progressive test-case
WeiweiZhang1 Dec 6, 2022
08c6b8d
Merged new-developed pruning API to old one.
YIYANGCAI Dec 7, 2022
73edbef
test
Dec 7, 2022
08b9924
adjust pruning location
Dec 7, 2022
110111f
adjust pruning location
Dec 7, 2022
77095d5
fix typo
Dec 7, 2022
86464c0
1 change name due to name conflict, 2 change locations
Dec 7, 2022
16800d4
1 change name 2 change location
Dec 7, 2022
568bb3a
reformat code
Dec 7, 2022
aa321cb
Merged new-developed pruning API to old one.
YIYANGCAI Dec 7, 2022
5e5caa0
test
Dec 7, 2022
ed4276d
adjust pruning location
Dec 7, 2022
d6d6644
adjust pruning location
Dec 7, 2022
5402063
fix typo
Dec 7, 2022
a9ee475
1 change name due to name conflict, 2 change locations
Dec 7, 2022
47ceec0
1 change name 2 change location
Dec 7, 2022
84aeb6a
reformat code
Dec 7, 2022
10a00fa
RCeorg API, add test codes.
YIYANGCAI Dec 7, 2022
bded726
merge
wenhuach21 Dec 7, 2022
2963a48
merge
wenhuach21 Dec 7, 2022
ac368b4
merge weight_compression_pytorch, fixed some bugs
wenhuach21 Dec 7, 2022
0cf3f45
adjust pruning path
wenhuach21 Dec 7, 2022
70f9b76
replace nerualcompressor/pruning with new pruning
wenhuach21 Dec 7, 2022
cc2cc60
change update_items_for_all_pruner to update_config
wenhuach21 Dec 7, 2022
a19a455
fix not setting global config
wenhuach21 Dec 7, 2022
117076f
change names
wenhuach21 Dec 7, 2022
ce1aac2
merge two functions
wenhuach21 Dec 7, 2022
e6e5136
fix merging excluded names issue
wenhuach21 Dec 7, 2022
b112604
move files
wenhuach21 Dec 7, 2022
3e70319
mv sparsity_decay_type to global config, fix missing prune_layer_type
wenhuach21 Dec 7, 2022
dc09184
delete lots of legacy files, move new API files
wenhuach21 Dec 8, 2022
2508180
pruner README v1
yintong-lu Dec 8, 2022
c4d72c2
pruner README V1
yintong-lu Dec 8, 2022
1ec608b
Update README.md
yintong-lu Dec 8, 2022
e674eda
pruner README V_1
yintong-lu Dec 8, 2022
e82e95c
pruner README V1_
yintong-lu Dec 8, 2022
6dea88f
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
yintong-lu Dec 8, 2022
56acaeb
rename update_frequency to prune_frequncy, disable some unit test of
wenhuach21 Dec 8, 2022
db902c0
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
wenhuach21 Dec 8, 2022
2ac03c0
move folder
wenhuach21 Dec 8, 2022
b413763
delete imagenet pruning example
wenhuach21 Dec 8, 2022
6334e07
change pruning related configs of some examples
wenhuach21 Dec 8, 2022
5572284
fix a bug, add empty unit test file
wenhuach21 Dec 8, 2022
b1c0db0
fix typo
wenhuach21 Dec 8, 2022
d7887db
pruning doc v1.1
yintong-lu Dec 8, 2022
5898e95
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
yintong-lu Dec 8, 2022
c70d58b
pruning doc v1.1
yintong-lu Dec 8, 2022
6a4b6b9
fix a bug, add one unit test
wenhuach21 Dec 8, 2022
b5bd3e8
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
wenhuach21 Dec 8, 2022
263e85d
pruning doc v1.2
yintong-lu Dec 8, 2022
e6ccba3
pruning doc v1.2
yintong-lu Dec 8, 2022
1735112
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
yintong-lu Dec 8, 2022
6041a3a
pruningdoc v1.2
yintong-lu Dec 8, 2022
337629b
pruning doc v1.3
yintong-lu Dec 8, 2022
f5f1450
pruning doc v1.3
yintong-lu Dec 8, 2022
7fa6593
pruning doc v1.3
yintong-lu Dec 8, 2022
c3c7cfe
fix bug, add unit test
wenhuach21 Dec 8, 2022
30726ac
add test_pruning_config
WeiweiZhang1 Dec 8, 2022
e701b03
delete tensorflow pruning examples
wenhuach21 Dec 8, 2022
4d6aa73
pruning doc v1.5
yintong-lu Dec 8, 2022
cd723a0
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
yintong-lu Dec 8, 2022
67674e2
pruning doc v1.5
yintong-lu Dec 8, 2022
10ad8ad
pruning doc v1.5
yintong-lu Dec 8, 2022
a0f7ce7
pruning doc V1.5
yintong-lu Dec 8, 2022
1be82d7
pruning doc V1.5
yintong-lu Dec 8, 2022
06ecda5
pruning doc V1.5
yintong-lu Dec 8, 2022
0dd2d92
pruning doc V1.6
yintong-lu Dec 8, 2022
2f71f68
pruning doc V1.6
yintong-lu Dec 8, 2022
063ef19
pruning doc V1.6
yintong-lu Dec 8, 2022
4b07139
pruning doc V1.6
yintong-lu Dec 8, 2022
711a5bb
pruning doc V1.6
yintong-lu Dec 8, 2022
fdd1b4d
add unit test
wenhuach21 Dec 8, 2022
7b48f59
update unit test
wenhuach21 Dec 8, 2022
8884217
update unit test
wenhuach21 Dec 8, 2022
423b144
fix an issue in progressive pruning
wenhuach21 Dec 8, 2022
bcc68e0
Merge branch 'master' into cyy/pruning-api-merge
wenhuach21 Dec 8, 2022
5a99366
update unit test
wenhuach21 Dec 8, 2022
8efe8cb
fix issues based on preci
wenhuach21 Dec 8, 2022
b405c90
fix issue
wenhuach21 Dec 8, 2022
f8c5a04
lazy import torch
wenhuach21 Dec 8, 2022
633c2a9
lazy import torch
wenhuach21 Dec 8, 2022
8be9637
torch import issue
wenhuach21 Dec 8, 2022
d7fea68
torch import issue
wenhuach21 Dec 8, 2022
7a04c1d
fix update config issue, update unit test, fix example import issue
wenhuach21 Dec 8, 2022
140ddaa
update examples
WeiweiZhang1 Dec 8, 2022
b158da9
update examples 2
WeiweiZhang1 Dec 8, 2022
3eec738
bugfix
WeiweiZhang1 Dec 9, 2022
198f5c9
recover files in experimental folder
wenhuach21 Dec 9, 2022
fd42cae
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
wenhuach21 Dec 9, 2022
e692eca
support pruning_config in pruning
wenhuach21 Dec 9, 2022
47a7032
prune_domain to pruning_scope
wenhuach21 Dec 9, 2022
0b280d8
reformat code
wenhuach21 Dec 9, 2022
9308000
update README
WeiweiZhang1 Dec 9, 2022
34d898c
change configs, not finished
wenhuach21 Dec 9, 2022
2201f55
partly update
wenhuach21 Dec 9, 2022
07f1fcc
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
wenhuach21 Dec 9, 2022
71c0e53
add support for weight_compresssion_config
wenhuach21 Dec 9, 2022
4da7634
cyy:update unitest files for new api.
YIYANGCAI Dec 9, 2022
b1e1654
update unittests 2
WeiweiZhang1 Dec 9, 2022
aba1b23
update tests
WeiweiZhang1 Dec 9, 2022
c6f79d5
INC prune doc v2
yintong-lu Dec 9, 2022
6ffa816
try to fix init conflict
wenhuach21 Dec 9, 2022
2424ff4
trying to solve conflict
wenhuach21 Dec 9, 2022
685ff2d
trying to solve conflict issue
wenhuach21 Dec 9, 2022
3588322
INC prune doc v2.0
yintong-lu Dec 9, 2022
7b3f5f7
Merge branch 'cyy/pruning-api-merge' of https://github.com/intel/neur…
yintong-lu Dec 9, 2022
ed7cafe
Removed redundant yaml configs in ut code.
YIYANGCAI Dec 9, 2022
cbf24e5
pruning README v2
yintong-lu Dec 12, 2022
10 changes: 10 additions & 0 deletions .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt
@@ -150,6 +150,7 @@ berts
bertsquad
BertTokenizer
bfloat
blockwise
BFP
BGR
Bianchi
@@ -327,6 +328,7 @@ convolutional
Convolutional
ConvPerStage
ConvReLU
cooldown
copt
coreml
CoreML
@@ -741,6 +743,7 @@ horovodrun
hostfile
Hounsfield
howpublished
hyp
HqEgzS
href
html
@@ -1179,6 +1182,7 @@ ngatang
NGPUS
ngram
NHWC
ni
NIC
nifti
niftis
@@ -1240,8 +1244,11 @@ nvidia
NVIDIA
NVIDIA's
nvme
nw
Nx
NxM
nyu
oc
ok
ol
Omer
@@ -1251,6 +1258,7 @@ oneapi
oneAPI
onednn
oneDNN
oneshot
onlinedocs
onnx
ONNX
@@ -1885,6 +1893,7 @@ UI
UID
uint
uk
ultralytics
un
uncomment
uncompress
@@ -1895,6 +1904,7 @@ unidecode
uniq
unittest
unref
unscale
unsqueeze
unstack
upenn
Binary file added docs/source/_static/imgs/pruning/pruning.PNG
Binary file modified docs/source/_static/imgs/pruning/pruning_patterns.png
183 changes: 183 additions & 0 deletions docs/source/pruning_details.md
@@ -0,0 +1,183 @@
Pruning details
============



1. [Introduction](#introduction)



>>>[Neural Network Pruning](#neural-network-pruning)



>>>[Pruning Patterns](#pruning-patterns)



>>>[Pruning Criteria](#pruning-criteria)



>>>[Pruning Schedule](#pruning-schedule)



>>>[Pruning Type](#pruning-type)



>>>[Regularization](#regularization)





2. [Pruning examples](#examples)



3. [Reference](#reference)



## Introduction



### Neural Network Pruning
Neural network pruning is a promising model compression technique that removes the least important parameters in the network and achieves compact architectures with minimal accuracy drop and maximal inference acceleration. As state-of-the-art model sizes have grown at an unprecedented speed, pruning has become increasingly crucial for reducing the computational and memory footprint that huge neural networks require.




### Pruning Patterns


- Unstructured Pruning


Unstructured pruning means pruning the least salient connections in the model. The nonzero patterns are irregular and could be anywhere in the matrix.


- Structured Pruning


Structured pruning means pruning parameters in groups, deleting entire blocks, filters, or channels according to some pruning criterion. In general, structured pruning leads to lower accuracy than unstructured pruning because of its more restrictive structure, but it can significantly accelerate model execution since it maps better onto hardware designs.
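
To make the two patterns concrete, the sketch below builds an unstructured mask and a structured 4x1 block mask for the same weight matrix. This is an illustrative NumPy example, not Intel Neural Compressor's internal implementation; the 4x1 grouping (four consecutive rows per column) is an assumption for demonstration.

```python
import numpy as np

rng = np.random.default_rng(0)
weight = rng.standard_normal((8, 8))
sparsity = 0.5

# Unstructured: zero the 50% of individual weights with the smallest magnitude.
threshold = np.quantile(np.abs(weight), sparsity)
unstructured_mask = (np.abs(weight) > threshold).astype(weight.dtype)

# Structured 4x1: score each block of 4 consecutive rows in a column by its
# L1 norm, then zero the 50% of blocks with the smallest scores.
block_scores = np.abs(weight).reshape(2, 4, 8).sum(axis=1)      # (2, 8)
block_mask = (block_scores > np.quantile(block_scores, sparsity)).astype(weight.dtype)
structured_mask = np.repeat(block_mask, 4, axis=0)              # back to (8, 8)

print((unstructured_mask == 0).mean(), (structured_mask == 0).mean())
```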






### Pruning Criteria



Pruning criteria determine how the weights of a neural network are scored and pruned. Magnitude and gradient are widely used to score the weights; a small sketch follows at the end of this subsection.


- Magnitude


The algorithm prunes the weights with the lowest absolute values at each layer, up to the given sparsity target.


- Gradient

The algorithm prunes the weights with the lowest gradient values at each layer, up to the given sparsity target.

- SNIP


The algorithm prunes the dense model at its initialization by analyzing each weight's effect on the loss function when it is masked. Please refer to the original [paper](https://arxiv.org/abs/1810.02340) for details.


- SNIP with momentum


The algorithm improves on the original SNIP algorithm by introducing a score map for the weights that is updated with momentum.\
In the following formula, $n$ is the pruning step, and $W$ and $G$ are the model's weights and gradients respectively.
$$Score_{n} = 1.0 \times Score_{n-1} + 0.9 \times |W_{n} \times G_{n}|$$
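
The criteria above differ mainly in the score they assign to each weight. The sketch below scores a layer by magnitude, by gradient, and with the momentum-style update from the formula, then masks the lowest-scoring fraction. The helper names are assumptions for illustration, not Intel Neural Compressor's API.

```python
import torch

def mask_lowest(score: torch.Tensor, sparsity: float) -> torch.Tensor:
    """Return a 0/1 mask that zeroes the lowest-scoring `sparsity` fraction."""
    k = max(1, int(sparsity * score.numel()))
    threshold = torch.kthvalue(score.flatten(), k).values
    return (score > threshold).to(score.dtype)

weight = torch.randn(64, 64, requires_grad=True)
target = torch.randn(64, 64)
loss = ((weight - target) ** 2).mean()
loss.backward()

magnitude_score = weight.detach().abs()
gradient_score = weight.grad.abs()

# SNIP with momentum: Score_n = 1.0 * Score_{n-1} + 0.9 * |W_n * G_n|
snip_score = torch.zeros_like(weight)
snip_score = 1.0 * snip_score + 0.9 * (weight.detach() * weight.grad).abs()

pruned = weight.detach() * mask_lowest(magnitude_score, sparsity=0.9)
```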



### Pruning Schedule


The pruning schedule defines how the model reaches the target sparsity (the ratio of pruned weights).


- One-shot Pruning


One-shot pruning means the model is pruned to its target sparsity in a single step. This method is often applied at the model's initialization. It can easily cause an accuracy drop, but it saves a great deal of training time.



- Iterative Pruning


Iterative pruning means the model is gradually pruned to its target sparsity during a training process. The pruning process contains several pruning steps, and each step raises the model's sparsity to a higher value. In the final pruning step, the model reaches the target sparsity and the pruning process ends.
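
As a sketch, the per-step sparsity can be interpolated between an initial and a target value. The cubic schedule below follows Zhu & Gupta's "To prune, or not to prune" and is an assumption here, not necessarily the library's exact schedule; one-shot pruning falls out as the case where the start and end steps coincide.

```python
def scheduled_sparsity(step, start_step, end_step, target_sparsity, initial_sparsity=0.0):
    """Cubically interpolate sparsity from `initial_sparsity` to `target_sparsity`."""
    if step < start_step:
        return initial_sparsity
    if step >= end_step:
        return target_sparsity
    progress = (step - start_step) / (end_step - start_step)
    return target_sparsity + (initial_sparsity - target_sparsity) * (1 - progress) ** 3

# Iterative: sparsity rises gradually; one-shot is start_step == end_step.
for step in (0, 250, 500, 750, 1000):
    print(step, round(scheduled_sparsity(step, 0, 1000, 0.9), 4))
```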




### Pruning Type



- Pattern_lock Pruning


The pattern_lock pruning type uses masks with a fixed sparsity pattern during the pruning process.


- Progressive Pruning


Progressive pruning aims to smooth structured pruning by automatically interpolating a group of interval masks during the pruning process. In this method, a sequence of masks is generated to enable a more flexible pruning process, and these masks gradually change to fit the target pruning structure.
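
Minimal sketches of the two pruning types above, assuming a simple PyTorch training loop: pattern-lock captures an existing zero pattern once and re-applies it after every optimizer step, while progressive pruning fades a to-be-pruned block toward zero over several steps instead of zeroing it at once. Both are illustrations with assumed names, not the library's implementation.

```python
import torch

# Pattern-lock: record an existing zero pattern, then keep it fixed.
masks = {}

def lock_pattern(model):
    for name, param in model.named_parameters():
        if param.dim() > 1:                      # weights only, skip biases
            masks[name] = (param != 0).to(param.dtype)

def apply_locked_masks(model):
    # Call after each optimizer step so pruned weights cannot regrow.
    with torch.no_grad():
        for name, param in model.named_parameters():
            if name in masks:
                param.mul_(masks[name])

# Progressive pruning (toy): interpolate a block's mask from 1.0 to 0.0
# over `num_steps` instead of zeroing the whole block in one step.
def progressive_scales(num_steps):
    for i in range(1, num_steps + 1):
        yield 1.0 - i / num_steps

block = torch.ones(4)
for scale in progressive_scales(4):
    print(block * scale)                         # 0.75, 0.5, 0.25, 0.0
```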


### Regularization


Regularization is a technique that discourages learning a more complex model and therefore performs variable selection.


- Group Lasso


The Group Lasso algorithm is used to prune entire rows, columns, or blocks of parameters, resulting in a smaller dense network.
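
A sketch of adding a group-lasso penalty over 4x1 weight blocks to the training loss; the group shape, penalty strength, and stand-in task loss are assumptions for illustration.

```python
import torch

def group_lasso_penalty(weight: torch.Tensor, block_rows: int = 4) -> torch.Tensor:
    """Sum of L2 norms over (block_rows x 1) groups; drives whole groups to zero."""
    out_ch, in_ch = weight.shape
    groups = weight.reshape(out_ch // block_rows, block_rows, in_ch)
    return groups.norm(dim=1).sum()

weight = torch.randn(64, 64, requires_grad=True)
task_loss = weight.pow(2).mean()                 # stand-in for the real task loss
loss = task_loss + 1e-3 * group_lasso_penalty(weight)
loss.backward()
```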




## Pruning Examples



We validate the pruning technique on typical models across various domains (including CV, NLP, and recommendation systems); the examples are listed below.


<table>
<thead>
<tr>
<th>Model</th>
<th>Dataset</th>
<th>Pruning Algorithm</th>
<th>Framework</th>
</tr>
</thead>
</table>



## Reference

[1] Namhoon Lee, Thalaiyasingam Ajanthan, and Philip Torr. SNIP: Single-shot Network Pruning based on Connection Sensitivity. In International Conference on Learning Representations, 2019.




16 changes: 6 additions & 10 deletions docs/source/pythonic_style.md
@@ -81,18 +81,14 @@ distiller = Distillation(config)
To specify pruning configurations, users can assign values to the corresponding attributes.
```python
from neural_compressor import config
-config.pruning.weight_compression.initial_sparsity = 0.0
 config.pruning.weight_compression.target_sparsity = 0.9
-config.pruning.weight_compression.max_sparsity_ratio_per_layer = 0.98
-config.pruning.weight_compression.prune_type = "basic_magnitude"
-config.pruning.weight_compression.start_epoch = 0
-config.pruning.weight_compression.end_epoch = 3
+config.pruning.weight_compression.max_sparsity_ratio_per_op = 0.98
+config.pruning.weight_compression.pruning_type = "snip_momentum"
 config.pruning.weight_compression.start_step = 0
-config.pruning.weight_compression.end_step = 0
-config.pruning.weight_compression.update_frequency = 1.0
-config.pruning.weight_compression.update_frequency_on_step = 1
-config.pruning.weight_compression.prune_domain = "global"
-config.pruning.weight_compression.pattern = "tile_pattern_1x1"
+config.pruning.weight_compression.end_step = 3
+config.pruning.weight_compression.pruning_frequency = 1
+config.pruning.weight_compression.pruning_scope = "global"
+config.pruning.weight_compression.pattern = "4x1"

from neural_compressor.experimental import Pruning
prune = Pruning(config)
2 changes: 1 addition & 1 deletion docs/source/user_yaml.md
@@ -111,7 +111,7 @@ pruning:
- !Pruner
start_epoch: 0
end_epoch: 19
-prune_type: basic_magnitude
+pruning_type: basic_magnitude
```
* ***distillation***: The distillation specifications define distillation's tuning
space. Similar to pruning, to define the training behavior, users can use the
4 changes: 2 additions & 2 deletions examples/README.md
@@ -609,14 +609,14 @@ Intel® Neural Compressor validated examples with multiple compression technique
<td>Natural Language Processing (text classification)</td>
<td>Structured (4x1, 2in4), Unstructured</td>
<td>Snip-momentum</td>
<td><a href="./pytorch/nlp/huggingface_models/text-classification/pruning/pytorch_pruner/eager">eager</a></td>
<td><a href="pytorch/nlp/huggingface_models/text-classification/pruning/eager">eager</a></td>
</tr>
<tr>
<td>Bert-mini</td>
<td>Natural Language Processing (question answering)</td>
<td>Structured (4x1, 2in4), Unstructured</td>
<td>Snip-momentum</td>
<td><a href="./pytorch/nlp/huggingface_models/question-answering/pruning/pytorch_pruner/eager">eager</a></td>
<td><a href="pytorch/nlp/huggingface_models/question-answering/pruning/eager">eager</a></td>
</tr>
</tbody>
</table>
@@ -44,16 +44,15 @@ pruning:
CrossEntropyLoss:
approach:
weight_compression:
-initial_sparsity: 0.0
 target_sparsity: 0.3
-start_epoch: 0
-end_epoch: 4
+start_step: 0
+end_step: 1000
 pruners:
 - !Pruner
-start_epoch: 0
-end_epoch: 4
-update_frequency: 1
-prune_type: basic_magnitude
+start_step: 0
+end_step: 1000
+pruning_frequency: 10
+pruning_type: magnitude
 names: ['layer1.0.conv1.weight', 'layer1.0.conv2.weight']

evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization.
@@ -46,16 +46,15 @@ pruning:
CrossEntropyLoss:
approach:
weight_compression:
-initial_sparsity: 0.0
 target_sparsity: 0.3
-start_epoch: 0
-end_epoch: 4
+start_step: 0
+end_step: 2000
 pruners:
 - !Pruner
-start_epoch: 0
-end_epoch: 4
-update_frequency: 1
-prune_type: basic_magnitude
+start_step: 0
+end_step: 4
+pruning_frequency: 100
+pruning_type: "snip_momentum"
 names: ['layer1.0.conv1.weight', 'layer1.0.conv2.weight']

evaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization.

This file was deleted.
