Skip to content

Commit 3baa06a

Browse files
committed
feat: fast EntropyBottleneck aux_loss minimization via bisection search
This method completes in <1 second and reduces aux_loss to <0.01. This makes the aux_loss optimization during training unnecessary. Another alternative would be to run the following post-training:

```python
while aux_loss > 0.1:
    aux_loss = model.aux_loss()
    aux_loss.backward()
    aux_optimizer.step()
    aux_optimizer.zero_grad()
```

...but since we do not manage aux_loss learning rates, the bisection search method might converge better.
1 parent b10cc7c commit 3baa06a

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

compressai/entropy_models/entropy_models.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,8 @@ def update(self, force: bool = False) -> bool:
392392
if self._offset.numel() > 0 and not force:
393393
return False
394394

395+
self._update_quantiles()
396+
395397
medians = self.quantiles[:, 0, 1]
396398

397399
minima = medians - self.quantiles[:, 0, 0]
@@ -521,6 +523,31 @@ def _build_indexes(size):
521523
def _extend_ndims(tensor, n):
522524
return tensor.reshape(-1, *([1] * n)) if n > 0 else tensor.reshape(-1)
523525

@torch.no_grad()
def _update_quantiles(self):
    """Recompute ``self.quantiles`` by bisection search.

    For each entry in ``self.target``, finds the per-channel input at
    which ``_logits_cumulative`` reaches that target value, and writes
    it into the corresponding slot of ``self.quantiles``. Runs without
    gradient tracking and mutates ``self.quantiles`` in place.
    """
    device = self.quantiles.device
    bracket_shape = (self.channels, 1, 1)
    # Initial brackets wide enough to contain any realistic quantile.
    lower = torch.full(bracket_shape, -1e9, device=device)
    upper = torch.full(bracket_shape, 1e9, device=device)

    def cumulative_logits(y):
        # Monotone per-channel function whose level sets we search for.
        return self._logits_cumulative(y, stop_gradient=True)

    for slot, level in enumerate(self.target):
        solution = self._search_target(cumulative_logits, level, lower, upper)
        self.quantiles[:, :, slot] = solution[:, :, 0]
539+
@staticmethod
def _search_target(f, target, low, high):
    """Elementwise bisection: find ``y`` with ``f(y) == target``.

    ``f`` is assumed elementwise monotonically non-decreasing, and the
    initial brackets must satisfy ``f(low) <= target <= f(high)``.
    Returns the midpoint of the final (numerically collapsed) bracket.
    """
    assert (low <= high).all()
    assert ((f(low) <= target) & (target <= f(high))).all()
    # Halve every bracket until its endpoints are numerically equal.
    while (~torch.isclose(low, high)).any():
        midpoint = (low + high) / 2
        probe = f(midpoint)
        # Keep the half-bracket that still contains the target; where
        # probe == target, both endpoints collapse onto the midpoint.
        low = torch.where(probe <= target, midpoint, low)
        high = torch.where(probe >= target, midpoint, high)
    return (low + high) / 2
550+
524551
def compress(self, x):
525552
indexes = self._build_indexes(x.size())
526553
medians = self._get_medians().detach()

0 commit comments

Comments
 (0)