pytorch
diff --git a/‎test/torchaudio_unittest/assets/1770-2_Comp_24LKFS_500Hz_2ch.wav‎
5.31 MB b/‎test/torchaudio_unittest/assets/1770-2_Comp_24LKFS_500Hz_2ch.wav‎
5.31 MB
diff --git a/‎test/torchaudio_unittest/assets/1770-2_Comp_AbsGateTest.wav‎
750 KB b/‎test/torchaudio_unittest/assets/1770-2_Comp_AbsGateTest.wav‎
750 KB
diff --git a/‎test/torchaudio_unittest/assets/1770-2_Comp_RelGateTest.wav‎
750 KB b/‎test/torchaudio_unittest/assets/1770-2_Comp_RelGateTest.wav‎
750 KB
diff --git a/‎test/torchaudio_unittest/assets/1770-2_Conf_Mono_Voice+Music-24LKFS.wav‎
7.63 MB b/‎test/torchaudio_unittest/assets/1770-2_Conf_Mono_Voice+Music-24LKFS.wav‎
7.63 MB
diff --git a/‎test/torchaudio_unittest/functional/functional_impl.py‎
Lines changed: 34 additions & 0 deletions b/‎test/torchaudio_unittest/functional/functional_impl.py‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎test/torchaudio_unittest/functional/torchscript_consistency_impl.py‎
Lines changed: 8 additions & 0 deletions b/‎test/torchaudio_unittest/functional/torchscript_consistency_impl.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎torchaudio/functional/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎torchaudio/functional/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎torchaudio/functional/functional.py‎
Lines changed: 62 additions & 0 deletions b/‎torchaudio/functional/functional.py‎
Lines changed: 62 additions & 0 deletions
@@ -10,6 +10,8 @@
 from scipy import signal
 from torchaudio_unittest.common_utils import (
     beamform_utils,
+    get_asset_path,
+    load_wav,
     get_sinusoid,
     get_whitenoise,
     nested_params,
@@ -257,6 +259,38 @@ def test_compute_deltas_two_channels(self):
         computed = F.compute_deltas(specgram, win_length=3)
         self.assertEqual(computed, expected)
 
+    def test_measure_loudness_mono_voice_music(self):
+        filepath = get_asset_path("1770-2_Conf_Mono_Voice+Music-24LKFS.wav")
+        waveform, sample_rate = load_wav(filepath)
+
+        loudness = F.measure_loudness(waveform, sample_rate)
+        expected = torch.tensor(-24.0, dtype=loudness.dtype, device=self.device)
+        self.assertEqual(loudness, expected, rtol=0.01, atol=0.1)
+
+    def test_measure_loudness_two_channels(self):
+        filepath = get_asset_path("1770-2_Comp_24LKFS_500Hz_2ch.wav")
+        waveform, sample_rate = load_wav(filepath)
+
+        loudness = F.measure_loudness(waveform, sample_rate)
+        expected = torch.tensor(-24.0, dtype=loudness.dtype, device=self.device)
+        self.assertEqual(loudness, expected, rtol=0.01, atol=0.1)
+
+    def test_measure_loudness_absolute_gate(self):
+        filepath = get_asset_path("1770-2_Comp_AbsGateTest.wav")
+        waveform, sample_rate = load_wav(filepath)
+
+        loudness = F.measure_loudness(waveform, sample_rate)
+        expected = torch.tensor(-69.5, dtype=loudness.dtype, device=self.device)
+        self.assertEqual(loudness, expected, rtol=0.01, atol=0.1)
+
+    def test_measure_loudness_relative_gate(self):
+        filepath = get_asset_path("1770-2_Comp_RelGateTest.wav")
+        waveform, sample_rate = load_wav(filepath)
+
+        loudness = F.measure_loudness(waveform, sample_rate)
+        expected = torch.tensor(-10.0, dtype=loudness.dtype, device=self.device)
+        self.assertEqual(loudness, expected, rtol=0.01, atol=0.1)
+
     @parameterized.expand([(100,), (440,)])
     def test_detect_pitch_frequency_pitch(self, frequency):
         sample_rate = 44100
 
@@ -111,6 +111,14 @@ def func(tensor):
 
         self._assert_consistency(func, (waveform,))
 
+    def test_measure_loudness(self):
+        if self.dtype == torch.float64:
+            raise unittest.SkipTest("This test is known to fail for float64")
+
+        sample_rate = 44100
+        waveform = common_utils.get_sinusoid(sample_rate=sample_rate)
+        self._assert_consistency(F.measure_loudness, (waveform, sample_rate))
+
     def test_melscale_fbanks(self):
         if self.device != torch.device("cpu"):
             raise unittest.SkipTest("No need to perform test on device other than CPU")
 
@@ -30,6 +30,7 @@
     compute_kaldi_pitch,
     create_dct,
     DB_to_amplitude,
+    measure_loudness,
     detect_pitch_frequency,
     edit_distance,
     griffinlim,
@@ -62,6 +63,7 @@
     "melscale_fbanks",
     "linear_fbanks",
     "DB_to_amplitude",
+    "measure_loudness",
     "detect_pitch_frequency",
     "griffinlim",
     "mask_along_axis",
 
@@ -10,6 +10,7 @@
 import torchaudio
 from torch import Tensor
 from torchaudio._internal import module_utils as _mod_utils
+from .filtering import highpass_biquad, treble_biquad
 
 __all__ = [
     "spectrogram",
@@ -35,6 +36,7 @@
     "apply_codec",
     "resample",
     "edit_distance",
+    "measure_loudness",
     "pitch_shift",
     "rnnt_loss",
     "psd",
@@ -1602,6 +1604,66 @@ def edit_distance(seq1: Sequence, seq2: Sequence) -> int:
     return int(dold[-1])
 
 
+def measure_loudness(waveform: Tensor, sample_rate: int):
+    r"""Measure audio loudness according to the ITU-R BS.1770-4 recommendation.
+
+    .. devices:: CPU CUDA
+
+    .. properties:: TorchScript
+
+    Args:
+        waveform(torch.Tensor): audio waveform of dimension of `(..., channels, time)`
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
+
+    Returns:
+        Tensor: loudness estimates (LKFS)
+
+    Reference:
+        - https://www.itu.int/rec/R-REC-BS.1770-4-201510-I/en
+    """
+
+    if waveform.size(-2) > 5:
+        raise ValueError("Only up to 5 channels are supported.")
+
+    gate_duration: float = 0.4
+    overlap: float = 0.75
+    gamma_abs: float = -70.0
+    gate_samples = int(round(gate_duration * sample_rate))
+    step = int(round(gate_samples * (1 - overlap)))
+
+    # Apply K-weighting
+    waveform = treble_biquad(waveform, sample_rate, 4.0, 1500.0, 1 / math.sqrt(2))
+    waveform = highpass_biquad(waveform, sample_rate, 38.0, 0.5)
+
+    # Compute the energy for each block
+    energy = torch.square(waveform).unfold(-1, gate_samples, step)
+    energy = torch.mean(energy, dim=-1)
+
+    # Compute channel-weighted summation
+    g = torch.tensor([1.0, 1.0, 1.0, 1.41, 1.41], dtype=waveform.dtype, device=waveform.device)
+    g = g[: energy.size(-2)]
+
+    energy_weighted = torch.sum(g.unsqueeze(-1) * energy, dim=-2)
+    loudness = -0.691 + 10 * torch.log10(energy_weighted)
+
+    # Apply absolute gating of the blocks
+    gated_blocks = loudness > gamma_abs
+    gated_blocks = gated_blocks.unsqueeze(-2)
+
+    energy_filtered = torch.sum(gated_blocks * energy, dim=-1) / torch.count_nonzero(gated_blocks, dim=-1)
+    energy_weighted = torch.sum(g * energy_filtered, dim=-1)
+    gamma_rel = -0.691 + 10 * torch.log10(energy_weighted) - 10
+
+    # Apply relative gating of the blocks
+    gated_blocks = torch.logical_and(gated_blocks.squeeze(-2), loudness > gamma_rel.unsqueeze(-1))
+    gated_blocks = gated_blocks.unsqueeze(-2)
+
+    energy_filtered = torch.sum(gated_blocks * energy, dim=-1) / torch.count_nonzero(gated_blocks, dim=-1)
+    energy_weighted = torch.sum(g * energy_filtered, dim=-1)
+    LKFS = -0.691 + 10 * torch.log10(energy_weighted)
+    return LKFS
+
+
 def pitch_shift(
     waveform: Tensor,
     sample_rate: int,