From 9bc0ca4feb619dd06bea835d27254d0520c56555 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 12 Jan 2021 17:25:46 +0000 Subject: [PATCH 01/16] Added bits_per_sample field to sox_io backend --- torchaudio/backend/common.py | 4 +++- torchaudio/backend/sox_io_backend.py | 3 ++- torchaudio/csrc/sox/io.cpp | 13 ++++++++++--- torchaudio/csrc/sox/io.h | 5 ++++- torchaudio/csrc/sox/register.cpp | 3 ++- 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 135a18caee..21989dc182 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -12,11 +12,13 @@ class AudioMetaData: :ivar int sample_rate: Sample rate :ivar int num_frames: The number of frames :ivar int num_channels: The number of channels + :ivar int bits_per_sample: The number of bits per sample """ - def __init__(self, sample_rate: int, num_frames: int, num_channels: int): + def __init__(self, sample_rate: int, num_frames: int, num_channels: int, bits_per_sample: int = 0): self.sample_rate = sample_rate self.num_frames = num_frames self.num_channels = num_channels + self.bits_per_sample = bits_per_sample @_mod_utils.deprecated('Please migrate to `AudioMetaData`.', '0.9.0') diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 1e6d417cb8..cb70de75dd 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -32,7 +32,8 @@ def info( # Cast to str in case type is `pathlib.Path` filepath = str(filepath) sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format) - return AudioMetaData(sinfo.get_sample_rate(), sinfo.get_num_frames(), sinfo.get_num_channels()) + return AudioMetaData(sinfo.get_sample_rate(), sinfo.get_num_frames(), sinfo.get_num_channels(), + sinfo.get_bits_per_sample()) @_mod_utils.requires_module('torchaudio._torchaudio') diff --git a/torchaudio/csrc/sox/io.cpp b/torchaudio/csrc/sox/io.cpp index e381be14f8..1e69c61b1b 100644 --- a/torchaudio/csrc/sox/io.cpp +++ b/torchaudio/csrc/sox/io.cpp @@ -13,10 +13,12 @@ namespace sox_io { SignalInfo::SignalInfo( const int64_t sample_rate_, const int64_t num_channels_, - const int64_t num_frames_) + const int64_t num_frames_, + const int64_t bits_per_sample_) : sample_rate(sample_rate_), num_channels(num_channels_), - num_frames(num_frames_){}; + num_frames(num_frames_), + bits_per_sample(bits_per_sample_){}; int64_t SignalInfo::getSampleRate() const { return sample_rate; @@ -30,6 +32,10 @@ int64_t SignalInfo::getNumFrames() const { return num_frames; } +int64_t SignalInfo::getBitsPerSample() const { + return bits_per_sample; +} + c10::intrusive_ptr get_info( const std::string& path, c10::optional& format) { @@ -46,7 +52,8 @@ c10::intrusive_ptr get_info( return c10::make_intrusive( static_cast(sf->signal.rate), static_cast(sf->signal.channels), - static_cast(sf->signal.length / sf->signal.channels)); + static_cast(sf->signal.length / sf->signal.channels), + static_cast(sf->encoding.bits_per_sample)); } namespace { diff --git a/torchaudio/csrc/sox/io.h b/torchaudio/csrc/sox/io.h index d6e5310077..f7287255a4 100644 --- a/torchaudio/csrc/sox/io.h +++ b/torchaudio/csrc/sox/io.h @@ -15,14 +15,17 @@ struct SignalInfo : torch::CustomClassHolder { int64_t sample_rate; int64_t num_channels; int64_t num_frames; + int64_t bits_per_sample; SignalInfo( const int64_t sample_rate_, const int64_t num_channels_, - const int64_t num_frames_); + const int64_t num_frames_, + const int64_t bits_per_sample_); int64_t getSampleRate() const; int64_t getNumChannels() const; int64_t getNumFrames() const; + int64_t getBitsPerSample() const; }; c10::intrusive_ptr get_info( diff --git a/torchaudio/csrc/sox/register.cpp b/torchaudio/csrc/sox/register.cpp index 7c65bebe2d..0f46af76d5 100644 --- a/torchaudio/csrc/sox/register.cpp +++ b/torchaudio/csrc/sox/register.cpp @@ -42,7 +42,8 @@ TORCH_LIBRARY_FRAGMENT(torchaudio, m) { m.class_("SignalInfo") .def("get_sample_rate", &torchaudio::sox_io::SignalInfo::getSampleRate) .def("get_num_channels", &torchaudio::sox_io::SignalInfo::getNumChannels) - .def("get_num_frames", &torchaudio::sox_io::SignalInfo::getNumFrames); + .def("get_num_frames", &torchaudio::sox_io::SignalInfo::getNumFrames) + .def("get_bits_per_sample", &torchaudio::sox_io::SignalInfo::getBitsPerSample); m.def("torchaudio::sox_io_get_info", &torchaudio::sox_io::get_info); m.def( From cd283042938ae8ceb01c59a0be7789220fe28e81 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 13 Jan 2021 15:50:18 +0000 Subject: [PATCH 02/16] Added tests for sox_io backend --- .../sox_io_backend/info_test.py | 23 +++++++++++++++---- torchaudio/backend/common.py | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/test/torchaudio_unittest/sox_io_backend/info_test.py b/test/torchaudio_unittest/sox_io_backend/info_test.py index 49fc797354..f25885bfb5 100644 --- a/test/torchaudio_unittest/sox_io_backend/info_test.py +++ b/test/torchaudio_unittest/sox_io_backend/info_test.py @@ -36,6 +36,7 @@ def test_wav(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) @parameterized.expand(list(itertools.product( ['float32', 'int32', 'int16', 'uint8'], @@ -52,6 +53,7 @@ def test_wav_multiple_channels(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) @parameterized.expand(list(itertools.product( [8000, 16000], @@ -71,6 +73,7 @@ def test_mp3(self, sample_rate, num_channels, bit_rate): # mp3 does not preserve the number of samples # assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats @parameterized.expand(list(itertools.product( [8000, 16000], @@ -89,6 +92,7 @@ def test_flac(self, sample_rate, num_channels, compression_level): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == 24 # FLAC standard @parameterized.expand(list(itertools.product( [8000, 16000], @@ -107,20 +111,23 @@ def test_vorbis(self, sample_rate, num_channels, quality_level): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats @parameterized.expand(list(itertools.product( [8000, 16000], [1, 2], + [16, 32], )), name_func=name_func) - def test_sphere(self, sample_rate, num_channels): + def test_sphere(self, sample_rate, num_channels, bits_per_sample): """`sox_io_backend.info` can check sph file correctly""" duration = 1 path = self.get_temp_path('data.sph') - sox_utils.gen_audio_file(path, sample_rate, num_channels, duration=duration) + sox_utils.gen_audio_file(path, sample_rate, num_channels, duration=duration, bit_depth=bits_per_sample) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == bits_per_sample @parameterized.expand(list(itertools.product( ['float32', 'int32', 'int16', 'uint8'], @@ -131,13 +138,15 @@ def test_amb(self, dtype, sample_rate, num_channels): """`sox_io_backend.info` can check amb file correctly""" duration = 1 path = self.get_temp_path('data.amb') + bits_per_sample = sox_utils.get_bit_depth(dtype) sox_utils.gen_audio_file( path, sample_rate, num_channels, - bit_depth=sox_utils.get_bit_depth(dtype), duration=duration) + bit_depth=bits_per_sample, duration=duration) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == bits_per_sample def test_amr_nb(self): """`sox_io_backend.info` can check amr-nb file correctly""" @@ -145,12 +154,16 @@ def test_amr_nb(self): num_channels = 1 sample_rate = 8000 path = self.get_temp_path('data.amr-nb') + bits_per_sample = 16 sox_utils.gen_audio_file( - path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=16, duration=duration) + path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=bits_per_sample, + duration=duration) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.num_channels == num_channels + assert info.bits_per_sample == bits_per_sample @skipIfNoExtension @@ -167,6 +180,7 @@ def test_opus(self, bitrate, num_channels, compression_level): assert info.sample_rate == 48000 assert info.num_frames == 32768 assert info.num_channels == num_channels + assert info.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats @skipIfNoExtension @@ -184,3 +198,4 @@ def test_mp3(self): path = get_asset_path("mp3_without_ext") sinfo = sox_io_backend.info(path, format="mp3") assert sinfo.sample_rate == 16000 + assert sinfo.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 21989dc182..2fa43d8e0f 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -12,7 +12,7 @@ class AudioMetaData: :ivar int sample_rate: Sample rate :ivar int num_frames: The number of frames :ivar int num_channels: The number of channels - :ivar int bits_per_sample: The number of bits per sample + :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats. """ def __init__(self, sample_rate: int, num_frames: int, num_channels: int, bits_per_sample: int = 0): self.sample_rate = sample_rate From c29834c1a021f07134f674b639c5a2db37b42f1c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 13 Jan 2021 16:33:38 +0000 Subject: [PATCH 03/16] Added bits_per_sample support and tests to SoundFile backend --- .../soundfile_backend/common.py | 2 +- .../soundfile_backend/info_test.py | 13 +++++-- torchaudio/backend/_soundfile_backend.py | 39 ++++++++++++++++++- torchaudio/backend/common.py | 2 +- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/common.py b/test/torchaudio_unittest/soundfile_backend/common.py index 4da0873fa1..8f991fb0f8 100644 --- a/test/torchaudio_unittest/soundfile_backend/common.py +++ b/test/torchaudio_unittest/soundfile_backend/common.py @@ -26,7 +26,7 @@ def skipIfFormatNotSupported(fmt): import soundfile fmts = soundfile.available_formats() - return skipIf(fmt not in fmts, f'"{fmt}" is not supported by sondfile') + return skipIf(fmt not in fmts, f'"{fmt}" is not supported by soundfile') return skipIf(True, '"soundfile" not available.') diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index 71acb20689..89e643b668 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -8,6 +8,7 @@ skipIfNoModule, get_wav_data, save_wav, + sox_utils, ) from .common import skipIfFormatNotSupported, parameterize @@ -32,6 +33,7 @@ def test_wav(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) @parameterize( ["float32", "int32", "int16", "uint8"], [8000, 16000], [4, 8, 16, 32], @@ -48,6 +50,7 @@ def test_wav_multiple_channels(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) @parameterize([8000, 16000], [1, 2]) @skipIfFormatNotSupported("FLAC") @@ -63,6 +66,7 @@ def test_flac(self, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == num_frames assert info.num_channels == num_channels + assert info.bits_per_sample == 24 @parameterize([8000, 16000], [1, 2]) @skipIfFormatNotSupported("OGG") @@ -78,18 +82,21 @@ def test_ogg(self, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.num_channels == 123 # TODO fix that (can't debug locally) - @parameterize([8000, 16000], [1, 2]) + @parameterize([8000, 16000], [1, 2], [('PCM_24', 24), ('PCM_32', 32)]) @skipIfFormatNotSupported("NIST") - def test_sphere(self, sample_rate, num_channels): + def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth): """`soundfile_backend.info` can check sph file correctly""" duration = 1 num_frames = sample_rate * duration data = torch.randn(num_frames, num_channels).numpy() path = self.get_temp_path("data.nist") - soundfile.write(path, data, sample_rate) + subtype, bits_per_sample = subtype_and_bit_depth + soundfile.write(path, data, sample_rate, subtype=subtype) info = soundfile_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels + assert info.bits_per_sample == bits_per_sample diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py index 719224b827..41fa94cd8f 100644 --- a/torchaudio/backend/_soundfile_backend.py +++ b/torchaudio/backend/_soundfile_backend.py @@ -11,6 +11,42 @@ import soundfile +# mapping from soundfile subtype to number of bits per sample. +# This is mostly heuristical and value is set to 0 when value is irrelevant +# (lossy formats) or can't be inferred. +# The dict is inspired from +# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 +SUBTYPE_TO_BITS_PER_SAMPLE = { + 'PCM_S8': 8, # Signed 8 bit data + 'PCM_16': 16, # Signed 16 bit data + 'PCM_24': 24, # Signed 24 bit data + 'PCM_32': 32, # Signed 32 bit data + 'PCM_U8': 8, # Unsigned 8 bit data (WAV and RAW only) + 'FLOAT': 32, # 32 bit float data + 'DOUBLE': 64, # 64 bit float data + 'ULAW': 0, # U-Law encoded. + 'ALAW': 0, # A-Law encoded. + 'IMA_ADPCM': 0, # IMA ADPCM. + 'MS_ADPCM': 0, # Microsoft ADPCM. + 'GSM610': 0, # GSM 6.10 encoding. + 'VOX_ADPCM': 0, # OKI / Dialogix ADPCM + 'G721_32': 0, # 32kbs G721 ADPCM encoding. + 'G723_24': 0, # 24kbs G723 ADPCM encoding. + 'G723_40': 0, # 40kbs G723 ADPCM encoding. + 'DWVW_12': 12, # 12 bit Delta Width Variable Word encoding. + 'DWVW_16': 16, # 16 bit Delta Width Variable Word encoding. + 'DWVW_24': 24, # 24 bit Delta Width Variable Word encoding. + 'DWVW_N': 0, # N bit Delta Width Variable Word encoding. + 'DPCM_8': 8, # 8 bit differential PCM (XI only) + 'DPCM_16': 16, # 16 bit differential PCM (XI only) + 'VORBIS': 0, # Xiph Vorbis encoding. + 'ALAC_16': 16, # Apple Lossless Audio Codec (16 bit). + 'ALAC_20': 20, # Apple Lossless Audio Codec (20 bit). + 'ALAC_24': 24, # Apple Lossless Audio Codec (24 bit). + 'ALAC_32': 32, # Apple Lossless Audio Codec (32 bit). +} + + @_mod_utils.requires_module("soundfile") def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: """Get signal information of an audio file. @@ -27,7 +63,8 @@ def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: AudioMetaData: meta data of the given audio. """ sinfo = soundfile.info(filepath) - return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels) + bits_per_sample = SUBTYPE_TO_BITS_PER_SAMPLE[sinfo.subtype] + return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels, bits_per_sample=bits_per_sample) _SUBTYPE2DTYPE = { diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 2fa43d8e0f..7cbfea027e 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -14,7 +14,7 @@ class AudioMetaData: :ivar int num_channels: The number of channels :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats. """ - def __init__(self, sample_rate: int, num_frames: int, num_channels: int, bits_per_sample: int = 0): + def __init__(self, sample_rate: int, num_frames: int, num_channels: int, bits_per_sample: int): self.sample_rate = sample_rate self.num_frames = num_frames self.num_channels = num_channels From f99885bb1c4bcda409dab08205e1db23d9e2e4f1 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 13 Jan 2021 16:46:32 +0000 Subject: [PATCH 04/16] fixed FLAC test --- test/torchaudio_unittest/soundfile_backend/info_test.py | 4 ++-- torchaudio/backend/_soundfile_backend.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index 89e643b668..0fb3f52e15 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -66,7 +66,7 @@ def test_flac(self, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == num_frames assert info.num_channels == num_channels - assert info.bits_per_sample == 24 + assert info.bits_per_sample == 16 @parameterize([8000, 16000], [1, 2]) @skipIfFormatNotSupported("OGG") @@ -82,7 +82,7 @@ def test_ogg(self, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.num_channels == 123 # TODO fix that (can't debug locally) + assert info.bits_per_sample == 123 # TODO fix that (can't debug locally) @parameterize([8000, 16000], [1, 2], [('PCM_24', 24), ('PCM_32', 32)]) @skipIfFormatNotSupported("NIST") diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py index 41fa94cd8f..3936aeb44d 100644 --- a/torchaudio/backend/_soundfile_backend.py +++ b/torchaudio/backend/_soundfile_backend.py @@ -11,9 +11,9 @@ import soundfile -# mapping from soundfile subtype to number of bits per sample. -# This is mostly heuristical and value is set to 0 when value is irrelevant -# (lossy formats) or can't be inferred. +# Mapping from soundfile subtype to number of bits per sample. +# This is mostly heuristical and the value is set to 0 when it is irrelevant +# (lossy formats) or when it can't be inferred. # The dict is inspired from # https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 SUBTYPE_TO_BITS_PER_SAMPLE = { From 1f5781d76e28ed3af6dc6ff738439c6191a0c347 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 13 Jan 2021 16:58:37 +0000 Subject: [PATCH 05/16] Fixed OGG test --- test/torchaudio_unittest/soundfile_backend/info_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index 0fb3f52e15..7471999f37 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -82,7 +82,7 @@ def test_ogg(self, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.bits_per_sample == 123 # TODO fix that (can't debug locally) + assert info.bits_per_sample == 0 @parameterize([8000, 16000], [1, 2], [('PCM_24', 24), ('PCM_32', 32)]) @skipIfFormatNotSupported("NIST") From c80cfd45cb4db7fa9dde403c9c40f3095fa2b74d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 15 Jan 2021 11:11:25 +0000 Subject: [PATCH 06/16] Addressed comments: Added links to bit depths + handled unknown subtype --- .../soundfile_backend/info_test.py | 31 +++++++++++++++++++ torchaudio/backend/_soundfile_backend.py | 24 ++++++++++---- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index 7471999f37..bf0e7cd26f 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -12,6 +12,9 @@ ) from .common import skipIfFormatNotSupported, parameterize +import pytest +from _pytest.monkeypatch import MonkeyPatch + if _mod_utils.is_module_available("soundfile"): import soundfile @@ -100,3 +103,31 @@ def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth): assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels assert info.bits_per_sample == bits_per_sample + + def test_unknown_subtype_warning(self): + """soundfile_backend.info issues a warning when the subtype is unknown + + This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE + dict should be updated. + """ + + soundfile_info_original = soundfile.info + + def info_wrapper(filepath): + # Wraps soundfile.info and sets the subtype to some unknown value + sinfo = soundfile_info_original(filepath) + sinfo.subtype = 'SOME_UNKNOWN_SUBTYPE' + return sinfo + + mp = MonkeyPatch() + mp.setattr(soundfile, "info", info_wrapper) + + path = self.get_temp_path("data.wav") + data = get_wav_data( + dtype='float32', num_channels=1, normalize=False, num_frames=16000 + ) + save_wav(path, data, sample_rate=16000) + with pytest.warns(UserWarning, match="subtype is unknown to TorchAudio"): + info = soundfile_backend.info(path) + assert info.bits_per_sample == 0 + mp.undo() diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py index 3936aeb44d..0631c7573a 100644 --- a/torchaudio/backend/_soundfile_backend.py +++ b/torchaudio/backend/_soundfile_backend.py @@ -14,9 +14,12 @@ # Mapping from soundfile subtype to number of bits per sample. # This is mostly heuristical and the value is set to 0 when it is irrelevant # (lossy formats) or when it can't be inferred. +# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard: +# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony, +# the default seems to be 8 bits but it can be compressed further to 4 bits. # The dict is inspired from # https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94 -SUBTYPE_TO_BITS_PER_SAMPLE = { +_SUBTYPE_TO_BITS_PER_SAMPLE = { 'PCM_S8': 8, # Signed 8 bit data 'PCM_16': 16, # Signed 16 bit data 'PCM_24': 24, # Signed 24 bit data @@ -24,11 +27,11 @@ 'PCM_U8': 8, # Unsigned 8 bit data (WAV and RAW only) 'FLOAT': 32, # 32 bit float data 'DOUBLE': 64, # 64 bit float data - 'ULAW': 0, # U-Law encoded. - 'ALAW': 0, # A-Law encoded. + 'ULAW': 8, # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types + 'ALAW': 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types 'IMA_ADPCM': 0, # IMA ADPCM. 'MS_ADPCM': 0, # Microsoft ADPCM. - 'GSM610': 0, # GSM 6.10 encoding. + 'GSM610': 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) 'VOX_ADPCM': 0, # OKI / Dialogix ADPCM 'G721_32': 0, # 32kbs G721 ADPCM encoding. 'G723_24': 0, # 24kbs G723 ADPCM encoding. @@ -39,7 +42,7 @@ 'DWVW_N': 0, # N bit Delta Width Variable Word encoding. 'DPCM_8': 8, # 8 bit differential PCM (XI only) 'DPCM_16': 16, # 16 bit differential PCM (XI only) - 'VORBIS': 0, # Xiph Vorbis encoding. + 'VORBIS': 0, # Xiph Vorbis encoding. (lossy) 'ALAC_16': 16, # Apple Lossless Audio Codec (16 bit). 'ALAC_20': 20, # Apple Lossless Audio Codec (20 bit). 'ALAC_24': 24, # Apple Lossless Audio Codec (24 bit). @@ -63,7 +66,16 @@ def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: AudioMetaData: meta data of the given audio. """ sinfo = soundfile.info(filepath) - bits_per_sample = SUBTYPE_TO_BITS_PER_SAMPLE[sinfo.subtype] + try: + bits_per_sample = _SUBTYPE_TO_BITS_PER_SAMPLE[sinfo.subtype] + except KeyError: + bits_per_sample = 0 + warnings.warn( + "The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " + "attribute will be set to 0. If you are seeing this warning, please " + "report by opening an issue on github (after checking for existing/closed ones). " + "You may otherwise ignore this warning." + ) return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels, bits_per_sample=bits_per_sample) From 49d2c7c3a91b933fa075949a5353df125cfdbe19 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 15 Jan 2021 11:15:41 +0000 Subject: [PATCH 07/16] Document bits_per_sample=0 --- torchaudio/backend/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 7cbfea027e..f9c6585bf8 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -12,7 +12,8 @@ class AudioMetaData: :ivar int sample_rate: Sample rate :ivar int num_frames: The number of frames :ivar int num_channels: The number of channels - :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats. + :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats, + or when it cannot be accurately inferred. """ def __init__(self, sample_rate: int, num_frames: int, num_channels: int, bits_per_sample: int): self.sample_rate = sample_rate From c64ea122e720da8913b9efb947573bac21024fc4 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 15 Jan 2021 13:16:18 +0000 Subject: [PATCH 08/16] fix amr-nb test --- test/torchaudio_unittest/sox_io_backend/info_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/torchaudio_unittest/sox_io_backend/info_test.py b/test/torchaudio_unittest/sox_io_backend/info_test.py index f25885bfb5..fc04062091 100644 --- a/test/torchaudio_unittest/sox_io_backend/info_test.py +++ b/test/torchaudio_unittest/sox_io_backend/info_test.py @@ -154,16 +154,13 @@ def test_amr_nb(self): num_channels = 1 sample_rate = 8000 path = self.get_temp_path('data.amr-nb') - bits_per_sample = 16 sox_utils.gen_audio_file( - path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=bits_per_sample, - duration=duration) + path, sample_rate=sample_rate, num_channels=num_channels, duration=duration) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels assert info.num_channels == num_channels - assert info.bits_per_sample == bits_per_sample @skipIfNoExtension From 01c26f9526ea35978428efa12f7cf0fe4b356217 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 18 Jan 2021 10:07:00 +0000 Subject: [PATCH 09/16] Addressed comments --- torchaudio/backend/_soundfile_backend.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py index 0631c7573a..844d66c77f 100644 --- a/torchaudio/backend/_soundfile_backend.py +++ b/torchaudio/backend/_soundfile_backend.py @@ -66,16 +66,14 @@ def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: AudioMetaData: meta data of the given audio. """ sinfo = soundfile.info(filepath) - try: - bits_per_sample = _SUBTYPE_TO_BITS_PER_SAMPLE[sinfo.subtype] - except KeyError: - bits_per_sample = 0 + if sinfo.subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: warnings.warn( "The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " "attribute will be set to 0. If you are seeing this warning, please " "report by opening an issue on github (after checking for existing/closed ones). " - "You may otherwise ignore this warning." + "You may otherwise ignore this warning.".format(subtype=sinfo.subtype) ) + bits_per_sample = _SUBTYPE_TO_BITS_PER_SAMPLE.get(sinfo.subtype, 0) return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels, bits_per_sample=bits_per_sample) From c91d0a36b7dba72fdfbbe7ad1ba1156a1918d461 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 18 Jan 2021 10:46:14 +0000 Subject: [PATCH 10/16] Use f-strings instead of .format() --- torchaudio/backend/_soundfile_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchaudio/backend/_soundfile_backend.py b/torchaudio/backend/_soundfile_backend.py index b34bd22b9f..54d97547f0 100644 --- a/torchaudio/backend/_soundfile_backend.py +++ b/torchaudio/backend/_soundfile_backend.py @@ -68,10 +68,10 @@ def info(filepath: str, format: Optional[str] = None) -> AudioMetaData: sinfo = soundfile.info(filepath) if sinfo.subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: warnings.warn( - "The {subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " + f"The {sinfo.subtype} subtype is unknown to TorchAudio. As a result, the bits_per_sample " "attribute will be set to 0. If you are seeing this warning, please " "report by opening an issue on github (after checking for existing/closed ones). " - "You may otherwise ignore this warning.".format(subtype=sinfo.subtype) + "You may otherwise ignore this warning." ) bits_per_sample = _SUBTYPE_TO_BITS_PER_SAMPLE.get(sinfo.subtype, 0) return AudioMetaData(sinfo.samplerate, sinfo.frames, sinfo.channels, bits_per_sample=bits_per_sample) From 04480b5847b112df4794c9ed25ff72ebdfe6276d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 19 Jan 2021 18:42:00 +0000 Subject: [PATCH 11/16] Addressed comments --- .../soundfile_backend/info_test.py | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index bf0e7cd26f..af30437e29 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -8,7 +8,6 @@ skipIfNoModule, get_wav_data, save_wav, - sox_utils, ) from .common import skipIfFormatNotSupported, parameterize @@ -22,10 +21,11 @@ @skipIfNoModule("soundfile") class TestInfo(TempDirMixin, PytorchTestCase): @parameterize( - ["float32", "int32", "int16", "uint8"], [8000, 16000], [1, 2], + [("float32", 32), ("int32", 32), ("int16", 16), ("uint8", 8)], [8000, 16000], [1, 2], ) - def test_wav(self, dtype, sample_rate, num_channels): + def test_wav(self, dtype_and_bit_depth, sample_rate, num_channels): """`soundfile_backend.info` can check wav file correctly""" + dtype, bits_per_sample = dtype_and_bit_depth duration = 1 path = self.get_temp_path("data.wav") data = get_wav_data( @@ -36,13 +36,14 @@ def test_wav(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) + assert info.bits_per_sample == bits_per_sample @parameterize( - ["float32", "int32", "int16", "uint8"], [8000, 16000], [4, 8, 16, 32], + [("float32", 32), ("int32", 32), ("int16", 16), ("uint8", 8)], [8000, 16000], [1, 2], ) - def test_wav_multiple_channels(self, dtype, sample_rate, num_channels): + def test_wav_multiple_channels(self, dtype_and_bit_depth, sample_rate, num_channels): """`soundfile_backend.info` can check wav file with channels more than 2 correctly""" + dtype, bits_per_sample = dtype_and_bit_depth duration = 1 path = self.get_temp_path("data.wav") data = get_wav_data( @@ -53,7 +54,7 @@ def test_wav_multiple_channels(self, dtype, sample_rate, num_channels): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.bits_per_sample == sox_utils.get_bit_depth(dtype) + assert info.bits_per_sample == bits_per_sample @parameterize([8000, 16000], [1, 2]) @skipIfFormatNotSupported("FLAC") @@ -104,30 +105,29 @@ def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth): assert info.num_channels == num_channels assert info.bits_per_sample == bits_per_sample - def test_unknown_subtype_warning(self): - """soundfile_backend.info issues a warning when the subtype is unknown - This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE - dict should be updated. - """ +def test_unknown_subtype_warning(tmp_path, monkeypatch): + """soundfile_backend.info issues a warning when the subtype is unknown - soundfile_info_original = soundfile.info + This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE + dict should be updated. + """ - def info_wrapper(filepath): - # Wraps soundfile.info and sets the subtype to some unknown value - sinfo = soundfile_info_original(filepath) - sinfo.subtype = 'SOME_UNKNOWN_SUBTYPE' - return sinfo + soundfile_info_original = soundfile.info - mp = MonkeyPatch() - mp.setattr(soundfile, "info", info_wrapper) + def info_wrapper(filepath): + # Wraps soundfile.info and sets the subtype to some unknown value + sinfo = soundfile_info_original(filepath) + sinfo.subtype = 'SOME_UNKNOWN_SUBTYPE' + return sinfo - path = self.get_temp_path("data.wav") - data = get_wav_data( - dtype='float32', num_channels=1, normalize=False, num_frames=16000 - ) - save_wav(path, data, sample_rate=16000) - with pytest.warns(UserWarning, match="subtype is unknown to TorchAudio"): - info = soundfile_backend.info(path) - assert info.bits_per_sample == 0 - mp.undo() + monkeypatch.setattr(soundfile, "info", info_wrapper) + + data = get_wav_data( + dtype='float32', num_channels=1, normalize=False, num_frames=16000 + ) + path = tmp_path / 'data.wav' + save_wav(path, data, sample_rate=16000) + with pytest.warns(UserWarning, match="subtype is unknown to TorchAudio"): + info = soundfile_backend.info(path) + assert info.bits_per_sample == 0 From 04b13781ae8d8cdbee39f78b899f8ba4e83755ab Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 19 Jan 2021 18:44:17 +0000 Subject: [PATCH 12/16] remove unused import --- test/torchaudio_unittest/soundfile_backend/info_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index af30437e29..56067853ff 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -12,7 +12,6 @@ from .common import skipIfFormatNotSupported, parameterize import pytest -from _pytest.monkeypatch import MonkeyPatch if _mod_utils.is_module_available("soundfile"): import soundfile From 1d30db6716a2352bfc6c54debe5899d86b6d8d3d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 20 Jan 2021 08:48:14 +0000 Subject: [PATCH 13/16] remove usage of pytest --- .../soundfile_backend/info_test.py | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index 56067853ff..b17ae9d60a 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -1,3 +1,6 @@ +from unittest.mock import patch +import warnings + import torch from torchaudio.backend import _soundfile_backend as soundfile_backend from torchaudio._internal import module_utils as _mod_utils @@ -11,8 +14,6 @@ ) from .common import skipIfFormatNotSupported, parameterize -import pytest - if _mod_utils.is_module_available("soundfile"): import soundfile @@ -104,29 +105,25 @@ def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth): assert info.num_channels == num_channels assert info.bits_per_sample == bits_per_sample + def test_unknown_subtype_warning(self): + """soundfile_backend.info issues a warning when the subtype is unknown -def test_unknown_subtype_warning(tmp_path, monkeypatch): - """soundfile_backend.info issues a warning when the subtype is unknown - - This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE - dict should be updated. - """ - - soundfile_info_original = soundfile.info - - def info_wrapper(filepath): - # Wraps soundfile.info and sets the subtype to some unknown value - sinfo = soundfile_info_original(filepath) - sinfo.subtype = 'SOME_UNKNOWN_SUBTYPE' - return sinfo - - monkeypatch.setattr(soundfile, "info", info_wrapper) - - data = get_wav_data( - dtype='float32', num_channels=1, normalize=False, num_frames=16000 - ) - path = tmp_path / 'data.wav' - save_wav(path, data, sample_rate=16000) - with pytest.warns(UserWarning, match="subtype is unknown to TorchAudio"): - info = soundfile_backend.info(path) - assert info.bits_per_sample == 0 + This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE + dict should be updated. + """ + dtype, sample_rate, duration, num_channels = 'float32', 16000, 1, 1 + duration = 1 + path = self.get_temp_path("data.wav") + data = get_wav_data( + dtype, num_channels, normalize=False, num_frames=duration * sample_rate + ) + save_wav(path, data, sample_rate=16000) + + # We pretend the _SUBTYPE_TO_BITS_PER_SAMPLE dict knows no subtype / is empty. + # It's easier to mock the internal dict rather than to mock the entire soundfile.info function + with patch.dict(soundfile_backend._SUBTYPE_TO_BITS_PER_SAMPLE, {}, clear=True): + with warnings.catch_warnings(record=True) as w: + info = soundfile_backend.info(path) + assert len(w) == 1 + assert "subtype is unknown to TorchAudio" in str(w[-1].message) + assert info.bits_per_sample == 0 From 41d202093c7e68c177576113c5c761d378f3a521 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Jan 2021 09:33:57 +0000 Subject: [PATCH 14/16] Addressed comments --- .../soundfile_backend/info_test.py | 26 +++++++++---------- .../sox_io_backend/info_test.py | 5 ++-- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/test/torchaudio_unittest/soundfile_backend/info_test.py b/test/torchaudio_unittest/soundfile_backend/info_test.py index b17ae9d60a..05c9ddc3ce 100644 --- a/test/torchaudio_unittest/soundfile_backend/info_test.py +++ b/test/torchaudio_unittest/soundfile_backend/info_test.py @@ -111,19 +111,17 @@ def test_unknown_subtype_warning(self): This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE dict should be updated. """ - dtype, sample_rate, duration, num_channels = 'float32', 16000, 1, 1 - duration = 1 - path = self.get_temp_path("data.wav") - data = get_wav_data( - dtype, num_channels, normalize=False, num_frames=duration * sample_rate - ) - save_wav(path, data, sample_rate=16000) + def _mock_info_func(_): + class MockSoundFileInfo: + samplerate = 8000 + frames = 356 + channels = 2 + subtype = 'UNSEEN_SUBTYPE' + return MockSoundFileInfo() - # We pretend the _SUBTYPE_TO_BITS_PER_SAMPLE dict knows no subtype / is empty. - # It's easier to mock the internal dict rather than to mock the entire soundfile.info function - with patch.dict(soundfile_backend._SUBTYPE_TO_BITS_PER_SAMPLE, {}, clear=True): + with patch("soundfile.info", _mock_info_func): with warnings.catch_warnings(record=True) as w: - info = soundfile_backend.info(path) - assert len(w) == 1 - assert "subtype is unknown to TorchAudio" in str(w[-1].message) - assert info.bits_per_sample == 0 + info = soundfile_backend.info("foo") + assert len(w) == 1 + assert "UNSEEN_SUBTYPE subtype is unknown to TorchAudio" in str(w[-1].message) + assert info.bits_per_sample == 0 diff --git a/test/torchaudio_unittest/sox_io_backend/info_test.py b/test/torchaudio_unittest/sox_io_backend/info_test.py index fc04062091..66da0a2e6c 100644 --- a/test/torchaudio_unittest/sox_io_backend/info_test.py +++ b/test/torchaudio_unittest/sox_io_backend/info_test.py @@ -155,12 +155,13 @@ def test_amr_nb(self): sample_rate = 8000 path = self.get_temp_path('data.amr-nb') sox_utils.gen_audio_file( - path, sample_rate=sample_rate, num_channels=num_channels, duration=duration) + path, sample_rate=sample_rate, num_channel=num_channels, bit_depth=16, + duration=duration) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.num_channels == num_channels + assert info.bits_per_sample == 16 @skipIfNoExtension From ffee30aeb281540e1be6e021289ca80363daa545 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Jan 2021 09:57:46 +0000 Subject: [PATCH 15/16] use proper param name --- test/torchaudio_unittest/sox_io_backend/info_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/sox_io_backend/info_test.py b/test/torchaudio_unittest/sox_io_backend/info_test.py index 66da0a2e6c..d6b38cfd70 100644 --- a/test/torchaudio_unittest/sox_io_backend/info_test.py +++ b/test/torchaudio_unittest/sox_io_backend/info_test.py @@ -155,7 +155,7 @@ def test_amr_nb(self): sample_rate = 8000 path = self.get_temp_path('data.amr-nb') sox_utils.gen_audio_file( - path, sample_rate=sample_rate, num_channel=num_channels, bit_depth=16, + path, sample_rate=sample_rate, num_channels=num_channels, bit_depth=16, duration=duration) info = sox_io_backend.info(path) assert info.sample_rate == sample_rate From 532397f563d6f2e16aa3b960c1ba4d86f16506a6 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 21 Jan 2021 10:42:22 +0000 Subject: [PATCH 16/16] expected bps is 0 for amr_nb? --- test/torchaudio_unittest/sox_io_backend/info_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/sox_io_backend/info_test.py b/test/torchaudio_unittest/sox_io_backend/info_test.py index d6b38cfd70..2b3b8ffdb2 100644 --- a/test/torchaudio_unittest/sox_io_backend/info_test.py +++ b/test/torchaudio_unittest/sox_io_backend/info_test.py @@ -161,7 +161,7 @@ def test_amr_nb(self): assert info.sample_rate == sample_rate assert info.num_frames == sample_rate * duration assert info.num_channels == num_channels - assert info.bits_per_sample == 16 + assert info.bits_per_sample == 0 @skipIfNoExtension