From 4ed1a346fae7b5bbd7ed32f01de07dbe6af40152 Mon Sep 17 00:00:00 2001 From: Prabhat Roy Date: Thu, 18 Feb 2021 04:55:34 +0000 Subject: [PATCH] Add GSM format support to sox_io's save function (#1275) (cherry picked from commit 490a53e5601bd71dfddbb3789958f5a18a08f475) --- test/torchaudio_unittest/backend/sox_io/save_test.py | 7 +++++++ torchaudio/backend/sox_io_backend.py | 5 ++++- torchaudio/csrc/sox/types.cpp | 2 ++ torchaudio/csrc/sox/types.h | 1 + torchaudio/csrc/sox/utils.cpp | 10 ++++++++++ 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py index dca9c3ad9c..5d3fdb03ca 100644 --- a/test/torchaudio_unittest/backend/sox_io/save_test.py +++ b/test/torchaudio_unittest/backend/sox_io/save_test.py @@ -310,6 +310,13 @@ def test_save_amr_nb(self, test_mode, bit_rate): self.assert_save_consistency( "amr-nb", compression=bit_rate, num_channels=1, test_mode=test_mode) + @nested_params( + ["path", "fileobj", "bytesio"], + ) + def test_save_gsm(self, test_mode): + self.assert_save_consistency( + "gsm", test_mode=test_mode) + @parameterized.expand([ ("wav", "PCM_S", 16), ("mp3", ), diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 16abc70deb..6f33de4e05 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -195,7 +195,7 @@ def save( When ``filepath`` argument is file-like object, this argument is required. Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``, - ``"amb"``, ``"flac"`` and ``"sph"``. + ``"amb"``, ``"flac"``, ``"sph"`` and ``"gsm"``. encoding (str, optional): Changes the encoding for the supported formats. This argument is effective only for supported formats, cush as ``"wav"``, ``""amb"`` and ``"sph"``. Valid values are; @@ -291,6 +291,9 @@ def save( ``"amr-nb"`` Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s + ``"gsm"`` + Lossy Speech Compression, CPU intensive. + Note: To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``, ``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has diff --git a/torchaudio/csrc/sox/types.cpp b/torchaudio/csrc/sox/types.cpp index 51e8e720d6..59e9d320c4 100644 --- a/torchaudio/csrc/sox/types.cpp +++ b/torchaudio/csrc/sox/types.cpp @@ -20,6 +20,8 @@ Format get_format_from_string(const std::string& format) { return Format::AMB; if (format == "sph") return Format::SPHERE; + if (format == "gsm") + return Format::GSM; std::ostringstream stream; stream << "Internal Error: unexpected format value: " << format; throw std::runtime_error(stream.str()); diff --git a/torchaudio/csrc/sox/types.h b/torchaudio/csrc/sox/types.h index f3ed637478..f3a337407c 100644 --- a/torchaudio/csrc/sox/types.h +++ b/torchaudio/csrc/sox/types.h @@ -15,6 +15,7 @@ enum class Format { AMR_WB, AMB, SPHERE, + GSM, }; Format get_format_from_string(const std::string& format); diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp index 99a264642f..71bca54b7e 100644 --- a/torchaudio/csrc/sox/utils.cpp +++ b/torchaudio/csrc/sox/utils.cpp @@ -378,6 +378,14 @@ std::tuple get_save_encoding( throw std::runtime_error( "sph does not support encoding: " + encoding.value()); } + case Format::GSM: + if (enc != Encoding::NOT_PROVIDED) + throw std::runtime_error("gsm does not support `encoding` option."); + if (bps != BitDepth::NOT_PROVIDED) + throw std::runtime_error( + "gsm does not support `bits_per_sample` option."); + return std::make_tuple<>(SOX_ENCODING_GSM, 16); + default: throw std::runtime_error("Unsupported format: " + format); } @@ -409,6 +417,8 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) { if (filetype == "amr-nb") { return 16; } + if (filetype == "gsm") + return 16; throw std::runtime_error("Unsupported file type: " + filetype); }