From 3f07949802db985e348012664fd95bc6d62f169d Mon Sep 17 00:00:00 2001
From: Sanchit <sanchit@cs.wisc.edu>
Date: Tue, 23 Feb 2021 14:29:14 -0600
Subject: [PATCH 1/4] Cherry-pick #1276 and #1291 from master (add HTK format support)

---
 .../backend/sox_io/info_test.py                | 18 +++++++++++++++++-
 .../backend/sox_io/save_test.py                |  6 ++++++
 torchaudio/backend/sox_io_backend.py           |  5 ++++-
 torchaudio/csrc/sox/io.cpp                     | 10 ++++++++++
 torchaudio/csrc/sox/types.cpp                  |  2 ++
 torchaudio/csrc/sox/types.h                    |  1 +
 torchaudio/csrc/sox/utils.cpp                  | 13 ++++++++++++-
 7 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
index a2a93648a1..5be66c5e77 100644
--- a/test/torchaudio_unittest/backend/sox_io/info_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -205,7 +205,7 @@ def test_ulaw(self):
         assert info.encoding == "ULAW"
 
     def test_alaw(self):
-        """`sox_io_backend.info` can check ulaw file correctly"""
+        """`sox_io_backend.info` can check alaw file correctly"""
         duration = 1
         num_channels = 1
         sample_rate = 8000
@@ -220,6 +220,22 @@ def test_alaw(self):
         assert info.num_channels == num_channels
         assert info.bits_per_sample == 8
         assert info.encoding == "ALAW"
+        
+    def test_htk(self):
+        """`sox_io_backend.info` can check HTK file correctly"""
+        duration = 1
+        num_channels = 1
+        sample_rate = 8000
+        path = self.get_temp_path('data.htk')
+        sox_utils.gen_audio_file(
+            path, sample_rate=sample_rate, num_channels=num_channels,
+            bit_depth=16, duration=duration)
+        info = sox_io_backend.info(path)
+        assert info.sample_rate == sample_rate
+        assert info.num_frames == sample_rate * duration
+        assert info.num_channels == num_channels
+        assert info.bits_per_sample == 16
+        assert info.encoding == "PCM_S"
 
 
 @skipIfNoExtension
diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py
index 5d3fdb03ca..da9365a24a 100644
--- a/test/torchaudio_unittest/backend/sox_io/save_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/save_test.py
@@ -317,6 +317,12 @@ def test_save_gsm(self, test_mode):
         self.assert_save_consistency(
             "gsm", test_mode=test_mode)
 
+    @nested_params(
+        ["path", "fileobj", "bytesio"],
+    )
+    def test_save_htk(self, test_mode):
+        self.assert_save_consistency("htk", test_mode=test_mode, num_channels=1)
+
     @parameterized.expand([
         ("wav", "PCM_S", 16),
         ("mp3", ),
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 91b4474a9d..e175913daa 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -195,7 +195,7 @@ def save(
             When ``filepath`` argument is file-like object, this argument is required.
 
             Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
-            ``"amb"``, ``"flac"``, ``"sph"`` and ``"gsm"``.
+            ``"amb"``, ``"flac"``, ``"sph"``, ``"gsm"``, and ``"htk"``.
         encoding (str, optional): Changes the encoding for the supported formats.
             This argument is effective only for supported formats, such as ``"wav"``, ``""amb"``
             and ``"sph"``. Valid values are;
@@ -294,6 +294,9 @@ def save(
     ``"gsm"``
         Lossy Speech Compression, CPU intensive.
 
+    ``"htk"``
+        Uses a default single-channel 16-bit PCM format.
+
     Note:
         To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,
         ``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has
diff --git a/torchaudio/csrc/sox/io.cpp b/torchaudio/csrc/sox/io.cpp
index 8bc520feba..4414e6ca5d 100644
--- a/torchaudio/csrc/sox/io.cpp
+++ b/torchaudio/csrc/sox/io.cpp
@@ -132,6 +132,10 @@ void save_audio_file(
     const auto num_channels = tensor.size(channels_first ? 0 : 1);
     TORCH_CHECK(
         num_channels == 1, "amr-nb format only supports single channel audio.");
+  } else if (filetype == "htk") {
+    const auto num_channels = tensor.size(channels_first ? 0 : 1);
+    TORCH_CHECK(
+        num_channels == 1, "htk format only supports single channel audio.");
   }
   const auto signal_info =
       get_signalinfo(&tensor, sample_rate, filetype, channels_first);
@@ -268,6 +272,12 @@ void save_audio_fileobj(
       throw std::runtime_error(
           "amr-nb format only supports single channel audio.");
     }
+  } else if (filetype == "htk") {
+    const auto num_channels = tensor.size(channels_first ? 0 : 1);
+    if (num_channels != 1) {
+      throw std::runtime_error(
+          "htk format only supports single channel audio.");
+    }
   }
   const auto signal_info =
       get_signalinfo(&tensor, sample_rate, filetype, channels_first);
diff --git a/torchaudio/csrc/sox/types.cpp b/torchaudio/csrc/sox/types.cpp
index 59e9d320c4..ef5ca40812 100644
--- a/torchaudio/csrc/sox/types.cpp
+++ b/torchaudio/csrc/sox/types.cpp
@@ -20,6 +20,8 @@ Format get_format_from_string(const std::string& format) {
     return Format::AMB;
   if (format == "sph")
     return Format::SPHERE;
+  if (format == "htk")
+    return Format::HTK;
   if (format == "gsm")
     return Format::GSM;
   std::ostringstream stream;
diff --git a/torchaudio/csrc/sox/types.h b/torchaudio/csrc/sox/types.h
index f3a337407c..c3512f42d4 100644
--- a/torchaudio/csrc/sox/types.h
+++ b/torchaudio/csrc/sox/types.h
@@ -16,6 +16,7 @@ enum class Format {
   AMB,
   SPHERE,
   GSM,
+  HTK,
 };
 
 Format get_format_from_string(const std::string& format);
diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index 71bca54b7e..cda2b3162c 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -314,6 +314,13 @@ std::tuple<sox_encoding_t, unsigned> get_save_encoding(
         throw std::runtime_error(
             "mp3 does not support `bits_per_sample` option.");
       return std::make_tuple<>(SOX_ENCODING_MP3, 16);
+    case Format::HTK:
+      if (enc != Encoding::NOT_PROVIDED)
+        throw std::runtime_error("htk does not support `encoding` option.");
+      if (bps != BitDepth::NOT_PROVIDED)
+        throw std::runtime_error(
+            "htk does not support `bits_per_sample` option.");
+      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
     case Format::VORBIS:
       if (enc != Encoding::NOT_PROVIDED)
         throw std::runtime_error("vorbis does not support `encoding` option.");
@@ -417,8 +424,12 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) {
   if (filetype == "amr-nb") {
     return 16;
   }
-  if (filetype == "gsm")
+  if (filetype == "gsm") {
+    return 16;      return 16;
+  }
+  if (filetype == "htk") {
     return 16;
+  }
   throw std::runtime_error("Unsupported file type: " + filetype);
 }
 

From 9e2d0a882417f9a98a30083349dd309b79faf713 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Tue, 23 Feb 2021 14:35:08 -0600
Subject: [PATCH 2/4] Fix duplicated `return 16;` in get_precision

---
 torchaudio/csrc/sox/utils.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index cda2b3162c..83bb31bc9c 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -425,7 +425,7 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) {
     return 16;
   }
   if (filetype == "gsm") {
-    return 16;      return 16;
+    return 16;
   }
   if (filetype == "htk") {
     return 16;

From 9b16f1a5255758800e759ea845d904cdc7831029 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Tue, 23 Feb 2021 14:42:29 -0600
Subject: [PATCH 3/4] Remove trailing whitespace in info_test.py

---
 test/torchaudio_unittest/backend/sox_io/info_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
index 5be66c5e77..8701414f6e 100644
--- a/test/torchaudio_unittest/backend/sox_io/info_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -220,7 +220,7 @@ def test_alaw(self):
         assert info.num_channels == num_channels
         assert info.bits_per_sample == 8
         assert info.encoding == "ALAW"
-        
+
     def test_htk(self):
         """`sox_io_backend.info` can check HTK file correctly"""
         duration = 1

From 013a65723b47e03c9a8c970979a772d5cec1121f Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Tue, 23 Feb 2021 14:53:50 -0600
Subject: [PATCH 4/4] Reword docstring to re-trigger CI

The previous CI run failed because CUDA was unavailable on the runner; see https://app.circleci.com/pipelines/github/pytorch/audio/5168/workflows/27fce196-7df5-46cc-b41b-50a656b86aab/jobs/169438.

This commit only rewords one docstring line so that a new commit is pushed and CI runs again.
---
 torchaudio/backend/sox_io_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index e175913daa..036aa5f4ac 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -295,7 +295,7 @@ def save(
         Lossy Speech Compression, CPU intensive.
 
     ``"htk"``
-        Uses a default single-channel 16-bit PCM format.
+        Uses its default single-channel 16-bit PCM format.
 
     Note:
         To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,