From 058fbf102b6f9c905ef1b50764f1d6b381d3048e Mon Sep 17 00:00:00 2001
From: Sanchit <sanchit@cs.wisc.edu>
Date: Wed, 17 Feb 2021 14:47:50 -0600
Subject: [PATCH 1/8] Add HTK format support to the sox_io backend's save function

---
 test/torchaudio_unittest/backend/sox_io/save_test.py |  6 ++++++
 torchaudio/backend/sox_io_backend.py                 |  5 ++++-
 torchaudio/csrc/sox/types.cpp                        |  2 ++
 torchaudio/csrc/sox/types.h                          |  1 +
 torchaudio/csrc/sox/utils.cpp                        | 10 ++++++++++
 5 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py
index dca9c3ad9c..17d2130cb6 100644
--- a/test/torchaudio_unittest/backend/sox_io/save_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/save_test.py
@@ -237,6 +237,12 @@ def test_save_flac(self, test_mode, bits_per_sample, compression_level):
             "flac", compression=compression_level,
             bits_per_sample=bits_per_sample, test_mode=test_mode)
 
+    @nested_params(
+        ["path", "fileobj", "bytesio"],
+    )
+    def test_save_hkt(self, test_mode, ):
+        self.assert_save_consistency("hkt", test_mode=test_mode, num_channels=1)
+
     @nested_params(
         ["path", "fileobj", "bytesio"],
         [
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 16abc70deb..d3d0bf5f3f 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -195,7 +195,7 @@ def save(
             When ``filepath`` argument is file-like object, this argument is required.
 
             Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
-            ``"amb"``, ``"flac"`` and ``"sph"``.
+            ``"amb"``, ``"flac"``, ``"sph"``, and ``"htk"``.
         encoding (str, optional): Changes the encoding for the supported formats.
             This argument is effective only for supported formats, cush as ``"wav"``, ``""amb"``
             and ``"sph"``. Valid values are;
@@ -291,6 +291,9 @@ def save(
     ``"amr-nb"``
         Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s
 
+    ``"htk"``
+        Uses its default Single channel 16-bit PCM format.
+
     Note:
         To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,
         ``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has
diff --git a/torchaudio/csrc/sox/types.cpp b/torchaudio/csrc/sox/types.cpp
index 51e8e720d6..49e6762fdc 100644
--- a/torchaudio/csrc/sox/types.cpp
+++ b/torchaudio/csrc/sox/types.cpp
@@ -20,6 +20,8 @@ Format get_format_from_string(const std::string& format) {
     return Format::AMB;
   if (format == "sph")
     return Format::SPHERE;
+  if (format == "htk")
+    return Format::HTK;
   std::ostringstream stream;
   stream << "Internal Error: unexpected format value: " << format;
   throw std::runtime_error(stream.str());
diff --git a/torchaudio/csrc/sox/types.h b/torchaudio/csrc/sox/types.h
index f3ed637478..192330cdb0 100644
--- a/torchaudio/csrc/sox/types.h
+++ b/torchaudio/csrc/sox/types.h
@@ -15,6 +15,7 @@ enum class Format {
   AMR_WB,
   AMB,
   SPHERE,
+  HTK,
 };
 
 Format get_format_from_string(const std::string& format);
diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index 99a264642f..e9cea37359 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -314,6 +314,13 @@ std::tuple<sox_encoding_t, unsigned> get_save_encoding(
         throw std::runtime_error(
             "mp3 does not support `bits_per_sample` option.");
       return std::make_tuple<>(SOX_ENCODING_MP3, 16);
+    case Format::HTK:
+      if (enc != Encoding::NOT_PROVIDED)
+        throw std::runtime_error("htk does not support `encoding` option.");
+      if (bps != BitDepth::NOT_PROVIDED)
+        throw std::runtime_error(
+            "htk does not support `bits_per_sample` option.");
+      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
     case Format::VORBIS:
       if (enc != Encoding::NOT_PROVIDED)
         throw std::runtime_error("vorbis does not support `encoding` option.");
@@ -409,6 +416,9 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) {
   if (filetype == "amr-nb") {
     return 16;
   }
+  if (filetype == "htk") {
+    return 16;
+  }
   throw std::runtime_error("Unsupported file type: " + filetype);
 }
 

From eeb5731de6c7705580c86f953c3b34f83aa83dc3 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Wed, 17 Feb 2021 14:59:48 -0600
Subject: [PATCH 2/8] Fix typo: rename "hkt" to "htk" in the sox_io save test

---
 test/torchaudio_unittest/backend/sox_io/save_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py
index 17d2130cb6..b1c8697f91 100644
--- a/test/torchaudio_unittest/backend/sox_io/save_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/save_test.py
@@ -240,8 +240,8 @@ def test_save_flac(self, test_mode, bits_per_sample, compression_level):
     @nested_params(
         ["path", "fileobj", "bytesio"],
     )
-    def test_save_hkt(self, test_mode, ):
-        self.assert_save_consistency("hkt", test_mode=test_mode, num_channels=1)
+    def test_save_htk(self, test_mode, ):
+        self.assert_save_consistency("htk", test_mode=test_mode, num_channels=1)
 
     @nested_params(
         ["path", "fileobj", "bytesio"],

From fec3ca3d3645b64f8d4399628a0f1470e6c6c38b Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Wed, 17 Feb 2021 15:07:45 -0600
Subject: [PATCH 3/8] Fix another typo: drop stray trailing comma in test_save_htk

---
 test/torchaudio_unittest/backend/sox_io/save_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/save_test.py b/test/torchaudio_unittest/backend/sox_io/save_test.py
index b1c8697f91..349042b1a3 100644
--- a/test/torchaudio_unittest/backend/sox_io/save_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/save_test.py
@@ -240,7 +240,7 @@ def test_save_flac(self, test_mode, bits_per_sample, compression_level):
     @nested_params(
         ["path", "fileobj", "bytesio"],
     )
-    def test_save_htk(self, test_mode, ):
+    def test_save_htk(self, test_mode):
         self.assert_save_consistency("htk", test_mode=test_mode, num_channels=1)
 
     @nested_params(

From 282caa35ec6036d4bf06493d701b9376f2d4a3b6 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Wed, 17 Feb 2021 15:32:20 -0600
Subject: [PATCH 4/8] Edit comment to trigger CI

Edit comment to trigger CI
---
 torchaudio/backend/sox_io_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index d3d0bf5f3f..626def6772 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -292,7 +292,7 @@ def save(
         Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s
 
     ``"htk"``
-        Uses its default Single channel 16-bit PCM format.
+        Uses its default single-channel 16-bit PCM format.
 
     Note:
         To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,

From 3db2aeab4c193ffd73e5341cedecec3799e764f2 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Thu, 18 Feb 2021 11:06:22 -0600
Subject: [PATCH 5/8] Changed merge-order to simplify diff

---
 torchaudio/csrc/sox/utils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchaudio/csrc/sox/utils.cpp b/torchaudio/csrc/sox/utils.cpp
index b75d89275b..83bb31bc9c 100644
--- a/torchaudio/csrc/sox/utils.cpp
+++ b/torchaudio/csrc/sox/utils.cpp
@@ -424,10 +424,10 @@ unsigned get_precision(const std::string filetype, caffe2::TypeMeta dtype) {
   if (filetype == "amr-nb") {
     return 16;
   }
-  if (filetype == "htk") {
+  if (filetype == "gsm") {
     return 16;
   }
-  if (filetype == "gsm") {
+  if (filetype == "htk") {
     return 16;
   }
   throw std::runtime_error("Unsupported file type: " + filetype);

From 4c98e5ed1c268d9e746239944d9d379b5cd4caad Mon Sep 17 00:00:00 2001
From: Sanchit <sanchit@cs.wisc.edu>
Date: Thu, 18 Feb 2021 11:41:45 -0600
Subject: [PATCH 6/8] Added info test for htk

---
 .../backend/sox_io/info_test.py                 | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
index a2a93648a1..5cf4881099 100644
--- a/test/torchaudio_unittest/backend/sox_io/info_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -205,7 +205,7 @@ def test_ulaw(self):
         assert info.encoding == "ULAW"
 
     def test_alaw(self):
-        """`sox_io_backend.info` can check ulaw file correctly"""
+        """`sox_io_backend.info` can check alaw file correctly"""
         duration = 1
         num_channels = 1
         sample_rate = 8000
@@ -221,6 +221,21 @@ def test_alaw(self):
         assert info.bits_per_sample == 8
         assert info.encoding == "ALAW"
 
+    def test_htk(self):
+        """`sox_io_backend.info` can check HTK file correctly"""
+        duration = 1
+        num_channels = 1
+        sample_rate = 8000
+        path = self.get_temp_path('data.wav')
+        sox_utils.gen_audio_file(
+            path, sample_rate=sample_rate, num_channels=num_channels,
+            bit_depth=16, duration=duration)
+        info = sox_io_backend.info(path)
+        assert info.sample_rate == sample_rate
+        assert info.num_frames == sample_rate * duration
+        assert info.num_channels == num_channels
+        assert info.bits_per_sample == 16
+        assert info.encoding == "PCM_S"
 
 @skipIfNoExtension
 class TestInfoOpus(PytorchTestCase):

From 1a96f76d5d3139d461707f9a5e42d3b93deea023 Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Thu, 18 Feb 2021 11:58:39 -0600
Subject: [PATCH 7/8] Fix temp file extension in test_htk (.wav -> .htk) & add newline

---
 test/torchaudio_unittest/backend/sox_io/info_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/torchaudio_unittest/backend/sox_io/info_test.py b/test/torchaudio_unittest/backend/sox_io/info_test.py
index 5cf4881099..8701414f6e 100644
--- a/test/torchaudio_unittest/backend/sox_io/info_test.py
+++ b/test/torchaudio_unittest/backend/sox_io/info_test.py
@@ -226,7 +226,7 @@ def test_htk(self):
         duration = 1
         num_channels = 1
         sample_rate = 8000
-        path = self.get_temp_path('data.wav')
+        path = self.get_temp_path('data.htk')
         sox_utils.gen_audio_file(
             path, sample_rate=sample_rate, num_channels=num_channels,
             bit_depth=16, duration=duration)
@@ -237,6 +237,7 @@ def test_htk(self):
         assert info.bits_per_sample == 16
         assert info.encoding == "PCM_S"
 
+
 @skipIfNoExtension
 class TestInfoOpus(PytorchTestCase):
     @parameterized.expand(list(itertools.product(

From 91e611905bc23556ffbc0027dbf0863b10b9701e Mon Sep 17 00:00:00 2001
From: SJ <76181208+imaginary-person@users.noreply.github.com>
Date: Thu, 18 Feb 2021 21:46:59 -0600
Subject: [PATCH 8/8] Update comment to trigger CI

Update comment to trigger CI
---
 torchaudio/backend/sox_io_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 54061ce1fe..54bacd5e5f 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -296,7 +296,7 @@ def save(
         Lossy Speech Compression, CPU intensive.
 
     ``"htk"``
-        Uses its default single-channel 16-bit PCM format.
+        Uses a default single-channel 16-bit PCM format.
 
     Note:
         To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,