From 566414c6e784e11aa1e3c157373ee50e81df9170 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Thu, 23 Jul 2020 15:00:25 +0000
Subject: [PATCH 1/3] Add sample_rate option to sox_io_backend.load

---
 .../sox_io_backend/load_test.py               | 44 +++++++++++++++++++
 torchaudio/backend/sox_io_backend.py          | 10 ++++-
 torchaudio/csrc/register.cpp                  |  3 ++
 torchaudio/csrc/sox_io.cpp                    | 18 ++++++++
 torchaudio/csrc/sox_io.h                      | 10 +++++
 5 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/test/torchaudio_unittest/sox_io_backend/load_test.py b/test/torchaudio_unittest/sox_io_backend/load_test.py
index 59bcaea386..5e3e0100e6 100644
--- a/test/torchaudio_unittest/sox_io_backend/load_test.py
+++ b/test/torchaudio_unittest/sox_io_backend/load_test.py
@@ -291,3 +291,47 @@ def test_channels_first(self, channels_first):
         found, _ = sox_io_backend.load(self.path, channels_first=channels_first)
         expected = self.original if channels_first else self.original.transpose(1, 0)
         self.assertEqual(found, expected)
+
+
+@skipIfNoExec('sox')
+@skipIfNoExtension
+class TestSampleRate(TempDirMixin, PytorchTestCase):
+    """Test the correctness of frame parameters of `sox_io_backend.load`"""
+    path = None
+
+    def setUp(self):
+        super().setUp()
+        sample_rate = 16000
+        original = get_wav_data('float32', num_channels=2)
+        self.path = self.get_temp_path('original.wave')
+        save_wav(self.path, original, sample_rate)
+
+    @parameterized.expand([(8000, ), (44100, )], name_func=name_func)
+    def test_sample_rate(self, sample_rate):
+        """sample_rate changes sample rate"""
+        found, rate = sox_io_backend.load(self.path, sample_rate=sample_rate)
+        ref_path = self.get_temp_path('reference.wav')
+        sox_utils.run_sox_effect(self.path, ref_path, ['rate', f'{sample_rate}'])
+        expected, expected_rate = load_wav(ref_path)
+
+        assert rate == expected_rate
+        self.assertEqual(found, expected)
+
+    @parameterized.expand(list(itertools.product(
+        [8000, 44100],
+        [0, 1, 10, 100, 1000],
+        [-1, 1, 10, 100, 1000],
+    )), name_func=name_func)
+    def test_frame(self, sample_rate, frame_offset, num_frames):
+        """frame_offset and num_frames applied after sample_rate"""
+        found, rate = sox_io_backend.load(
+            self.path, frame_offset=frame_offset, num_frames=num_frames, sample_rate=sample_rate)
+
+        ref_path = self.get_temp_path('reference.wav')
+        sox_utils.run_sox_effect(self.path, ref_path, ['rate', f'{sample_rate}'])
+        reference, expected_rate = load_wav(ref_path)
+        frame_end = None if num_frames == -1 else frame_offset + num_frames
+        expected = reference[:, frame_offset:frame_end]
+
+        assert rate == expected_rate
+        self.assertEqual(found, expected)
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 31e69c443e..ee17ba9283 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -40,6 +40,7 @@ def load(
         num_frames: int = -1,
         normalize: bool = True,
         channels_first: bool = True,
+        sample_rate: Optional[int] = None,
 ) -> Tuple[torch.Tensor, int]:
     """Load audio data from file.
 
@@ -84,11 +85,13 @@ def load(
             Path to audio file
         frame_offset (int):
             Number of frames to skip before start reading data.
+            If ``sample_rate`` is given, frame counts start after the audio is resampled.
         num_frames (int):
             Maximum number of frames to read. ``-1`` reads all the remaining samples,
             starting from ``frame_offset``.
             This function may return the less number of frames if there is not enough
             frames in the given file.
+            If ``sample_rate`` is given, frame counts start after the audio is resampled.
         normalize (bool):
             When ``True``, this function always return ``float32``, and sample values are
             normalized to ``[-1.0, 1.0]``.
@@ -98,6 +101,8 @@ def load(
         channels_first (bool):
             When True, the returned Tensor has dimension ``[channel, time]``.
             Otherwise, the returned Tensor's dimension is ``[time, channel]``.
+        sample_rate (int, optional):
+            Perform resampling.
 
     Returns:
         torch.Tensor:
@@ -105,8 +110,9 @@ def load(
             integer type, else ``float32`` type. If ``channels_first=True``, it has
             ``[channel, time]`` else ``[time, channel]``.
     """
-    signal = torch.ops.torchaudio.sox_io_load_audio_file(
-        filepath, frame_offset, num_frames, normalize, channels_first)
+    sample_rate = -1 if sample_rate is None else sample_rate
+    signal = torch.ops.torchaudio.sox_io_load_audio_file_v1(
+        filepath, frame_offset, num_frames, normalize, channels_first, sample_rate)
     return signal.get_tensor(), signal.get_sample_rate()
 
 
diff --git a/torchaudio/csrc/register.cpp b/torchaudio/csrc/register.cpp
index a1ec8f2254..63c9ffd1d4 100644
--- a/torchaudio/csrc/register.cpp
+++ b/torchaudio/csrc/register.cpp
@@ -51,6 +51,9 @@ TORCH_LIBRARY(torchaudio, m) {
   m.def(
       "torchaudio::sox_io_load_audio_file",
       &torchaudio::sox_io::load_audio_file);
+  m.def(
+      "torchaudio::sox_io_load_audio_file_v1",
+      &torchaudio::sox_io::load_audio_file_v1);
   m.def(
       "torchaudio::sox_io_save_audio_file",
       &torchaudio::sox_io::save_audio_file);
diff --git a/torchaudio/csrc/sox_io.cpp b/torchaudio/csrc/sox_io.cpp
index 4092d215d0..c7e4b8ad3c 100644
--- a/torchaudio/csrc/sox_io.cpp
+++ b/torchaudio/csrc/sox_io.cpp
@@ -53,6 +53,16 @@ c10::intrusive_ptr<TensorSignal> load_audio_file(
     const int64_t num_frames,
     const bool normalize,
     const bool channels_first) {
+  return load_audio_file_v1(path, frame_offset, num_frames, channels_first, -1);
+}
+
+c10::intrusive_ptr<TensorSignal> load_audio_file_v1(
+    const std::string& path,
+    const int64_t frame_offset,
+    const int64_t num_frames,
+    const bool normalize,
+    const bool channels_first,
+    const int64_t sample_rate) {
   if (frame_offset < 0) {
     throw std::runtime_error(
         "Invalid argument: frame_offset must be non-negative.");
@@ -61,8 +71,16 @@ c10::intrusive_ptr<TensorSignal> load_audio_file(
     throw std::runtime_error(
         "Invalid argument: num_frames must be -1 or greater than 0.");
   }
+  if (sample_rate == 0 || sample_rate < -1) {
+    throw std::runtime_error(
+        "Invalid argument: sample_rate must be -1 or greater than 0.");
+  }
 
   std::vector<std::vector<std::string>> effects;
+  if (sample_rate != -1) {
+    effects.emplace_back(
+        std::vector<std::string>{"rate", std::to_string(sample_rate)});
+  }
   if (num_frames != -1) {
     std::ostringstream offset, frames;
     offset << frame_offset << "s";
diff --git a/torchaudio/csrc/sox_io.h b/torchaudio/csrc/sox_io.h
index 5288e911e8..a92bbc43d4 100644
--- a/torchaudio/csrc/sox_io.h
+++ b/torchaudio/csrc/sox_io.h
@@ -23,6 +23,7 @@ struct SignalInfo : torch::CustomClassHolder {
 
 c10::intrusive_ptr<SignalInfo> get_info(const std::string& path);
 
+// ver. 0
 c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file(
     const std::string& path,
     const int64_t frame_offset = 0,
@@ -30,6 +31,15 @@ c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file(
     const bool normalize = true,
     const bool channels_first = true);
 
+// ver. 1 sample_rate is added
+c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file_v1(
+    const std::string& path,
+    const int64_t frame_offset = 0,
+    const int64_t num_frames = -1,
+    const bool normalize = true,
+    const bool channels_first = true,
+    const int64_t sample_rate = -1);
+
 void save_audio_file(
     const std::string& file_name,
     const c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal>& signal,

From 47aee13d91690405e78458aa9a5efee68c9fac48 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Fri, 28 Aug 2020 17:18:02 +0000
Subject: [PATCH 2/3] Remove BC code

---
 torchaudio/backend/sox_io_backend.py | 2 +-
 torchaudio/csrc/register.cpp         | 3 ---
 torchaudio/csrc/sox_io.cpp           | 9 ---------
 torchaudio/csrc/sox_io.h             | 9 ---------
 4 files changed, 1 insertion(+), 22 deletions(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index ee17ba9283..68738f6363 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -111,7 +111,7 @@ def load(
             ``[channel, time]`` else ``[time, channel]``.
     """
     sample_rate = -1 if sample_rate is None else sample_rate
-    signal = torch.ops.torchaudio.sox_io_load_audio_file_v1(
+    signal = torch.ops.torchaudio.sox_io_load_audio_file(
         filepath, frame_offset, num_frames, normalize, channels_first, sample_rate)
     return signal.get_tensor(), signal.get_sample_rate()
 
diff --git a/torchaudio/csrc/register.cpp b/torchaudio/csrc/register.cpp
index 63c9ffd1d4..a1ec8f2254 100644
--- a/torchaudio/csrc/register.cpp
+++ b/torchaudio/csrc/register.cpp
@@ -51,9 +51,6 @@ TORCH_LIBRARY(torchaudio, m) {
   m.def(
       "torchaudio::sox_io_load_audio_file",
       &torchaudio::sox_io::load_audio_file);
-  m.def(
-      "torchaudio::sox_io_load_audio_file_v1",
-      &torchaudio::sox_io::load_audio_file_v1);
   m.def(
       "torchaudio::sox_io_save_audio_file",
       &torchaudio::sox_io::save_audio_file);
diff --git a/torchaudio/csrc/sox_io.cpp b/torchaudio/csrc/sox_io.cpp
index c7e4b8ad3c..87f5dab3a4 100644
--- a/torchaudio/csrc/sox_io.cpp
+++ b/torchaudio/csrc/sox_io.cpp
@@ -48,15 +48,6 @@ c10::intrusive_ptr<SignalInfo> get_info(const std::string& path) {
 }
 
 c10::intrusive_ptr<TensorSignal> load_audio_file(
-    const std::string& path,
-    const int64_t frame_offset,
-    const int64_t num_frames,
-    const bool normalize,
-    const bool channels_first) {
-  return load_audio_file_v1(path, frame_offset, num_frames, channels_first, -1);
-}
-
-c10::intrusive_ptr<TensorSignal> load_audio_file_v1(
     const std::string& path,
     const int64_t frame_offset,
     const int64_t num_frames,
diff --git a/torchaudio/csrc/sox_io.h b/torchaudio/csrc/sox_io.h
index a92bbc43d4..020ee7aa79 100644
--- a/torchaudio/csrc/sox_io.h
+++ b/torchaudio/csrc/sox_io.h
@@ -23,16 +23,7 @@ struct SignalInfo : torch::CustomClassHolder {
 
 c10::intrusive_ptr<SignalInfo> get_info(const std::string& path);
 
-// ver. 0
 c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file(
-    const std::string& path,
-    const int64_t frame_offset = 0,
-    const int64_t num_frames = -1,
-    const bool normalize = true,
-    const bool channels_first = true);
-
-// ver. 1 sample_rate is added
-c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file_v1(
     const std::string& path,
     const int64_t frame_offset = 0,
     const int64_t num_frames = -1,

From 1e722d825e709bb778360248589ba0ee14d038b0 Mon Sep 17 00:00:00 2001
From: moto <855818+mthrok@users.noreply.github.com>
Date: Fri, 28 Aug 2020 18:30:23 +0000
Subject: [PATCH 3/3] Revert "Remove BC code"

This reverts commit abbb52ebdbe6b82b2473a30cfd550e8812810646.
---
 torchaudio/backend/sox_io_backend.py | 2 +-
 torchaudio/csrc/register.cpp         | 3 +++
 torchaudio/csrc/sox_io.cpp           | 9 +++++++++
 torchaudio/csrc/sox_io.h             | 9 +++++++++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 68738f6363..ee17ba9283 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -111,7 +111,7 @@ def load(
             ``[channel, time]`` else ``[time, channel]``.
     """
     sample_rate = -1 if sample_rate is None else sample_rate
-    signal = torch.ops.torchaudio.sox_io_load_audio_file(
+    signal = torch.ops.torchaudio.sox_io_load_audio_file_v1(
         filepath, frame_offset, num_frames, normalize, channels_first, sample_rate)
     return signal.get_tensor(), signal.get_sample_rate()
 
diff --git a/torchaudio/csrc/register.cpp b/torchaudio/csrc/register.cpp
index a1ec8f2254..63c9ffd1d4 100644
--- a/torchaudio/csrc/register.cpp
+++ b/torchaudio/csrc/register.cpp
@@ -51,6 +51,9 @@ TORCH_LIBRARY(torchaudio, m) {
   m.def(
       "torchaudio::sox_io_load_audio_file",
       &torchaudio::sox_io::load_audio_file);
+  m.def(
+      "torchaudio::sox_io_load_audio_file_v1",
+      &torchaudio::sox_io::load_audio_file_v1);
   m.def(
       "torchaudio::sox_io_save_audio_file",
       &torchaudio::sox_io::save_audio_file);
diff --git a/torchaudio/csrc/sox_io.cpp b/torchaudio/csrc/sox_io.cpp
index 87f5dab3a4..c7e4b8ad3c 100644
--- a/torchaudio/csrc/sox_io.cpp
+++ b/torchaudio/csrc/sox_io.cpp
@@ -48,6 +48,15 @@ c10::intrusive_ptr<SignalInfo> get_info(const std::string& path) {
 }
 
 c10::intrusive_ptr<TensorSignal> load_audio_file(
+    const std::string& path,
+    const int64_t frame_offset,
+    const int64_t num_frames,
+    const bool normalize,
+    const bool channels_first) {
+  return load_audio_file_v1(path, frame_offset, num_frames, channels_first, -1);
+}
+
+c10::intrusive_ptr<TensorSignal> load_audio_file_v1(
     const std::string& path,
     const int64_t frame_offset,
     const int64_t num_frames,
diff --git a/torchaudio/csrc/sox_io.h b/torchaudio/csrc/sox_io.h
index 020ee7aa79..a92bbc43d4 100644
--- a/torchaudio/csrc/sox_io.h
+++ b/torchaudio/csrc/sox_io.h
@@ -23,7 +23,16 @@ struct SignalInfo : torch::CustomClassHolder {
 
 c10::intrusive_ptr<SignalInfo> get_info(const std::string& path);
 
+// ver. 0
 c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file(
+    const std::string& path,
+    const int64_t frame_offset = 0,
+    const int64_t num_frames = -1,
+    const bool normalize = true,
+    const bool channels_first = true);
+
+// ver. 1 sample_rate is added
+c10::intrusive_ptr<torchaudio::sox_utils::TensorSignal> load_audio_file_v1(
     const std::string& path,
     const int64_t frame_offset = 0,
     const int64_t num_frames = -1,