Add sox_utils module

mthrok · mthrok · commit 70763f44ff37 · 2020-07-14T20:01:28.000Z
diff --git a/test/utils/__init__.py b/test/utils/__init__.py
diff --git a/test/utils/test_sox_utils.py b/test/utils/test_sox_utils.py
@@ -0,0 +1,44 @@
+from torchaudio.utils import sox_utils
+
+from ..common_utils import (
+    PytorchTestCase,
+    skipIfNoExtension,
+)
+
+
+@skipIfNoExtension
+class TestSoxUtils(PytorchTestCase):
+    """Smoke tests for sox_utils module"""
+    def test_set_seed(self):
+        """`set_seed` does not crash"""
+        sox_utils.set_seed(0)
+
+    def test_set_verbosity(self):
+        """`set_verbosity` does not crash"""
+        for val in range(6, 0, -1):
+            sox_utils.set_verbosity(val)
+
+    def test_set_buffer_size(self):
+        """`set_buffer_size` does not crash"""
+        sox_utils.set_buffer_size(131072)
+        # back to default
+        sox_utils.set_buffer_size(8192)
+
+    def test_set_use_threads(self):
+        """`set_use_threads` does not crash"""
+        sox_utils.set_use_threads(True)
+        # back to default
+        sox_utils.set_use_threads(False)
+
+    def test_list_effects(self):
+        """`list_effects` returns the list of available effects"""
+        effects = sox_utils.list_effects()
+        # We cannot infer what effects are available, so only check some of them.
+        assert 'highpass' in effects
+        assert 'phaser' in effects
+        assert 'gain' in effects
+
+    def test_list_formats(self):
+        """`list_formats` returns the list of supported formats"""
+        formats = sox_utils.list_formats()
+        assert 'wav' in formats
diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
@@ -4,6 +4,7 @@
     compliance,
     datasets,
     kaldi_io,
+    utils,
     sox_effects,
     transforms
 )
diff --git a/torchaudio/csrc/register.cpp b/torchaudio/csrc/register.cpp
@@ -18,6 +18,17 @@ static auto registerTensorSignal =
         .def("get_sample_rate", &sox_utils::TensorSignal::getSampleRate)
         .def("get_channels_first", &sox_utils::TensorSignal::getChannelsFirst);
 
+static auto registerSetSoxOptions =
+    torch::RegisterOperators()
+        .op("torchaudio::sox_utils_set_seed", &sox_utils::set_seed)
+        .op("torchaudio::sox_utils_set_verbosity", &sox_utils::set_verbosity)
+        .op("torchaudio::sox_utils_set_use_threads",
+            &sox_utils::set_use_threads)
+        .op("torchaudio::sox_utils_set_buffer_size",
+            &sox_utils::set_buffer_size)
+        .op("torchaudio::sox_utils_list_effects", &sox_utils::list_effects)
+        .op("torchaudio::sox_utils_list_formats", &sox_utils::list_formats);
+
 ////////////////////////////////////////////////////////////////////////////////
 // sox_io.h
 ////////////////////////////////////////////////////////////////////////////////
@@ -53,12 +64,11 @@ static auto registerSaveAudioFile = torch::RegisterOperators().op(
 // sox_effects.h
 ////////////////////////////////////////////////////////////////////////////////
 static auto registerSoxEffects =
-    torch::RegisterOperators(
-        "torchaudio::sox_effects_initialize_sox_effects",
-        &sox_effects::initialize_sox_effects)
+    torch::RegisterOperators()
+        .op("torchaudio::sox_effects_initialize_sox_effects",
+            &sox_effects::initialize_sox_effects)
         .op("torchaudio::sox_effects_shutdown_sox_effects",
-            &sox_effects::shutdown_sox_effects)
-        .op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects);
+            &sox_effects::shutdown_sox_effects);
 
 } // namespace
 } // namespace torchaudio
diff --git a/torchaudio/csrc/sox_effects.cpp b/torchaudio/csrc/sox_effects.cpp
@@ -39,16 +39,5 @@ void shutdown_sox_effects() {
   }
 }
 
-std::vector<std::string> list_effects() {
-  std::vector<std::string> names;
-  const sox_effect_fn_t* fns = sox_get_effect_fns();
-  for (int i = 0; fns[i]; ++i) {
-    const sox_effect_handler_t* handler = fns[i]();
-    if (handler && handler->name)
-      names.push_back(handler->name);
-  }
-  return names;
-}
-
 } // namespace sox_effects
 } // namespace torchaudio
diff --git a/torchaudio/csrc/sox_effects.h b/torchaudio/csrc/sox_effects.h
@@ -10,8 +10,6 @@ void initialize_sox_effects();
 
 void shutdown_sox_effects();
 
-std::vector<std::string> list_effects();
-
 } // namespace sox_effects
 } // namespace torchaudio
 
diff --git a/torchaudio/csrc/sox_io.cpp b/torchaudio/csrc/sox_io.cpp
@@ -125,14 +125,12 @@ void save_audio_file(
     const c10::intrusive_ptr<TensorSignal>& signal,
     const double compression) {
   const auto tensor = signal->getTensor();
-  const auto sample_rate = signal->getSampleRate();
   const auto channels_first = signal->getChannelsFirst();
 
   validate_input_tensor(tensor);
 
   const auto filetype = get_filetype(file_name);
-  const auto signal_info =
-      get_signalinfo(tensor, sample_rate, channels_first, filetype);
+  const auto signal_info = get_signalinfo(signal.get(), filetype);
   const auto encoding_info =
       get_encodinginfo(filetype, tensor.dtype(), compression);
 
diff --git a/torchaudio/csrc/sox_utils.cpp b/torchaudio/csrc/sox_utils.cpp
@@ -5,6 +5,49 @@
 namespace torchaudio {
 namespace sox_utils {
 
+void set_seed(const int64_t seed) {
+  sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
+}
+
+void set_verbosity(const int64_t verbosity) {
+  sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
+}
+
+void set_use_threads(const bool use_threads) {
+  sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
+}
+
+void set_buffer_size(const int64_t buffer_size) {
+  sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
+}
+
+std::vector<std::vector<std::string>> list_effects() {
+  std::vector<std::vector<std::string>> effects;
+  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
+    const sox_effect_handler_t* handler = (*fns)();
+    if (handler && handler->name) {
+      if (UNSUPPORTED_EFFECTS.find(handler->name) ==
+          UNSUPPORTED_EFFECTS.end()) {
+        effects.emplace_back(std::vector<std::string>{
+            handler->name,
+            handler->usage ? std::string(handler->usage) : std::string("")});
+      }
+    }
+  }
+  return effects;
+}
+
+std::vector<std::string> list_formats() {
+  std::vector<std::string> formats;
+  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
+    for (const char* const* names = fns->fn()->names; *names; ++names) {
+      if (!strchr(*names, '/'))
+        formats.emplace_back(*names);
+    }
+  }
+  return formats;
+}
+
 TensorSignal::TensorSignal(
     torch::Tensor tensor_,
     int64_t sample_rate_,
@@ -205,13 +248,13 @@ unsigned get_precision(
 }
 
 sox_signalinfo_t get_signalinfo(
-    const torch::Tensor& tensor,
-    const int64_t sample_rate,
-    const bool channels_first,
+    const TensorSignal* signal,
     const std::string filetype) {
+  auto tensor = signal->getTensor();
   return sox_signalinfo_t{
-      /*rate=*/static_cast<sox_rate_t>(sample_rate),
-      /*channels=*/static_cast<unsigned>(tensor.size(channels_first ? 0 : 1)),
+      /*rate=*/static_cast<sox_rate_t>(signal->getSampleRate()),
+      /*channels=*/
+      static_cast<unsigned>(tensor.size(signal->getChannelsFirst() ? 0 : 1)),
       /*precision=*/get_precision(filetype, tensor.dtype()),
       /*length=*/static_cast<uint64_t>(tensor.numel())};
 }
diff --git a/torchaudio/csrc/sox_utils.h b/torchaudio/csrc/sox_utils.h
@@ -7,6 +7,25 @@
 namespace torchaudio {
 namespace sox_utils {
 
+////////////////////////////////////////////////////////////////////////////////
+// APIs for Python interaction
+////////////////////////////////////////////////////////////////////////////////
+
+/// Set sox global options
+void set_seed(const int64_t seed);
+
+void set_verbosity(const int64_t verbosity);
+
+void set_use_threads(const bool use_threads);
+
+void set_buffer_size(const int64_t buffer_size);
+
+std::vector<std::vector<std::string>> list_effects();
+
+std::vector<std::string> list_formats();
+
+/// Class for exchanging signal information (tensor + meta data) between
+/// C++ and Python for read/write operation.
 struct TensorSignal : torch::CustomClassHolder {
   torch::Tensor tensor;
   int64_t sample_rate;
@@ -22,6 +41,13 @@ struct TensorSignal : torch::CustomClassHolder {
   bool getChannelsFirst() const;
 };
 
+////////////////////////////////////////////////////////////////////////////////
+// Utilities for sox_io / sox_effects implementations
+////////////////////////////////////////////////////////////////////////////////
+
+const std::unordered_set<std::string> UNSUPPORTED_EFFECTS =
+    {"input", "output", "spectrogram", "noiseprof", "noisered", "splice"};
+
 /// helper class to automatically close sox_format_t*
 struct SoxFormat {
   explicit SoxFormat(sox_format_t* fd) noexcept;
@@ -84,9 +110,7 @@ const std::string get_filetype(const std::string path);
 
 /// Get sox_signalinfo_t for passing a torch::Tensor object.
 sox_signalinfo_t get_signalinfo(
-    const torch::Tensor& tensor,
-    const int64_t sample_rate,
-    const bool channels_first,
+    const TensorSignal* signal,
     const std::string filetype);
 
 /// Get sox_encofinginfo_t for saving audoi file
diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py
@@ -7,6 +7,7 @@
     module_utils as _mod_utils,
     misc_ops as _misc_ops,
 )
+from torchaudio.utils.sox_utils import list_effects
 
 if _mod_utils.is_module_available('torchaudio._torchaudio'):
     from torchaudio import _torchaudio
@@ -52,7 +53,7 @@ def effect_names() -> List[str]:
     Example
         >>> EFFECT_NAMES = torchaudio.sox_effects.effect_names()
     """
-    return torch.ops.torchaudio.sox_effects_list_effects()
+    return list(list_effects().keys())
 
 
 @_mod_utils.requires_module('torchaudio._torchaudio')
diff --git a/torchaudio/utils/__init__.py b/torchaudio/utils/__init__.py
@@ -0,0 +1,9 @@
+from . import (
+    sox_utils,
+)
+
+from torchaudio._internal import module_utils as _mod_utils
+
+
+if _mod_utils.is_module_available('torchaudio._torchaudio'):
+    sox_utils.set_verbosity(1)
diff --git a/torchaudio/utils/sox_utils.py b/torchaudio/utils/sox_utils.py
@@ -0,0 +1,84 @@
+from typing import List, Dict
+
+import torch
+
+from torchaudio._internal import (
+    module_utils as _mod_utils,
+)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def set_seed(seed: int):
+    """Set the seed of libsox's PRNG
+
+    Args:
+        seed: seed value. valid range is int32.
+
+    See Also:
+        http://sox.sourceforge.net/sox.html
+    """
+    torch.ops.torchaudio.sox_utils_set_seed(seed)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def set_verbosity(verbosity: int):
+    """Set libsox's verbosity
+
+    Args:
+        verbosity: Set verbosity level of libsox.
+            1: failure messages
+            2: warnings
+            3: details of processing
+            4-6: increasing levels of debug messages
+
+    See Also:
+        http://sox.sourceforge.net/sox.html
+    """
+    torch.ops.torchaudio.sox_utils_set_verbosity(verbosity)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def set_buffer_size(buffer_size: int):
+    """Set buffer size for sox effect chain
+
+    Args:
+        buffer_size: Set the size in bytes of the buffers used for processing audio.
+
+    See Also:
+        http://sox.sourceforge.net/sox.html
+    """
+    torch.ops.torchaudio.sox_utils_set_buffer_size(buffer_size)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def set_use_threads(use_threads: bool):
+    """Set multithread option for sox effect chain
+
+    Args:
+        use_threads: When True, enables libsox's parallel effects channels processing.
+            To use multithreading, the underlying libsox has to be compiled with OpenMP support.
+
+    See Also:
+        http://sox.sourceforge.net/sox.html
+    """
+    torch.ops.torchaudio.sox_utils_set_use_threads(use_threads)
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def list_effects() -> Dict[str, str]:
+    """List the available sox effects and their usage
+
+    Returns:
+        Mapping from "effect name" to "usage"
+    """
+    return dict(torch.ops.torchaudio.sox_utils_list_effects())
+
+
+@_mod_utils.requires_module('torchaudio._torchaudio')
+def list_formats() -> List[str]:
+    """List the supported audio formats
+
+    Returns:
+        List of supported audio formats
+    """
+    return torch.ops.torchaudio.sox_utils_list_formats()

Original file line number	Diff line number	Diff line change
`@@ -4,6 +4,7 @@`
`4`	`4`	`compliance,`
`5`	`5`	`datasets,`
`6`	`6`	`kaldi_io,`
	`7`	`+ utils,`
`7`	`8`	`sox_effects,`
`8`	`9`	`transforms`
`9`	`10`	`)`