Replace save function with sox effects chain

mthrok · mthrok · commit b5cd948ca37f · 2020-07-16T14:19:41.000Z
diff --git a/torchaudio/csrc/sox_effects_chain.cpp b/torchaudio/csrc/sox_effects_chain.cpp
@@ -46,6 +46,9 @@ struct TensorInputPriv {
 struct TensorOutputPriv {
   std::vector<sox_sample_t>* buffer;
 };
+struct FileOutputPriv { // Private state of the "output_file" effect.
+  sox_format_t* sf; // Handle that file_output_flow passes to sox_write.
+};
 
 /// Callback function to feed Tensor data to SoxEffectChain.
 int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
@@ -84,7 +87,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
 
 /// Callback function to fetch data from SoxEffectChain.
 int tensor_output_flow(
-    sox_effect_t* effp LSX_UNUSED,
+    sox_effect_t* effp,
     sox_sample_t const* ibuf,
     sox_sample_t* obuf LSX_UNUSED,
     size_t* isamp,
@@ -97,6 +100,28 @@ int tensor_output_flow(
   return SOX_SUCCESS;
 }
 
+int file_output_flow( // Flow callback that writes incoming samples to sf.
+    sox_effect_t* effp,
+    sox_sample_t const* ibuf,
+    sox_sample_t* obuf LSX_UNUSED,
+    size_t* isamp,
+    size_t* osamp) {
+  *osamp = 0; // obuf is unused, so no output samples are reported.
+  if (*isamp) { // Skip the write entirely when there is no input.
+    auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
+    if (sox_write(sf, ibuf, *isamp) != *isamp) { // Short write.
+      if (sf->sox_errno) { // Error code recorded on sf: raise with details.
+        std::ostringstream stream;
+        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
+               << sf->filename; // NOTE(review): throws from a C callback.
+        throw std::runtime_error(stream.str());
+      }
+      return SOX_EOF; // Short write with no error code recorded on sf.
+    }
+  }
+  return SOX_SUCCESS;
+}
+
 sox_effect_handler_t* get_tensor_input_handler() {
   static sox_effect_handler_t handler{/*name=*/"input_tensor",
                                       /*usage=*/NULL,
@@ -125,6 +150,20 @@ sox_effect_handler_t* get_tensor_output_handler() {
   return &handler;
 }
 
+sox_effect_handler_t* get_file_output_handler() { // Returns shared handler.
+  static sox_effect_handler_t handler{/*name=*/"output_file",
+                                      /*usage=*/NULL,
+                                      /*flags=*/SOX_EFF_MCHAN,
+                                      /*getopts=*/NULL,
+                                      /*start=*/NULL,
+                                      /*flow=*/file_output_flow, // Only hook.
+                                      /*drain=*/NULL,
+                                      /*stop=*/NULL,
+                                      /*kill=*/NULL,
+                                      /*priv_size=*/sizeof(FileOutputPriv)};
+  return &handler; // Function-local static: same object on every call.
+}
+
 } // namespace
 
 SoxEffectsChain::SoxEffectsChain(
@@ -134,6 +173,7 @@ SoxEffectsChain::SoxEffectsChain(
       out_enc_(output_encoding),
       in_sig_(),
       interm_sig_(),
+      out_sig_(),
       sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
   if (!sec_) {
     throw std::runtime_error("Failed to create effect chain.");
@@ -184,6 +224,17 @@ void SoxEffectsChain::addInputFile(sox_format_t* sf) {
   }
 }
 
+void SoxEffectsChain::addOutputFile(sox_format_t* sf) { // Adds a file sink.
+  out_sig_ = sf->signal; // Output signal info is copied from sf.
+  SoxEffect e(sox_create_effect(get_file_output_handler()));
+  static_cast<FileOutputPriv*>(e->priv)->sf = sf; // Read by file_output_flow.
+  if (sox_add_effect(sec_, e, &interm_sig_, &out_sig_) != SOX_SUCCESS) {
+    std::ostringstream stream; // Build a message naming the output file.
+    stream << "Failed to add effect: output " << sf->filename;
+    throw std::runtime_error(stream.str());
+  }
+}
+
 void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
   const auto num_args = effect.size();
   if (num_args == 0) {
diff --git a/torchaudio/csrc/sox_effects_chain.h b/torchaudio/csrc/sox_effects_chain.h
@@ -14,6 +14,7 @@ class SoxEffectsChain {
   const sox_encodinginfo_t out_enc_;
   sox_signalinfo_t in_sig_;
   sox_signalinfo_t interm_sig_;
+  sox_signalinfo_t out_sig_;
   sox_effects_chain_t* sec_;
 
  public:
@@ -29,6 +30,7 @@ class SoxEffectsChain {
   void addInputTensor(torchaudio::sox_utils::TensorSignal* signal);
   void addInputFile(sox_format_t* sf);
   void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
+  void addOutputFile(sox_format_t* sf);
   void addEffect(const std::vector<std::string> effect);
   int64_t getOutputNumChannels();
   int64_t getOutputSampleRate();
diff --git a/torchaudio/csrc/sox_io.cpp b/torchaudio/csrc/sox_io.cpp
@@ -1,7 +1,8 @@
 #include <sox.h>
+#include <torchaudio/csrc/sox_effects.h>
+#include <torchaudio/csrc/sox_effects_chain.h>
 #include <torchaudio/csrc/sox_io.h>
 #include <torchaudio/csrc/sox_utils.h>
-#include <torchaudio/csrc/sox_effects.h>
 
 using namespace torch::indexing;
 using namespace torchaudio::sox_utils;
@@ -66,22 +67,23 @@ c10::intrusive_ptr<TensorSignal> load_audio_file(
     std::ostringstream offset, frames;
     offset << frame_offset << "s";
     frames << "+" << num_frames << "s";
-    effects.emplace_back(std::vector<std::string>{"trim", offset.str(), frames.str()});
+    effects.emplace_back(
+        std::vector<std::string>{"trim", offset.str(), frames.str()});
   } else if (frame_offset != 0) {
     std::ostringstream offset;
     offset << frame_offset << "s";
     effects.emplace_back(std::vector<std::string>{"trim", offset.str()});
   }
 
-  return torchaudio::sox_effects::apply_effects_file(path, effects, normalize, channels_first);
+  return torchaudio::sox_effects::apply_effects_file(
+      path, effects, normalize, channels_first);
 }
 
 void save_audio_file(
     const std::string& file_name,
     const c10::intrusive_ptr<TensorSignal>& signal,
     const double compression) {
   const auto tensor = signal->getTensor();
-  const auto channels_first = signal->getChannelsFirst();
 
   validate_input_tensor(tensor);
 
@@ -102,22 +104,12 @@ void save_audio_file(
     throw std::runtime_error("Error saving audio file: failed to open file.");
   }
 
-  auto tensor_ = tensor;
-  if (channels_first) {
-    tensor_ = tensor_.t();
-  }
-
-  const int64_t frames_per_chunk = 65536;
-  for (int64_t i = 0; i < tensor_.size(0); i += frames_per_chunk) {
-    auto chunk = tensor_.index({Slice(i, i + frames_per_chunk), Slice()});
-    chunk = unnormalize_wav(chunk).contiguous();
-
-    const size_t numel = chunk.numel();
-    if (sox_write(sf, chunk.data_ptr<int32_t>(), numel) != numel) {
-      throw std::runtime_error(
-          "Error saving audio file: failed to write the entier buffer.");
-    }
-  }
+  torchaudio::sox_effects_chain::SoxEffectsChain chain(
+      /*input_encoding=*/get_encodinginfo("wav", tensor.dtype(), 0.),
+      /*output_encoding=*/sf->encoding);
+  chain.addInputTensor(signal.get());
+  chain.addOutputFile(sf);
+  chain.run();
 }
 
 } // namespace sox_io