Skip to content

Commit b3d4f38

Browse files
committed
Add sox effects implementation
1 parent 7b7b4d2 commit b3d4f38

File tree

8 files changed

+452
-1
lines changed

8 files changed

+452
-1
lines changed

torchaudio/csrc/register.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,19 @@ static auto registerSoxEffects =
6868
.op("torchaudio::sox_effects_initialize_sox_effects",
6969
&sox_effects::initialize_sox_effects)
7070
.op("torchaudio::sox_effects_shutdown_sox_effects",
71-
&sox_effects::shutdown_sox_effects);
71+
&sox_effects::shutdown_sox_effects)
72+
.op(torch::RegisterOperators::options()
73+
.schema(
74+
"torchaudio::sox_effects_apply_effects_tensor(__torch__.torch.classes.torchaudio.TensorSignal input_signal, str[][] effects) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
75+
.catchAllKernel<
76+
decltype(sox_effects::apply_effects_tensor),
77+
&sox_effects::apply_effects_tensor>())
78+
.op(torch::RegisterOperators::options()
79+
.schema(
80+
"torchaudio::sox_effects_apply_effects_file(str path, str[][] effects, bool normalize, bool channels_first) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
81+
.catchAllKernel<
82+
decltype(sox_effects::apply_effects_file),
83+
&sox_effects::apply_effects_file>());
7284

7385
} // namespace
7486
} // namespace torchaudio

torchaudio/csrc/sox_effects.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#include <sox.h>
22
#include <torchaudio/csrc/sox_effects.h>
3+
#include <torchaudio/csrc/sox_effects_chain.h>
4+
#include <torchaudio/csrc/sox_utils.h>
5+
6+
using namespace torchaudio::sox_utils;
37

48
namespace torchaudio {
59
namespace sox_effects {
@@ -46,5 +50,88 @@ void shutdown_sox_effects() {
4650
}
4751
}
4852

53+
/// Run a list of sox effects over the tensor held by `input_signal`.
///
/// The input tensor is validated, fed into a SoxEffectsChain, and every entry
/// of `effects` is appended to the chain in order. The samples produced by the
/// chain are collected in a buffer and converted back into a tensor using the
/// input's dtype and channels-first flag, with normalization disabled. The
/// returned TensorSignal carries the channel count and sample rate reported
/// by the chain after all effects were added.
c10::intrusive_ptr<TensorSignal> apply_effects_tensor(
    const c10::intrusive_ptr<TensorSignal>& input_signal,
    std::vector<std::vector<std::string>> effects) {
  using torchaudio::sox_effects_chain::SoxEffectsChain;

  auto waveform = input_signal->getTensor();
  validate_input_tensor(waveform);
  const auto dtype = waveform.dtype();

  // Both ends of the chain use the "wav" encoding matching the tensor dtype.
  SoxEffectsChain chain(
      /*input_encoding=*/get_encodinginfo("wav", dtype, 0.),
      /*output_encoding=*/get_encodinginfo("wav", dtype, 0.));

  // Samples emitted by the chain are appended here; reserving the input size
  // up front is only a capacity hint, the buffer grows if effects add samples.
  std::vector<sox_sample_t> samples;
  samples.reserve(waveform.numel());

  // Assemble source -> effects -> sink, then process everything.
  chain.addInputTensor(input_signal.get());
  for (const auto& spec : effects) {
    chain.addEffect(spec);
  }
  chain.addOutputBuffer(&samples);
  chain.run();

  // Wrap the collected samples in a tensor laid out like the input.
  const auto channels_first = input_signal->getChannelsFirst();
  auto result = convert_to_tensor(
      /*buffer=*/samples.data(),
      /*num_samples=*/samples.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      /*normalize=*/false,
      channels_first);

  return c10::make_intrusive<TensorSignal>(
      result, chain.getOutputSampleRate(), channels_first);
}
90+
91+
/// Open the audio file at `path`, run a list of sox effects over it and return
/// the result as a TensorSignal.
///
/// The file is opened with sox_open_read (signal, encoding and file type are
/// left for sox to determine) and checked with validate_input_file. The dtype
/// of the resulting tensor is derived from the file's encoding and precision.
/// `normalize` and `channels_first` are forwarded to convert_to_tensor; the
/// channel count and sample rate of the result are those reported by the
/// chain after all effects were added.
c10::intrusive_ptr<TensorSignal> apply_effects_file(
    const std::string path,
    std::vector<std::vector<std::string>> effects,
    const bool normalize,
    const bool channels_first) {
  using torchaudio::sox_effects_chain::SoxEffectsChain;

  // SoxFormat owns the handle returned by sox_open_read.
  SoxFormat sf(sox_open_read(
      path.c_str(),
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
      /*filetype=*/nullptr));
  validate_input_file(sf);

  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

  // Destination for the processed samples; the reported file length is used
  // as a capacity hint only.
  std::vector<sox_sample_t> samples;
  samples.reserve(sf->signal.length);

  // The chain reads with the file's own encoding and writes "wav" samples
  // matching the chosen dtype.
  SoxEffectsChain chain(
      /*input_encoding=*/sf->encoding,
      /*output_encoding=*/get_encodinginfo("wav", dtype, 0.));

  // Assemble source -> effects -> sink, then process everything.
  chain.addInputFile(sf);
  for (const auto& spec : effects) {
    chain.addEffect(spec);
  }
  chain.addOutputBuffer(&samples);
  chain.run();

  // Turn the collected samples into a tensor.
  auto result = convert_to_tensor(
      /*buffer=*/samples.data(),
      /*num_samples=*/samples.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      normalize,
      channels_first);

  return c10::make_intrusive<TensorSignal>(
      result, chain.getOutputSampleRate(), channels_first);
}
135+
49136
} // namespace sox_effects
50137
} // namespace torchaudio

torchaudio/csrc/sox_effects.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define TORCHAUDIO_SOX_EFFECTS_H
33

44
#include <torch/script.h>
5+
#include <torchaudio/csrc/sox_utils.h>
56

67
namespace torchaudio {
78
namespace sox_effects {
@@ -10,6 +11,16 @@ void initialize_sox_effects();
1011

1112
void shutdown_sox_effects();
1213

14+
/// Apply sox effects to an in-memory signal.
///
/// @param input_signal Signal whose tensor is fed through the effects chain.
/// @param effects One inner vector per effect, applied in the given order.
/// @return A new TensorSignal holding the processed audio.
c10::intrusive_ptr<sox_utils::TensorSignal> apply_effects_tensor(
    const c10::intrusive_ptr<sox_utils::TensorSignal>& input_signal,
    std::vector<std::vector<std::string>> effects);
17+
18+
/// Load an audio file and apply sox effects to it.
///
/// @param path Path of the audio file to open.
/// @param effects One inner vector per effect, applied in the given order.
/// @param normalize Forwarded to the sample-to-tensor conversion (default
///     true).
/// @param channels_first Layout flag for the returned tensor (default true).
/// @return A new TensorSignal holding the processed audio.
c10::intrusive_ptr<sox_utils::TensorSignal> apply_effects_file(
    std::string path,
    std::vector<std::vector<std::string>> effects,
    bool normalize = true,
    bool channels_first = true);
23+
1324
} // namespace sox_effects
1425
} // namespace torchaudio
1526

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
#include <torchaudio/csrc/sox_effects_chain.h>
2+
#include <torchaudio/csrc/sox_utils.h>
3+
4+
using namespace torch::indexing;
5+
using namespace torchaudio::sox_utils;
6+
7+
namespace torchaudio {
8+
namespace sox_effects_chain {
9+
10+
namespace {
11+
12+
// Helper struct to safely close sox_effect_t* pointer returned by
13+
// sox_create_effect
14+
struct SoxEffect {
15+
explicit SoxEffect(sox_effect_t* se) noexcept : se_(se){};
16+
SoxEffect(const SoxEffect& other) = delete;
17+
SoxEffect(const SoxEffect&& other) = delete;
18+
SoxEffect& operator=(const SoxEffect& other) = delete;
19+
SoxEffect& operator=(SoxEffect&& other) = delete;
20+
~SoxEffect() {
21+
if (se_ != nullptr) {
22+
free(se_);
23+
}
24+
}
25+
operator sox_effect_t*() const {
26+
return se_;
27+
};
28+
sox_effect_t* operator->() noexcept {
29+
return se_;
30+
}
31+
32+
private:
33+
sox_effect_t* se_;
34+
};
35+
36+
/// helper classes for passing the location of input tensor and output buffer
37+
///
38+
/// drain/flow callback functions require plaing C style function signature and
39+
/// the way to pass extra data is to attach data to sox_fffect_t::priv pointer.
40+
/// The following structs will be assigned to sox_fffect_t::priv pointer which
41+
/// gives sox_effect_t an access to input Tensor and output buffer object.
42+
struct TensorInputPriv {
43+
size_t index;
44+
TensorSignal* signal;
45+
};
46+
struct TensorOutputPriv {
47+
std::vector<sox_sample_t>* buffer;
48+
};
49+
50+
/// Callback function to feed Tensor data to SoxEffectChain.
51+
int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
52+
// Retrieve the input Tensor and current index
53+
auto priv = static_cast<TensorInputPriv*>(effp->priv);
54+
auto index = priv->index;
55+
auto signal = priv->signal;
56+
auto tensor = signal->getTensor();
57+
auto num_channels = effp->out_signal.channels;
58+
59+
// Adjust the number of samples to read
60+
const size_t num_samples = tensor.numel();
61+
if (index + *osamp > num_samples) {
62+
*osamp = num_samples - index;
63+
}
64+
// Ensure that it's a multiple of the number of channels
65+
*osamp -= *osamp % num_channels;
66+
67+
// Slice the input Tensor and unnormalize the values
68+
const auto tensor_ = [&]() {
69+
auto i_frame = index / num_channels;
70+
auto num_frames = *osamp / num_channels;
71+
auto t = (signal->getChannelsFirst())
72+
? tensor.index({Slice(), Slice(i_frame, i_frame + num_frames)}).t()
73+
: tensor.index({Slice(i_frame, i_frame + num_frames), Slice()});
74+
return unnormalize_wav(t.reshape({-1})).contiguous();
75+
}();
76+
priv->index += *osamp;
77+
78+
// Write data to SoxEffectsChain buffer.
79+
auto ptr = tensor_.data_ptr<int32_t>();
80+
std::copy(ptr, ptr + *osamp, obuf);
81+
82+
return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
83+
}
84+
85+
/// Callback function to fetch data from SoxEffectChain.
86+
int tensor_output_flow(
87+
sox_effect_t* effp LSX_UNUSED,
88+
sox_sample_t const* ibuf,
89+
sox_sample_t* obuf LSX_UNUSED,
90+
size_t* isamp,
91+
size_t* osamp) {
92+
*osamp = 0;
93+
// Get output buffer
94+
auto out_buffer = static_cast<TensorOutputPriv*>(effp->priv)->buffer;
95+
// Append at the end
96+
out_buffer->insert(out_buffer->end(), ibuf, ibuf + *isamp);
97+
return SOX_SUCCESS;
98+
}
99+
100+
/// Returns the handler describing the "input_tensor" source effect.
///
/// Only the drain callback is set; every other callback (and the usage
/// string) is left null. The handler object is created once and shared by
/// all callers.
sox_effect_handler_t* get_tensor_input_handler() {
  static sox_effect_handler_t handler = [] {
    sox_effect_handler_t h{}; // every field starts out zero / null
    h.name = "input_tensor";
    h.flags = SOX_EFF_MCHAN;
    h.drain = tensor_input_drain;
    h.priv_size = sizeof(TensorInputPriv);
    return h;
  }();
  return &handler;
}
113+
114+
/// Returns the handler describing the "output_tensor" sink effect.
///
/// Only the flow callback is set; every other callback (and the usage
/// string) is left null. The handler object is created once and shared by
/// all callers.
sox_effect_handler_t* get_tensor_output_handler() {
  static sox_effect_handler_t handler = [] {
    sox_effect_handler_t h{}; // every field starts out zero / null
    h.name = "output_tensor";
    h.flags = SOX_EFF_MCHAN;
    h.flow = tensor_output_flow;
    h.priv_size = sizeof(TensorOutputPriv);
    return h;
  }();
  return &handler;
}
127+
128+
} // namespace
129+
130+
/// Creates an empty effects chain for the given input/output encodings.
///
/// The encodings are copied into members first, and the addresses of those
/// members are what is handed to sox_create_effects_chain. The signal infos
/// start out value-initialized; they are filled in when an input is added.
///
/// Throws std::runtime_error when sox fails to create the chain.
SoxEffectsChain::SoxEffectsChain(
    sox_encodinginfo_t input_encoding,
    sox_encodinginfo_t output_encoding)
    : in_enc_(input_encoding),
      out_enc_(output_encoding),
      in_sig_(),
      interm_sig_(),
      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
  if (sec_ == nullptr) {
    throw std::runtime_error("Failed to create effect chain.");
  }
}
142+
143+
/// Releases the underlying sox effects chain, if one is held.
SoxEffectsChain::~SoxEffectsChain() {
  if (sec_) {
    sox_delete_effects_chain(sec_);
  }
}
148+
149+
/// Runs the assembled chain to completion.
///
/// No progress callback (and no client data) is registered, and the status
/// returned by sox_flow_effects is not inspected.
void SoxEffectsChain::run() {
  sox_flow_effects(sec_, nullptr, nullptr);
}
152+
153+
/// Adds `signal` as the source of the chain.
///
/// The chain's input and intermediate signal infos are initialised from the
/// signal, then an "input_tensor" effect that reads from `signal` starting at
/// sample 0 is appended. Only the pointer is stored, so `signal` must stay
/// alive until the chain has run.
///
/// Throws std::runtime_error when the effect cannot be added.
void SoxEffectsChain::addInputTensor(TensorSignal* signal) {
  in_sig_ = get_signalinfo(signal, "wav");
  interm_sig_ = in_sig_;

  SoxEffect source(sox_create_effect(get_tensor_input_handler()));

  // Tell the drain callback where to read from.
  auto* state = static_cast<TensorInputPriv*>(source->priv);
  state->index = 0;
  state->signal = signal;

  const auto status = sox_add_effect(sec_, source, &interm_sig_, &in_sig_);
  if (status != SOX_SUCCESS) {
    throw std::runtime_error("Failed to add effect: input_tensor");
  }
}
164+
165+
void SoxEffectsChain::addOutputBuffer(
166+
std::vector<sox_sample_t>* output_buffer) {
167+
SoxEffect e(sox_create_effect(get_tensor_output_handler()));
168+
static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
169+
if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
170+
throw std::runtime_error("Failed to add effect: output_tensor");
171+
}
172+
}
173+
174+
/// Adds the opened audio file `sf` as the source of the chain.
///
/// The chain's input and intermediate signal infos are taken from the file,
/// then sox's built-in "input" effect is configured with the file handle and
/// appended to the chain. `sf` must stay open until the chain has run.
///
/// Throws std::runtime_error when the effect cannot be configured or added.
void SoxEffectsChain::addInputFile(sox_format_t* sf) {
  in_sig_ = sf->signal;
  interm_sig_ = in_sig_;

  SoxEffect e(sox_create_effect(sox_find_effect("input")));

  // The "input" effect receives the file handle disguised as its single
  // string option; this is the calling convention of the sox C API.
  char* opts[] = {reinterpret_cast<char*>(sf)};
  if (sox_effect_options(e, 1, opts) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Failed to configure effect: input " << sf->filename;
    throw std::runtime_error(stream.str());
  }

  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Failed to add effect: input " << sf->filename;
    throw std::runtime_error(stream.str());
  }
}
186+
187+
void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
188+
const auto num_args = effect.size();
189+
if (num_args == 0) {
190+
throw std::runtime_error("Invalid argument: empty effect.");
191+
}
192+
const auto name = effect[0];
193+
if (UNSUPPORTED_EFFECTS.find(name) != UNSUPPORTED_EFFECTS.end()) {
194+
std::ostringstream stream;
195+
stream << "Unsupported effect: " << name;
196+
throw std::runtime_error(stream.str());
197+
}
198+
199+
SoxEffect e(sox_create_effect(sox_find_effect(name.c_str())));
200+
const auto num_options = num_args - 1;
201+
if (num_options == 0) {
202+
sox_effect_options(e, 0, nullptr);
203+
} else {
204+
std::vector<char*> opts;
205+
for (size_t i = 1; i < num_args; ++i) {
206+
opts.push_back((char*)effect[i].c_str());
207+
}
208+
if (sox_effect_options(e, num_options, opts.data()) != SOX_SUCCESS) {
209+
std::ostringstream stream;
210+
stream << "Invalid effect option:";
211+
for (const auto& v : effect) {
212+
stream << " " << v;
213+
}
214+
throw std::runtime_error(stream.str());
215+
}
216+
}
217+
218+
if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
219+
std::ostringstream stream;
220+
stream << "Failed to add effect: \"" << name;
221+
for (size_t i = 1; i < num_args; ++i) {
222+
stream << " " << effect[i];
223+
}
224+
stream << "\"";
225+
throw std::runtime_error(stream.str());
226+
}
227+
}
228+
229+
int64_t SoxEffectsChain::getOutputNumChannels() {
230+
return interm_sig_.channels;
231+
}
232+
233+
int64_t SoxEffectsChain::getOutputSampleRate() {
234+
return interm_sig_.rate;
235+
}
236+
237+
} // namespace sox_effects_chain
238+
} // namespace torchaudio

0 commit comments

Comments
 (0)