From 5fe8e2e2c0b2e2d2e3cd4068f0247c9f855fffa4 Mon Sep 17 00:00:00 2001 From: Pearu Peterson Date: Thu, 30 Oct 2025 21:03:02 +0200 Subject: [PATCH] Remove rir extension. --- .github/scripts/unittest-linux/run_test.sh | 5 - .github/workflows/unittest-linux-cpu.yml | 4 - .github/workflows/unittest-macos-cpu.yml | 4 - CMakeLists.txt | 4 - src/libtorchaudio/CMakeLists.txt | 5 - src/libtorchaudio/pybind/pybind.cpp | 1 - src/libtorchaudio/rir/ray_tracing.cpp | 334 ------------------ src/libtorchaudio/rir/rir.cpp | 207 ----------- src/libtorchaudio/rir/wall.h | 198 ----------- src/libtorchaudio/utils.cpp | 8 - src/libtorchaudio/utils.h | 1 - src/torchaudio/_extension/__init__.py | 14 - test/cpp/CMakeLists.txt | 27 -- test/cpp/rir/wall_collision.cpp | 118 ------- .../common_utils/__init__.py | 2 - .../common_utils/case_utils.py | 7 - tools/setup_helpers/extension.py | 2 - 17 files changed, 941 deletions(-) delete mode 100644 src/libtorchaudio/rir/ray_tracing.cpp delete mode 100644 src/libtorchaudio/rir/rir.cpp delete mode 100644 src/libtorchaudio/rir/wall.h delete mode 100644 test/cpp/CMakeLists.txt delete mode 100644 test/cpp/rir/wall_collision.cpp diff --git a/.github/scripts/unittest-linux/run_test.sh b/.github/scripts/unittest-linux/run_test.sh index 0485a3617f..7740932530 100755 --- a/.github/scripts/unittest-linux/run_test.sh +++ b/.github/scripts/unittest-linux/run_test.sh @@ -22,11 +22,6 @@ if [[ "${CUDA_TESTS_ONLY}" = "1" ]]; then args+=('-k' 'cuda or gpu') fi -( - cd build/temp*/test/cpp - ctest -) - ( export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CTC_DECODER=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true diff --git a/.github/workflows/unittest-linux-cpu.yml b/.github/workflows/unittest-linux-cpu.yml index ea0ac854fa..a695e87a0c 100644 --- a/.github/workflows/unittest-linux-cpu.yml +++ b/.github/workflows/unittest-linux-cpu.yml @@ -72,10 +72,6 @@ jobs: python -m pip install . -v --no-build-isolation echo "::endgroup::" - echo "::group::Run TorchAudio C tests" - (cd build/temp*/test/cpp && ./wall_collision) - echo "::endgroup::" - echo "::group::Run TorchAudio tests" export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CTC_DECODER=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true diff --git a/.github/workflows/unittest-macos-cpu.yml b/.github/workflows/unittest-macos-cpu.yml index 24a10c2c02..e6adb85238 100644 --- a/.github/workflows/unittest-macos-cpu.yml +++ b/.github/workflows/unittest-macos-cpu.yml @@ -70,10 +70,6 @@ jobs: python -m pip install . -v --no-build-isolation echo "::endgroup::" - echo "::group::Run TorchAudio C tests" - (cd build/temp*/test/cpp && ./wall_collision) - echo "::endgroup::" - echo "::group::Run TorchAudio tests" export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_APPLY_CMVN_SLIDING=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_FBANK_FEATS=true diff --git a/CMakeLists.txt b/CMakeLists.txt index cb9c84bbf9..e5fd1f96a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,6 @@ endif() # Options -option(BUILD_RIR "Enable RIR simulation" ON) option(BUILD_RNNT "Enable RNN transducer" ON) option(BUILD_ALIGN "Enable forced alignment" ON) option(BUILD_CUDA_CTC_DECODER "Build CUCTC decoder" OFF) @@ -170,6 +169,3 @@ if (BUILD_CUDA_CTC_DECODER) endif() add_subdirectory(src/libtorchaudio/cuctc) endif() -if (BUILD_CPP_TEST) - add_subdirectory(test/cpp) -endif() diff --git a/src/libtorchaudio/CMakeLists.txt b/src/libtorchaudio/CMakeLists.txt index c7813d1222..063aa93e34 100644 --- a/src/libtorchaudio/CMakeLists.txt +++ b/src/libtorchaudio/CMakeLists.txt @@ -34,11 +34,6 @@ if(BUILD_RNNT) endif() endif() -if(BUILD_RIR) - list(APPEND sources rir/rir.cpp rir/ray_tracing.cpp) - list(APPEND compile_definitions INCLUDE_RIR) -endif() - if(BUILD_ALIGN) list( APPEND diff --git a/src/libtorchaudio/pybind/pybind.cpp b/src/libtorchaudio/pybind/pybind.cpp index 790833dd5b..d3100164ed 100644 --- a/src/libtorchaudio/pybind/pybind.cpp +++ b/src/libtorchaudio/pybind/pybind.cpp @@ -5,7 +5,6 @@ namespace torchaudio { namespace { PYBIND11_MODULE(_torchaudio, m) { - m.def("is_rir_available", &is_rir_available, ""); m.def("is_align_available", &is_align_available, ""); m.def("cuda_version", &cuda_version, ""); } diff --git a/src/libtorchaudio/rir/ray_tracing.cpp b/src/libtorchaudio/rir/ray_tracing.cpp deleted file mode 100644 index 82f1b0aabf..0000000000 --- a/src/libtorchaudio/rir/ray_tracing.cpp +++ /dev/null @@ -1,334 +0,0 @@ -/* -Copyright (c) 2014-2017 EPFL-LCAV - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -// -// Ray tracing implementation. This is heavily based on PyRoomAcoustics: -// https://github.com/LCAV/pyroomacoustics -// -#include -#include -#include -#include - -namespace torchaudio { -namespace rir { -namespace { - -// TODO: remove this once hybrid method is supported -const bool IS_HYBRID_SIM = false; - -// TODO: remove this once ISM method is supported -const int ISM_ORDER = 10; - -#define EPS ((scalar_t)(1e-5)) -#define VAL(x) ((x).template item()) -#define NORM(x) (VAL((x).norm())) -#define MAX(x) (VAL((x).max())) -#define IN_RANGE(x, y) ((-EPS < (x)) && ((x) < (y) + EPS)) - -template -const std::array, 6> make_walls( - const torch::Tensor& room, - const torch::Tensor& absorption, - const torch::Tensor& scattering) { - auto w = room.index({0}).item(); - auto l = room.index({1}).item(); - auto h = room.index({2}).item(); - return make_room(w, l, h, absorption, scattering); -} - -inline double get_energy_coeff( - const double travel_dist, - const double mic_radius_sq) { - double sq = travel_dist * travel_dist; - auto p_hit = 1. - std::sqrt(1. - mic_radius_sq / std::max(mic_radius_sq, sq)); - return sq * p_hit; -} - -/// RayTracer class helper for ray tracing. -/// For attribute description, Python wrapper. -template -class RayTracer { - // Provided parameters - const torch::Tensor& room; - const torch::Tensor& mic_array; - const double mic_radius; - - // Values derived from the parameters - const int num_bands; - const double mic_radius_sq; - const bool do_scattering; // Whether scattering is needed (scattering != 0) - const std::array, 6> walls; // The walls of the room - - // Runtime value caches - // Updated at the beginning of the simulation - double sound_speed = 343.0; - double distance_thres = 10.0 * sound_speed; // upper bound - double energy_thres = 0.0; // lower bound - double hist_bin_width = 0.004; // [second] - - public: - RayTracer( - const torch::Tensor& room, - const torch::Tensor& absorption, - const torch::Tensor& scattering, - const torch::Tensor& mic_array, - const double mic_radius) - : room(room), - mic_array(mic_array), - mic_radius(mic_radius), - num_bands(absorption.size(0)), - mic_radius_sq(mic_radius * mic_radius), - do_scattering(MAX(scattering) > 0.), - walls(make_walls(room, absorption, scattering)) {} - - // The main (and only) public entry point of this class. The histograms Tensor - // reference is passed along and modified in the subsequent private method - // calls. This method spawns num_rays rays in all directions from the source - // and calls simul_ray() on each of them. - torch::Tensor compute_histograms( - const torch::Tensor& origin, - int num_rays, - double time_thres, - double energy_thres_ratio, - double sound_speed_, - int num_bins) { - scalar_t energy_0 = 2. / num_rays; - auto energies = torch::full({num_bands}, energy_0, room.options()); - - auto histograms = - torch::zeros({mic_array.size(0), num_bins, num_bands}, room.options()); - - // Cache runtime parameters - sound_speed = sound_speed_; - energy_thres = energy_0 * energy_thres_ratio; - distance_thres = time_thres * sound_speed; - hist_bin_width = time_thres / num_bins; - - // TODO: the for loop can be parallelized over num_rays by creating - // `num_threads` histograms and then sum-reducing them into a single - // histogram. - scalar_t delta = 2. / num_rays; - scalar_t increment = M_PI * (3. - std::sqrt(5.)); // phi increment - - for (auto i = 0; i < num_rays; ++i) { - auto z = (i * delta - 1) + delta / 2.; - auto rho = std::sqrt(1. - z * z); - - scalar_t phi = i * increment; - - auto x = cos(phi) * rho; - auto y = sin(phi) * rho; - - auto azimuth = atan2(y, x); - auto colatitude = atan2(std::sqrt(x * x + y * y), z); - - auto dir = torch::tensor( - {sin(colatitude) * cos(azimuth), - sin(colatitude) * sin(azimuth), - cos(colatitude)}, - room.scalar_type()); - - simul_ray(energies, origin, dir, histograms); - } - return histograms.transpose(1, 2); // (num_mics, num_bands, num_bins) - } - - private: - /// Get the bin index from the distance traveled to a mic. - inline int get_bin_idx(scalar_t travel_dist_at_mic) { - auto time_at_mic = travel_dist_at_mic / sound_speed; - return (int)floor(time_at_mic / hist_bin_width); - } - - /// - /// Traces a single ray. phi (horizontal) and theta (vectorical) are the - /// angles of the ray from the source. Theta is 0 for 2D rooms. When a ray - /// intersects a wall, it is reflected and part of its energy is absorbed. It - /// is also scattered (sent directly to the microphone(s)) according to the - /// scattering coefficient. When a ray is close to the microphone, its current - /// energy is recoreded in the output histogram for that given time slot. - /// - /// See also: - /// https://github.com/LCAV/pyroomacoustics/blob/df8af24c88a87b5d51c6123087cd3cd2d361286a/pyroomacoustics/libroom_src/room.cpp#L855-L986 - void simul_ray( - torch::Tensor& energies, - torch::Tensor origin, - torch::Tensor dir, - torch::Tensor& histograms) { - auto travel_dist = 0.; - // To count the number of times the ray bounces on the walls - // For hybrid generation we add a ray to output only if specular_counter - // is higher than the ism order. - int specular_counter = 0; - while (true) { - // Find the next hit point - auto [hit_point, next_wall_index, hit_distance] = - find_collision_wall(room, origin, dir); - - auto& wall = walls[next_wall_index]; - - // Check if the specular ray hits any of the microphone - if (!(IS_HYBRID_SIM && specular_counter < ISM_ORDER)) { - // Compute the distance between the line defined by (origin, hit_point) - // and the center of the microphone (mic_pos) - - for (auto mic_idx = 0; mic_idx < mic_array.size(0); mic_idx++) { - // - // _ o microphone - // to_mic / | ^ - // / | wall - // / | mic radious | | - // origin / | | | - // / v | | - // x ---------------------------> |x| collision - // - // | <--------> | - // impact_distance - // | <--------------------------> | - // hit_distance - // - torch::Tensor to_mic = mic_array[mic_idx] - origin; - scalar_t impact_distance = VAL(to_mic.dot(dir)); - - // mic is further than the collision point. - // So microphone did not pick up the sound. - if (!IN_RANGE(impact_distance, hit_distance)) { - continue; - } - - // If the ray hit the coverage of the mic, compute the energy - if (NORM(to_mic - dir * impact_distance) < mic_radius + EPS) { - // The length of this last hop - auto travel_dist_at_mic = travel_dist + std::abs(impact_distance); - auto bin_idx = get_bin_idx(travel_dist_at_mic); - if (bin_idx >= histograms.size(1)) { - continue; - } - auto coeff = get_energy_coeff(travel_dist_at_mic, mic_radius_sq); - auto energy = energies / coeff; - histograms[mic_idx][bin_idx] += energy; - } - } - } - - travel_dist += hit_distance; - energies *= wall.reflection; - - // Let's shoot the scattered ray induced by the rebound on the wall - if (do_scattering) { - scat_ray(histograms, wall, energies, origin, hit_point, travel_dist); - energies *= (1. - wall.scattering); - } - - // Check if we reach the thresholds for this ray - if (travel_dist > distance_thres || VAL(energies.max()) < energy_thres) { - break; - } - - // set up for next iteration - specular_counter += 1; - dir = reflect(wall, dir); - origin = hit_point; - } - } - - /// - /// Scatters a ray towards the microphone(s), i.e. records its scattered - /// energy in the histogram. Called when a ray hits a wall. - /// - /// See also: - /// https://github.com/LCAV/pyroomacoustics/blob/df8af24c88a87b5d51c6123087cd3cd2d361286a/pyroomacoustics/libroom_src/room.cpp#L761-L853 - void scat_ray( - torch::Tensor& histograms, - const Wall& wall, - const torch::Tensor& energies, - const torch::Tensor& prev_hit_point, - const torch::Tensor& hit_point, - scalar_t travel_dist) { - for (auto mic_idx = 0; mic_idx < mic_array.size(0); mic_idx++) { - auto mic_pos = mic_array[mic_idx]; - if (side(wall, mic_pos) != side(wall, prev_hit_point)) { - continue; - } - - // As the ray is shot towards the microphone center, - // the hop dist can be easily computed - torch::Tensor hit_point_to_mic = mic_pos - hit_point; - auto hop_dist = NORM(hit_point_to_mic); - auto travel_dist_at_mic = travel_dist + hop_dist; - - // compute the scattered energy reaching the microphone - auto h_sq = hop_dist * hop_dist; - auto p_hit_equal = 1. - std::sqrt(1. - mic_radius_sq / h_sq); - // cosine angle should be positive, but could be negative if normal is - // facing out of room so we take abs - auto p_lambert = (scalar_t)2. * std::abs(cosine(wall, hit_point_to_mic)); - auto scat_trans = wall.scattering * energies * p_hit_equal * p_lambert; - - if (travel_dist_at_mic < distance_thres && - MAX(scat_trans) > energy_thres) { - auto coeff = get_energy_coeff(travel_dist_at_mic, mic_radius_sq); - auto energy = scat_trans / coeff; - histograms[mic_idx][get_bin_idx(travel_dist_at_mic)] += energy; - } - } - } -}; - -/// -/// @brief Compute energy histogram via ray tracing. See Python wrapper for -/// detail about parameters and output. -/// -torch::Tensor ray_tracing( - const torch::Tensor& room, - const torch::Tensor& source, - const torch::Tensor& mic_array, - int64_t num_rays, - const torch::Tensor& absorption, - const torch::Tensor& scattering, - double mic_radius, - double sound_speed, - double energy_thres, - double time_thres, // TODO: rename to duration - double hist_bin_size) { - // TODO: Raise this to Python layer - auto num_bins = (int)ceil(time_thres / hist_bin_size); - return AT_DISPATCH_FLOATING_TYPES(room.scalar_type(), "ray_tracing_3d", [&] { - RayTracer rt(room, absorption, scattering, mic_array, mic_radius); - return rt.compute_histograms( - source, num_rays, time_thres, energy_thres, sound_speed, num_bins); - }); -} - -TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { - m.impl("torchaudio::ray_tracing", torchaudio::rir::ray_tracing); -} - -TORCH_LIBRARY_FRAGMENT(torchaudio, m) { - m.def( - "torchaudio::ray_tracing(Tensor room, Tensor source, Tensor mic_array, int num_rays, Tensor absorption, Tensor scattering, float mic_radius, float sound_speed, float energy_thres, float time_thres, float hist_bin_size) -> Tensor"); -} - -} // namespace -} // namespace rir -} // namespace torchaudio diff --git a/src/libtorchaudio/rir/rir.cpp b/src/libtorchaudio/rir/rir.cpp deleted file mode 100644 index 483f0e6344..0000000000 --- a/src/libtorchaudio/rir/rir.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* -Copyright (c) 2014-2017 EPFL-LCAV -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -/** - * Image source method implementation based on PyRoomAcoustics: - * https://github.com/LCAV/pyroomacoustics - */ -#include -#include -#include -using namespace torch::indexing; - -namespace torchaudio { -namespace rir { - -namespace { -/** - * @brief Sum up impulse response signal of all image sources into one Tensor - * based on delays of arrival of the image sources. The implementation is based - * on the one in pyroomacoustics: - * https://github.com/LCAV/pyroomacoustics/blob/master/pyroomacoustics/build_rir.pyx - * - * @tparam scalar_t The type of irs and rirs Tensor - * @param irs The impulse responses for all image sources. Tensor with - * dimensions `(num_band, num_image, num_mic, ir_length)`. - * @param delay The delays for the impulse response of each image source. Tensor - * with dimensions `(num_inage, num_mic)`. - * @param rirs The output room impulse response signal. Tensor with dimensions - * `(num_band, num_mic, rir_length)`. - * @param num_band The number of frequency bands for the wall materials. - * @param num_image The number of image sources in irs. - * @param num_mic The number of microphones in the array. - * @param ir_length The length of impulse response signal. - */ -template -void simulate_rir_impl( - const torch::Tensor& irs, - const torch::Tensor& delay, - const int64_t rir_length, - const int64_t num_band, - const int64_t num_image, - const int64_t num_mic, - const int64_t ir_length, - torch::Tensor& rirs) { - const scalar_t* input_data = irs.data_ptr(); - const int* delay_data = delay.data_ptr(); - scalar_t* output_data = rirs.data_ptr(); - for (auto i = 0; i < num_band * num_image * num_mic; i++) { - int64_t offset_input = i * ir_length; - int64_t mic = i % num_mic; - int64_t image = ((i - mic) / num_mic) % num_image; - int64_t band = (i - mic - image * num_mic) / (num_image * num_mic); - int64_t offset_output = (band * num_mic + mic) * rir_length; - int64_t offset_delay = image * num_mic + mic; - for (auto j = 0; j < ir_length; j++) { - output_data[offset_output + j + delay_data[offset_delay]] += - input_data[offset_input + j]; - } - } -} - -/** - * @brief Sum up impulse response signal of all image sources into one Tensor - * based on delays of arrival of the image sources. - * - * @param irs The impulse responses for all image sources. Tensor with - * dimensions `(num_band, num_image, num_mic, ir_length)`. - * @param delay The delays for the impulse response of each image source. Tensor - * with dimensions `(num_inage, num_mic)`. - * @param rir_length The length of the output room impulse response signal. - * @return torch::Tensor The output room impulse response signal. Tensor with - * dimensions `(num_band, num_mic, rir_length)`. - */ -torch::Tensor simulate_rir( - const torch::Tensor& irs, - const torch::Tensor& delay, - const int64_t rir_length) { - const int64_t num_band = irs.size(0); - const int64_t num_image = irs.size(1); - const int64_t num_mic = irs.size(2); - const int64_t ir_length = irs.size(3); - torch::Tensor rirs = - torch::zeros({num_band, num_mic, rir_length}, irs.dtype()); - AT_DISPATCH_FLOATING_TYPES_AND_HALF(irs.scalar_type(), "build_rir", [&] { - simulate_rir_impl( - irs, delay, rir_length, num_band, num_image, num_mic, ir_length, rirs); - }); - return rirs; -} - -/** - * @brief Create the band-pass filters for the octave bands. - * The implementation is based on the one in pyroomacoustics: - * https://github.com/LCAV/pyroomacoustics/blob/master/pyroomacoustics/acoustics.py#L261 - * - * @tparam scalar_t The type of center frequencies and output filter Tensors. - * @param centers The Tensor that stores the center frequencies of octave bands. - * Tensor with dimension `(num_band,)`. - * @param sample_rate The sample_rate of simulated room impulse response signal. - * @param n_fft The number of fft points. - * @param filters The output band-pass filter. Tensor with dimensions - * `(num_band, n_fft - 1)`. - */ -template -void make_rir_filter_impl( - torch::Tensor& centers, - double sample_rate, - int64_t n_fft, - torch::Tensor& filters) { - int64_t n = centers.size(0); - torch::Tensor new_bands = torch::zeros({n, 2}, centers.dtype()); - scalar_t* newband_data = new_bands.data_ptr(); - const scalar_t* centers_data = centers.data_ptr(); - for (int64_t i = 0; i < n; i++) { - if (i == 0) { - newband_data[i * 2] = centers_data[0] / 2; - newband_data[i * 2 + 1] = centers_data[1]; - } else if (i == n - 1) { - newband_data[i * 2] = centers_data[n - 2]; - newband_data[i * 2 + 1] = sample_rate / 2; - } else { - newband_data[i * 2] = centers_data[i - 1]; - newband_data[i * 2 + 1] = centers_data[i + 1]; - } - } - const auto half = 0.5; - auto n_freq = n_fft / 2 + 1; - torch::Tensor freq_resp = torch::zeros({n_freq, n}, centers.dtype()); - torch::Tensor freq = - torch::arange(n_freq, centers.dtype()) / n_fft * sample_rate; - const scalar_t* freq_data = freq.data_ptr(); - scalar_t* freqreq_data = freq_resp.data_ptr(); - - for (auto i = 0; i < n; i++) { - for (auto j = 0; j < n_freq; j++) { - if (freq_data[j] >= newband_data[i * 2] && - freq_data[j] < centers_data[i]) { - freqreq_data[j * n + i] = - half * (1 + cos(2 * M_PI * freq_data[j] / centers_data[i])); - } - if (i != n - 1 && freq_data[j] >= centers_data[i] && - freq_data[j] < newband_data[i * 2 + 1]) { - freqreq_data[j * n + i] = - half * (1 - cos(2 * M_PI * freq_data[j] / newband_data[i * 2 + 1])); - } - if (i == n - 1 && centers_data[i] <= freq_data[j]) { - freqreq_data[j * n + i] = 1.0; - } - } - } - filters = torch::fft::fftshift(torch::fft::irfft(freq_resp, n_fft, 0), 0); - filters = filters.index({Slice(1)}).transpose(0, 1); -} - -/** - * @brief Create the band-pass filters for the octave bands. - * - * @param centers The Tensor that stores the center frequencies of octave bands. - * Tensor with dimension `(num_band,)`. - * @param sample_rate The sample_rate of simulated room impulse response signal. - * @param n_fft The number of fft points. - * @return torch::Tensor The output band-pass filter. Tensor with dimensions - * `(num_band, n_fft - 1)`. - */ -torch::Tensor make_rir_filter( - torch::Tensor centers, - double sample_rate, - int64_t n_fft) { - torch::Tensor filters; - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - centers.scalar_type(), "make_filter", [&] { - make_rir_filter_impl(centers, sample_rate, n_fft, filters); - }); - return filters; -} - -TORCH_LIBRARY_IMPL(torchaudio, CPU, m) { - m.impl("torchaudio::_simulate_rir", torchaudio::rir::simulate_rir); - m.impl("torchaudio::_make_rir_filter", torchaudio::rir::make_rir_filter); -} - -TORCH_LIBRARY_FRAGMENT(torchaudio, m) { - m.def( - "torchaudio::_simulate_rir(Tensor irs, Tensor delay_i, int rir_length) -> Tensor"); - m.def( - "torchaudio::_make_rir_filter(Tensor centers, float sample_rate, int n_fft) -> Tensor"); -} - -} // Anonymous namespace -} // namespace rir -} // namespace torchaudio diff --git a/src/libtorchaudio/rir/wall.h b/src/libtorchaudio/rir/wall.h deleted file mode 100644 index 4185721fad..0000000000 --- a/src/libtorchaudio/rir/wall.h +++ /dev/null @@ -1,198 +0,0 @@ -#pragma once -#include - -#define EPS ((scalar_t)(1e-5)) -#define SCALAR(x) ((x).template item()) - -namespace torchaudio { -namespace rir { - -//////////////////////////////////////////////////////////////////////////////// -// Basic Wall implementation -//////////////////////////////////////////////////////////////////////////////// - -/// Wall helper class. A wall records its own absorption, reflection and -/// scattering coefficient, and exposes a few methods for geometrical operations -/// (e.g. reflection of a ray) -template -struct Wall { - const torch::Tensor origin; - const torch::Tensor normal; - const torch::Tensor scattering; - const torch::Tensor reflection; - - Wall( - const torch::ArrayRef& origin, - const torch::ArrayRef& normal, - const torch::Tensor& absorption, - const torch::Tensor& scattering) - : origin(torch::tensor(origin).to(scattering.dtype())), - normal(torch::tensor(normal).to(scattering.dtype())), - scattering(scattering), - reflection(1. - absorption) {} -}; - -/// Returns the side (-1, 1 or 0) on which a point lies w.r.t. the wall. -template -int side(const Wall& wall, const torch::Tensor& pos) { - auto dot = SCALAR((pos - wall.origin).dot(wall.normal)); - - if (dot > EPS) { - return 1; - } else if (dot < -EPS) { - return -1; - } else { - return 0; - } -} - -/// Reflects a ray (dir) on the wall. Preserves norm of vector. -template -torch::Tensor reflect(const Wall& wall, const torch::Tensor& dir) { - return dir - wall.normal * 2 * dir.dot(wall.normal); -} - -/// Returns the cosine angle of a ray (dir) with the normal of the wall -template -scalar_t cosine(const Wall& wall, const torch::Tensor& dir) { - return SCALAR(dir.dot(wall.normal) / dir.norm()); -} - -//////////////////////////////////////////////////////////////////////////////// -// Room (multiple walls) and interactions -//////////////////////////////////////////////////////////////////////////////// - -/// Creates a shoebox room consists of multiple walls. -/// Normals are vectors facing *outwards* the room, and origins are arbitrary -/// corners of each wall. -/// -/// Note: -/// The wall has to be ordered in the following way: -/// - parallel walls are next (W/E, S/N, and F/C) -/// - The one closer to the origin must come first. (W -> E, S -> N, F -> C) -/// - The order of wall pair must be W/E, S/N, then F/C because -/// `find_collision_wall` will search in the order x, y, z and -/// wall pairs must be distibguishable on these axis. - -/// 3D room -template -const std::array, 6> make_room( - const T& w, - const T& l, - const T& h, - const torch::Tensor& abs, - const torch::Tensor& scat) { - using namespace torch::indexing; -#define SLICE(x, i) x.index({Slice(), i}) - return { - Wall({0, l, 0}, {-1, 0, 0}, SLICE(abs, 0), SLICE(scat, 0)), // West - Wall({w, 0, 0}, {1, 0, 0}, SLICE(abs, 1), SLICE(scat, 1)), // East - Wall({0, 0, 0}, {0, -1, 0}, SLICE(abs, 2), SLICE(scat, 2)), // South - Wall({w, l, 0}, {0, 1, 0}, SLICE(abs, 3), SLICE(scat, 3)), // North - Wall({w, 0, 0}, {0, 0, -1}, SLICE(abs, 4), SLICE(scat, 4)), // Floor - Wall({w, 0, h}, {0, 0, 1}, SLICE(abs, 5), SLICE(scat, 5)) // Ceiling - }; -#undef SLICE -} - -/// Find a wall that the given ray hits. -/// The room is assumed to be shoebox room and the walls are constructed -/// in the order used in `make_room`. -/// The room is shoebox-shape and the ray travels infinite distance -/// so that it does hit one of the walls. -/// See also: -/// https://github.com/LCAV/pyroomacoustics/blob/df8af24c88a87b5d51c6123087cd3cd2d361286a/pyroomacoustics/libroom_src/room.cpp#L609-L716 -template -std::tuple find_collision_wall( - const torch::Tensor& room, - const torch::Tensor& origin, - const torch::Tensor& direction // Unit-vector -) { -#define BOOL(x) torch::all(x).template item() -#define INSIDE(x, y) (BOOL(-EPS < (x)) && BOOL((x) < (y + EPS))) - - TORCH_INTERNAL_ASSERT_DEBUG_ONLY( - 3 == room.size(0), - "Expected room to be 3 dimension, but received ", - room.sizes()); - TORCH_INTERNAL_ASSERT_DEBUG_ONLY( - 3 == origin.size(0), - "Expected origin to be 3 dimension, but received ", - origin.sizes()); - TORCH_INTERNAL_ASSERT_DEBUG_ONLY( - 3 == direction.size(0), - "Expected direction to be 3 dimension, but received ", - direction.sizes()); - TORCH_INTERNAL_ASSERT_DEBUG_ONLY( - BOOL(room > 0), "Room size should be greater than zero. Found: ", room); - TORCH_INTERNAL_ASSERT_DEBUG_ONLY( - INSIDE(origin, room), - "The origin of ray must be inside the room. Origin: ", - origin, - ", room: ", - room); - - // i is the coordinate in the collision is searched. - for (unsigned int i = 0; i < 3; ++i) { - auto dir0 = SCALAR(direction[i]); - auto abs_dir0 = std::abs(dir0); - // If the ray is almost parallel to a plane, then we delegate the - // computation to the other planes. - if (abs_dir0 < EPS) { - continue; - } - - // Check the distance to the facing wall along the coordinate. - scalar_t distance = (dir0 < 0.) - ? SCALAR(origin[i]) // Going towards origin - : SCALAR(room[i] - origin[i]); // Going away from origin - // sometimes origin is slightly outside of room - if (distance < 0) { - distance = 0.; - } - auto ratio = distance / abs_dir0; - int i_increment = dir0 > 0.; - - // Compute the intersection of ray and the wall - auto intersection = origin + ratio * direction; - - // The intersection can be within the room or outside. - // If it's inside, the collision point is found. - // ^ - // | | Not Good - // ---+-----------+---x---- - // | | / - // | | / - // | |/ - // | x Found - // | /| - // | / | - // | o | - // | | - // ---+-----------+--------> - // O| | - // - - if (INSIDE(intersection, room)) { - int i_wall = 2 * i + i_increment; - auto dist = SCALAR((intersection - origin).norm()); - return std::make_tuple(intersection, i_wall, dist); - } - } - // This should not happen - TORCH_INTERNAL_ASSERT( - false, - "Failed to find the intersection. room: ", - room, - " origin: ", - origin, - " direction: ", - direction); -#undef INSIDE -#undef BOOL -} -} // namespace rir -} // namespace torchaudio - -#undef EPS -#undef SCALAR diff --git a/src/libtorchaudio/utils.cpp b/src/libtorchaudio/utils.cpp index 4789f0c0a8..4318c4ef62 100644 --- a/src/libtorchaudio/utils.cpp +++ b/src/libtorchaudio/utils.cpp @@ -7,14 +7,6 @@ namespace torchaudio { -bool is_rir_available() { -#ifdef INCLUDE_RIR - return true; -#else - return false; -#endif -} - bool is_align_available() { #ifdef INCLUDE_ALIGN return true; diff --git a/src/libtorchaudio/utils.h b/src/libtorchaudio/utils.h index ffab65cd38..d66e243298 100644 --- a/src/libtorchaudio/utils.h +++ b/src/libtorchaudio/utils.h @@ -2,7 +2,6 @@ #include namespace torchaudio { -bool is_rir_available(); bool is_align_available(); std::optional cuda_version(); } // namespace torchaudio diff --git a/src/torchaudio/_extension/__init__.py b/src/torchaudio/_extension/__init__.py index 9b8bd21d5e..16f5dac741 100644 --- a/src/torchaudio/_extension/__init__.py +++ b/src/torchaudio/_extension/__init__.py @@ -16,7 +16,6 @@ __all__ = [ "_check_cuda_version", "_IS_TORCHAUDIO_EXT_AVAILABLE", - "_IS_RIR_AVAILABLE", ] @@ -28,10 +27,6 @@ # In case of an error, we do not catch the failure as it suggests there is something # wrong with the installation. _IS_TORCHAUDIO_EXT_AVAILABLE = is_module_available("torchaudio.lib._torchaudio") -# RIR features are implemented in _torchaudio extension, but they can be individually -# turned on/off at build time. Available means that _torchaudio is loaded properly, and -# RIR features are found there. -_IS_RIR_AVAILABLE = False _IS_ALIGN_AVAILABLE = False if _IS_TORCHAUDIO_EXT_AVAILABLE: _load_lib("libtorchaudio") @@ -39,18 +34,9 @@ import torchaudio.lib._torchaudio # noqa _check_cuda_version() - _IS_RIR_AVAILABLE = torchaudio.lib._torchaudio.is_rir_available() _IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available() -fail_if_no_rir = ( - no_op - if _IS_RIR_AVAILABLE - else fail_with_message( - "requires RIR extension, but TorchAudio is not compiled with it. Please build TorchAudio with RIR support." - ) -) - fail_if_no_align = ( no_op if _IS_ALIGN_AVAILABLE diff --git a/test/cpp/CMakeLists.txt b/test/cpp/CMakeLists.txt deleted file mode 100644 index f12216ba28..0000000000 --- a/test/cpp/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -include(FetchContent) -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip -) - -# For Windows: Prevent overriding the parent project's compiler/linker settings -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_MakeAvailable(googletest) - -enable_testing() - -add_executable( - wall_collision - rir/wall_collision.cpp -) -target_link_libraries( - wall_collision - torch - GTest::gtest_main -) -target_include_directories( - wall_collision - PRIVATE - "${PROJECT_SOURCE_DIR}/src" -) -add_test(NAME wall_collision_test COMMAND wall_collision) diff --git a/test/cpp/rir/wall_collision.cpp b/test/cpp/rir/wall_collision.cpp deleted file mode 100644 index bf86eef9cd..0000000000 --- a/test/cpp/rir/wall_collision.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include - -using namespace torchaudio::rir; - -using DTYPE = double; - -struct CollisionTestParam { - // Input - torch::Tensor origin; - torch::Tensor direction; - // Expected - torch::Tensor hit_point; - int next_wall_index; - DTYPE hit_distance; -}; - -CollisionTestParam par( - torch::ArrayRef origin, - torch::ArrayRef direction, - torch::ArrayRef hit_point, - int next_wall_index, - DTYPE hit_distance) { - auto dir = torch::tensor(direction); - return { - torch::tensor(origin), - dir / dir.norm(), - torch::tensor(hit_point), - next_wall_index, - hit_distance}; -} - -////////////////////////////////////////////////////////////////////////////// -// 3D test -////////////////////////////////////////////////////////////////////////////// - -class Simple3DRoomCollisionTest - : public ::testing::TestWithParam {}; - -TEST_P(Simple3DRoomCollisionTest, CollisionTest3D) { - // y z - // ^ ^ - // | 3 | y - // | ______ | / - // | | | | / - // | 0 | | 1 | ______ - // | |______| | / / 4: floor, 5: ceiling - // | 2 |/ / - // -+----------------> x -+--------------> x - // - auto room = torch::tensor({1, 1, 1}); - - auto param = GetParam(); - auto [hit_point, next_wall_index, hit_distance] = - find_collision_wall(room, param.origin, param.direction); - - EXPECT_EQ(param.next_wall_index, next_wall_index); - EXPECT_FLOAT_EQ(param.hit_distance, hit_distance); - EXPECT_NEAR( - param.hit_point[0].item(), hit_point[0].item(), 1e-5); - EXPECT_NEAR( - param.hit_point[1].item(), hit_point[1].item(), 1e-5); - EXPECT_NEAR( - param.hit_point[2].item(), hit_point[2].item(), 1e-5); -} - -#define ISQRT2 0.70710678118 - -INSTANTIATE_TEST_CASE_P( - BasicCollisionTests, - Simple3DRoomCollisionTest, - ::testing::Values( - // From 0 - par({0, .5, .5}, {1.0, 0.0, 0.0}, {1., .5, .5}, 1, 1.0), - par({0, .5, .5}, {1.0, -1., 0.0}, {.5, .0, .5}, 2, ISQRT2), - par({0, .5, .5}, {1.0, 1.0, 0.0}, {.5, 1., .5}, 3, ISQRT2), - par({0, .5, .5}, {1.0, 0.0, -1.}, {.5, .5, .0}, 4, ISQRT2), - par({0, .5, .5}, {1.0, 0.0, 1.0}, {.5, .5, 1.}, 5, ISQRT2), - // From 1 - par({1, .5, .5}, {-1., 0.0, 0.0}, {.0, .5, .5}, 0, 1.0), - par({1, .5, .5}, {-1., -1., 0.0}, {.5, .0, .5}, 2, ISQRT2), - par({1, .5, .5}, {-1., 1.0, 0.0}, {.5, 1., .5}, 3, ISQRT2), - par({1, .5, .5}, {-1., 0.0, -1.}, {.5, .5, .0}, 4, ISQRT2), - par({1, .5, .5}, {-1., 0.0, 1.0}, {.5, .5, 1.}, 5, ISQRT2), - // From 2 - par({.5, 0, .5}, {-1., 1.0, 0.0}, {.0, .5, .5}, 0, ISQRT2), - par({.5, 0, .5}, {1.0, 1.0, 0.0}, {1., .5, .5}, 1, ISQRT2), - par({.5, 0, .5}, {0.0, 1.0, 0.0}, {.5, 1., .5}, 3, 1.0), - par({.5, 0, .5}, {0.0, 1.0, -1.}, {.5, .5, .0}, 4, ISQRT2), - par({.5, 0, .5}, {0.0, 1.0, 1.0}, {.5, .5, 1.}, 5, ISQRT2), - // From 3 - par({.5, 1, .5}, {-1., -1., 0.0}, {.0, .5, .5}, 0, ISQRT2), - par({.5, 1, .5}, {1.0, -1., 0.0}, {1., .5, .5}, 1, ISQRT2), - par({.5, 1, .5}, {0.0, -1., 0.0}, {.5, .0, .5}, 2, 1.0), - par({.5, 1, .5}, {0.0, -1., -1.}, {.5, .5, .0}, 4, ISQRT2), - par({.5, 1, .5}, {0.0, -1., 1.0}, {.5, .5, 1.}, 5, ISQRT2), - // From 4 - par({.5, .5, 0}, {-1., 0.0, 1.0}, {.0, .5, .5}, 0, ISQRT2), - par({.5, .5, 0}, {1.0, 0.0, 1.0}, {1., .5, .5}, 1, ISQRT2), - par({.5, .5, 0}, {0.0, -1., 1.0}, {.5, .0, .5}, 2, ISQRT2), - par({.5, .5, 0}, {0.0, 1.0, 1.0}, {.5, 1., .5}, 3, ISQRT2), - par({.5, .5, 0}, {0.0, 0.0, 1.0}, {.5, .5, 1.}, 5, 1.0), - // From 5 - par({.5, .5, 1}, {-1., 0.0, -1.}, {.0, .5, .5}, 0, ISQRT2), - par({.5, .5, 1}, {1.0, 0.0, -1.}, {1., .5, .5}, 1, ISQRT2), - par({.5, .5, 1}, {0.0, -1., -1.}, {.5, .0, .5}, 2, ISQRT2), - par({.5, .5, 1}, {0.0, 1.0, -1.}, {.5, 1., .5}, 3, ISQRT2), - par({.5, .5, 1}, {0.0, 0.0, -1.}, {.5, .5, .0}, 4, 1.0))); - -INSTANTIATE_TEST_CASE_P( - CornerCollisionTest, - Simple3DRoomCollisionTest, - ::testing::Values( - par({1, 1, 0}, {1., 1., 0.}, {1., 1., 0.}, 1, 0.0), - par({1, 1, 0}, {-1., 1., 0.}, {1., 1., 0.}, 3, 0.0), - par({1, 1, 1}, {1., 1., 1.}, {1., 1., 1.}, 1, 0.0), - par({1, 1, 1}, {-1., 1., 1.}, {1., 1., 1.}, 3, 0.0), - par({1, 1, 1}, {-1., -1., 1.}, {1., 1., 1.}, 5, 0.0))); diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py index 67a70033a8..c464bede69 100644 --- a/test/torchaudio_unittest/common_utils/__init__.py +++ b/test/torchaudio_unittest/common_utils/__init__.py @@ -16,7 +16,6 @@ skipIfNoMacOS, skipIfNoModule, skipIfNoQengine, - skipIfNoRIR, skipIfPy310, skipIfRocm, TempDirMixin, @@ -61,7 +60,6 @@ def inject_request(self, request): "skipIfNoExec", "skipIfNoMacOS", "skipIfNoModule", - "skipIfNoRIR", "skipIfRocm", "skipIfNoQengine", "skipIfNoFFmpeg", diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py index f269d9ea18..e1af754b04 100644 --- a/test/torchaudio_unittest/common_utils/case_utils.py +++ b/test/torchaudio_unittest/common_utils/case_utils.py @@ -9,7 +9,6 @@ from itertools import zip_longest import torch -import torchaudio from torch.testing._internal.common_utils import TestCase as PytorchTestCase from torchaudio._internal.module_utils import eval_env, is_module_available @@ -202,12 +201,6 @@ def skipIfNoModule(module, display_name=None): reason="CUDA does not have enough memory.", key="CUDA_SMALL_MEMORY", ) - -skipIfNoRIR = _skipIf( - not torchaudio._extension._IS_RIR_AVAILABLE, - reason="RIR features are not available.", - key="NO_RIR", -) skipIfNoCtcDecoder = _skipIf( not is_ctc_decoder_available(), reason="CTC decoder not available.", diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py index 635b4ae496..8d442486d7 100644 --- a/tools/setup_helpers/extension.py +++ b/tools/setup_helpers/extension.py @@ -34,7 +34,6 @@ def _get_build(var, default=False): _BUILD_CPP_TEST = _get_build("BUILD_CPP_TEST", False) -_BUILD_RIR = _get_build("BUILD_RIR", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True) _USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None) _USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None) @@ -100,7 +99,6 @@ def build_extension(self, ext): "-DCMAKE_VERBOSE_MAKEFILE=ON", f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}", f"-DBUILD_CPP_TEST={'ON' if _BUILD_CPP_TEST else 'OFF'}", - f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}", f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}", f"-DBUILD_ALIGN:BOOL={'ON' if _BUILD_ALIGN else 'OFF'}", f"-DBUILD_CUDA_CTC_DECODER:BOOL={'ON' if _BUILD_CUDA_CTC_DECODER else 'OFF'}",