From 83286b534d0424992581b803d39d9edbc20ce185 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Wed, 15 Jul 2020 23:40:32 +0000 Subject: [PATCH 1/4] Update documentation and fix docstrings --- docs/source/backend.rst | 138 +++++++++++++++ docs/source/index.rst | 14 +- docs/source/sox_effects.rst | 39 ++++- docs/source/torchaudio.rst | 43 +++++ docs/source/utils.rst | 24 +-- torchaudio/__init__.py | 6 +- torchaudio/backend/common.py | 28 ++++ torchaudio/backend/sox_io_backend.py | 159 ++++++++++++------ torchaudio/backend/utils.py | 19 ++- torchaudio/functional.py | 4 +- torchaudio/models/wav2letter.py | 9 +- torchaudio/sox_effects/sox_effects.py | 231 ++++++++++++++++++-------- torchaudio/transforms.py | 37 +++-- torchaudio/utils/sox_utils.py | 6 +- 14 files changed, 569 insertions(+), 188 deletions(-) create mode 100644 docs/source/backend.rst create mode 100644 docs/source/torchaudio.rst diff --git a/docs/source/backend.rst b/docs/source/backend.rst new file mode 100644 index 0000000000..ffbdd63852 --- /dev/null +++ b/docs/source/backend.rst @@ -0,0 +1,138 @@ +.. _backend: + +torchaudio.backend +================== + +:mod:`torchaudio.backend` module provides implementations for audio file I/O, using different backend libraries. +To switch backend, use :py:func:`torchaudio.set_audio_backend`. To check the current backend use :py:func:`torchaudio.get_audio_backend`. + +.. warning:: + Although ``sox`` backend is default for backward compatibility reason, it has a number of issues, therefore it is highly recommended to use ``sox_io`` backend instead. Note, however, that due to the interface refinement, functions defined in ``sox`` backend and those defined in ``sox_io`` backend do not have the same signatures. + +.. 
note:: + Instead of calling functions in :mod:`torchaudio.backend` directly, please use ``torchaudio.info``, ``torchaudio.load``, ``torchaudio.load_wav`` and ``torchaudio.save`` with proper backend set with :func:`torchaudio.set_audio_backend`. + +There are currently three implementations available. + + * :ref:`sox<sox_backend>` + * :ref:`sox_io<sox_io_backend>` + * :ref:`soundfile<soundfile_backend>` + +``sox`` backend is the original backend which is built on ``libsox``. This module is currently default but is known to have a number of issues, such as wrong handling of WAV files other than 16-bit signed integer. Users are encouraged to use ``sox_io`` backend. This backend requires C++ extension module and is not available on Windows system. + +``sox_io`` backend is the new backend which is built on ``libsox`` and bound to Python with ``Torchscript``. This module is thoroughly tested and addresses all the known issues ``sox`` backend has. Function calls to this backend can be Torchscriptable. This backend requires C++ extension module and is not available on Windows system. + +``soundfile`` backend is built on ``PySoundFile``. You need to install ``PySoundFile`` separately. + +Common Data Structure +~~~~~~~~~~~~~~~~~~~~~ + +Structures used to exchange data between Python interface and ``libsox``. They are used by :ref:`sox<sox_backend>` and :ref:`soundfile<soundfile_backend>` but not by :ref:`sox_io<sox_io_backend>`. + +.. autoclass:: torchaudio.backend.common.SignalInfo + +.. autoclass:: torchaudio.backend.common.EncodingInfo + +.. _sox_backend: + +Sox Backend +~~~~~~~~~~~ + +``sox`` backend is available on ``torchaudio`` installation with C++ extension. It is currently not available on Windows system. + +It is currently default backend when it's available. You can switch from another backend to ``sox`` backend with the following; + +.. code:: + + torchaudio.set_audio_backend("sox") + +info +---- + +.. autofunction:: torchaudio.backend.sox_backend.info + +load +---- + +.. autofunction:: torchaudio.backend.sox_backend.load + +.. 
autofunction:: torchaudio.backend.sox_backend.load_wav + + +save +---- + +.. autofunction:: torchaudio.backend.sox_backend.save + +others +------ + +.. automodule:: torchaudio.backend.sox_backend + :members: + :exclude-members: info, load, load_wav, save + +.. _sox_io_backend: + +Sox IO Backend +~~~~~~~~~~~~~~ + +``sox_io`` backend is available on ``torchaudio`` installation with C++ extension. It is currently not available on Windows system. + +This new backend is recommended over ``sox`` backend. You can switch from another backend to ``sox_io`` backend with the following; + +.. code:: + + torchaudio.set_audio_backend("sox_io") + +The function call to this backend can be Torchscript-able. You can apply :func:`torch.jit.script` and dump the object to file, then call it from C++ application. + +info +---- + +.. autoclass:: torchaudio.backend.sox_io_backend.AudioMetaData + +.. autofunction:: torchaudio.backend.sox_io_backend.info + +load +---- + +.. autofunction:: torchaudio.backend.sox_io_backend.load + +.. autofunction:: torchaudio.backend.sox_io_backend.load_wav + + +save +---- + +.. autofunction:: torchaudio.backend.sox_io_backend.save + +.. _soundfile_backend: + +Soundfile Backend +~~~~~~~~~~~~~~~~~ + +``soundfile`` backend is available when ``PySoundFile`` is installed. This backend works on ``torchaudio`` installation without C++ extension. (i.e. Windows) + +You can switch from another backend to ``soundfile`` backend with the following; + +.. code:: + + torchaudio.set_audio_backend("soundfile") + +info +---- + +.. autofunction:: torchaudio.backend.soundfile_backend.info + +load +---- + +.. autofunction:: torchaudio.backend.soundfile_backend.load + +.. autofunction:: torchaudio.backend.soundfile_backend.load_wav + + +save +---- + +.. 
autofunction:: torchaudio.backend.soundfile_backend.save diff --git a/docs/source/index.rst b/docs/source/index.rst index cee5075d92..8146e545c3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,5 @@ torchaudio -=========== +========== The :mod:`torchaudio` package consists of I/O, popular datasets and common audio transformations. @@ -7,13 +7,13 @@ The :mod:`torchaudio` package consists of I/O, popular datasets and common audio :maxdepth: 2 :caption: Package Reference - sox_effects + torchaudio + backend + functional + transforms datasets + models + sox_effects compliance.kaldi kaldi_io - transforms - functional utils - -.. automodule:: torchaudio - :members: diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst index 46c0a74552..0aa34e7908 100644 --- a/docs/source/sox_effects.rst +++ b/docs/source/sox_effects.rst @@ -1,27 +1,52 @@ -.. role:: hidden - :class: hidden-section +.. _sox_effects: torchaudio.sox_effects ====================== .. currentmodule:: torchaudio.sox_effects +.. warning:: + + The :py:class:`SoxEffect` and :py:class:`SoxEffectsChain` classes are deprecated. Please migrate to :func:`apply_effects_tensor` and :func:`apply_effects_file`. + +Resource initialization / shutdown +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: init_sox_effects + +.. autofunction:: shutdown_sox_effects + +Listing supported effects +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: effect_names + +Applying effects +~~~~~~~~~~~~~~~~ + Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor. +Applying effects on Tensor +-------------------------- + .. autofunction:: apply_effects_tensor +Applying effects on file +------------------------ + .. autofunction:: apply_effects_file -Create SoX effects chain for preprocessing audio. +Legacy +~~~~~~ -:hidden:`SoxEffect` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +SoxEffect +--------- .. 
autoclass:: SoxEffect :members: -:hidden:`SoxEffectsChain` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +SoxEffectsChain +--------------- .. autoclass:: SoxEffectsChain :members: append_effect_to_chain, sox_build_flow_effects, clear_chain, set_input_file diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst new file mode 100644 index 0000000000..f7b826b571 --- /dev/null +++ b/docs/source/torchaudio.rst @@ -0,0 +1,43 @@ +torchaudio +========== + +I/O functionalities +~~~~~~~~~~~~~~~~~~~ + +Audio I/O functions are implemented in :ref:`torchaudio.backend<backend>` module, but for the ease of use, the following functions are made available on :mod:`torchaudio` module. There are different backends available and you can switch backends with :func:`torchaudio.set_audio_backend`. + +Refer to :ref:`backend` for the detail. + +.. function:: torchaudio.info(filepath: str, ...) + + Fetch meta data of an audio file. Refer to :ref:`backend` for the detail. + +.. function:: torchaudio.load(filepath: str, ...) + + Load audio file into torch.Tensor object. Refer to :ref:`backend` for the detail. + +.. function:: torchaudio.load_wav(filepath: str, ...) + + Load audio file into torch.Tensor. Refer to :ref:`backend` for the detail. + +.. function:: torchaudio.save(filepath: str, src: torch.Tensor, sample_rate: int, ...) + + Save torch.Tensor object into an audio format. Refer to :ref:`backend` for the detail. + +.. currentmodule:: torchaudio + +Backend Utilities +~~~~~~~~~~~~~~~~~ + +.. autofunction:: list_audio_backends + +.. autofunction:: get_audio_backend + +.. autofunction:: set_audio_backend + +Sox Effects Utilities +~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: initialize_sox + +.. autofunction:: shutdown_sox diff --git a/docs/source/utils.rst b/docs/source/utils.rst index b56aabb7bb..dc5ad0fd73 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -1,21 +1,11 @@ -.. 
role:: hidden - :class: hidden-section +torchaudio.utils +================ torchaudio.utils.sox_utils -========================== +~~~~~~~~~~~~~~~~~~~~~~~~~~ -Utility module to configure libsox. This affects functionalities in ``sox_io`` backend and ``torchaudio.sox_effects``. +Utility module to configure libsox. +This affects functionalities in :ref:`Sox IO backend<sox_io_backend>` and :ref:`Sox Effects<sox_effects>`. -.. currentmodule:: torchaudio.utils.sox_utils - -.. autofunction:: set_seed - -.. autofunction:: set_verbosity - -.. autofunction:: set_buffer_size - -.. autofunction:: set_use_threads - -.. autofunction:: list_effects - -.. autofunction:: list_formats +.. automodule:: torchaudio.utils.sox_utils + :members: diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py index f748d861fd..9a6a5e3c16 100644 --- a/torchaudio/__init__.py +++ b/torchaudio/__init__.py @@ -35,10 +35,10 @@ @_mod_utils.deprecated( "Please remove the function call to initialize_sox. " "Resource initialization is now automatically handled.") -def initialize_sox() -> int: +def initialize_sox(): """Initialize sox effects. - This function is deprecated. See ``torchaudio.sox_effects.init_sox_effects`` + This function is deprecated. See :py:func:`torchaudio.sox_effects.init_sox_effects` """ _init_sox_effects() @@ -51,6 +51,6 @@ def initialize_sox() -> int: def shutdown_sox(): """Shutdown sox effects. - This function is deprecated. See ``torchaudio.sox_effects.shutdown_sox_effects`` + This function is deprecated. See :py:func:`torchaudio.sox_effects.shutdown_sox_effects` """ _shutdown_sox_effects() diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 0593c34bd5..47249e739e 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -2,6 +2,19 @@ class SignalInfo: + """Data class returned by ``info`` functions. 
+ + Used by :py:func:`torchaudio.backend.sox_backend.info` and + :py:func:`torchaudio.backend.soundfile_backend.info` + + See https://fossies.org/dox/sox-14.4.2/structsox__signalinfo__t.html + + :ivar Optional[int] channels: The number of channels + :ivar Optional[float] rate: Sampling rate + :ivar Optional[int] precision: Bit depth + :ivar Optional[int] length: For :ref:`sox backend<sox_backend>`, the number of samples + (frames * channels). For :ref:`soundfile backend<soundfile_backend>`, the number of frames. + """ def __init__(self, channels: Optional[int] = None, rate: Optional[float] = None, @@ -14,6 +27,21 @@ def __init__(self, class EncodingInfo: + """Data class returned by ``info`` functions. + + Used by :py:func:`torchaudio.backend.sox_backend.info` and + :py:func:`torchaudio.backend.soundfile_backend.info` + + See https://fossies.org/dox/sox-14.4.2/structsox__encodinginfo__t.html + + :ivar Optional[int] encoding: sox_encoding_t + :ivar Optional[int] bits_per_sample: bit depth + :ivar Optional[float] compression: Compression option + :ivar Any reverse_bytes: + :ivar Any reverse_nibbles: + :ivar Any reverse_bits: + :ivar Optional[bool] opposite_endian: + """ def __init__(self, encoding: Any = None, bits_per_sample: Optional[int] = None, diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 4664f733c6..57d8db7723 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -7,6 +7,12 @@ class AudioMetaData: + """Data class to be returned by :py:func:`~torchaudio.backend.sox_io_backend.info`. 
+ + :ivar int sample_rate: Sample rate + :ivar int num_frames: The number of frames + :ivar int num_channels: The number of channels + """ def __init__(self, sample_rate: int, num_frames: int, num_channels: int): self.sample_rate = sample_rate self.num_frames = num_frames @@ -15,7 +21,14 @@ def __init__(self, sample_rate: int, num_frames: int, num_channels: int): @_mod_utils.requires_module('torchaudio._torchaudio') def info(filepath: str) -> AudioMetaData: - """Get signal information of an audio file.""" + """Get signal information of an audio file. + + Args: + filepath (str): Path to audio file + + Returns: + AudioMetaData: meta data of the given audio. + """ sinfo = torch.ops.torchaudio.sox_io_get_info(filepath) return AudioMetaData(sinfo.get_sample_rate(), sinfo.get_num_frames(), sinfo.get_num_channels()) @@ -30,21 +43,28 @@ def load( ) -> Tuple[torch.Tensor, int]: """Load audio data from file. - This function can handle all the codecs that underlying libsox can handle, however note the - followings. - Note: - This function is tested on the following formats; - - WAV - - 32-bit floating-point - - 32-bit signed integer - - 16-bit signed integer - - 8-bit unsigned integer - - MP3 - - FLAC - - OGG/VORBIS - - By default, this function returns Tensor with ``float32`` dtype and the shape of ``[channel, time]``. + This function can handle all the codecs that underlying libsox can handle, + however it is tested on the following formats; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * MP3 + * FLAC + * OGG/VORBIS + * OPUS + + To load ``MP3``, ``FLAC``, ``OGG/VORBIS``, ``OPUS`` and other codecs ``libsox`` does not + handle natively, your installation of ``torchaudio`` has to be linked to ``libsox`` + and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc. 
+ + By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with + ``float32`` dtype and the shape of ``[channel, time]``. The samples are normalized to fit in the range of ``[-1.0, 1.0]``. When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit @@ -54,24 +74,33 @@ def load( for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM. ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as - flac and mp3. For these formats, this function always returns ``float32`` Tensor with values - normalized to ``[-1.0, 1.0]``. + ``flac`` and ``mp3``. + For these formats, this function always returns ``float32`` Tensor with values normalized to + ``[-1.0, 1.0]``. Args: - filepath: Path to audio file - frame_offset: Number of frames to skip before start reading data. - num_frames: Maximum number of frames to read. -1 reads all the remaining samples, starting - from ``frame_offset``. This function may return the less number of frames if there is - not enough frames in the given file. - normalize: When ``True``, this function always return ``float32``, and sample values are - normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change - the resulting Tensor type to integer type. This argument has no effect for formats other - than integer WAV type. - channels_first: When True, the returned Tensor has dimension ``[channel, time]``. + filepath (str): + Path to audio file + frame_offset (int): + Number of frames to skip before start reading data. + num_frames (int): + Maximum number of frames to read. ``-1`` reads all the remaining samples, + starting from ``frame_offset``. + This function may return the less number of frames if there is not enough + frames in the given file. + normalize (bool): + When ``True``, this function always return ``float32``, and sample values are + normalized to ``[-1.0, 1.0]``. 
+ If input file is integer WAV, giving ``False`` will change the resulting Tensor type to + integer type. + This argument has no effect for formats other than integer WAV type. + channels_first (bool): + When True, the returned Tensor has dimension ``[channel, time]``. Otherwise, the returned Tensor's dimension is ``[time, channel]``. Returns: - torch.Tensor: If the input file has integer wav format and normalization is off, then it has + torch.Tensor: + If the input file has integer wav format and normalization is off, then it has integer type, else ``float32`` type. If ``channels_first=True``, it has ``[channel, time]`` else ``[time, channel]``. """ @@ -83,37 +112,49 @@ def load( @_mod_utils.requires_module('torchaudio._torchaudio') def save( filepath: str, - tensor: torch.Tensor, + src: torch.Tensor, sample_rate: int, channels_first: bool = True, compression: Optional[float] = None, ): """Save audio data to file. - Supported formats are; - - WAV - - 32-bit floating-point - - 32-bit signed integer - - 16-bit signed integer - - 8-bit unsigned integer - - MP3 - - FLAC - - OGG/VORBIS + Note: + Supported formats are; + + * WAV + + * 32-bit floating-point + * 32-bit signed integer + * 16-bit signed integer + * 8-bit unsigned integer + + * MP3 + * FLAC + * OGG/VORBIS + + To save ``MP3``, ``FLAC``, ``OGG/VORBIS``, and other codecs ``libsox`` does not + handle natively, your installation of ``torchaudio`` has to be linked to ``libsox`` + and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc. Args: - filepath: Path to save file. - tensor: Audio data to save. must be 2D tensor. - sample_rate: sampling rate - channels_first: If True, the given tensor is interpreted as ``[channel, time]``. - compression: Used for formats other than WAV. This corresponds to ``-C`` option - of ``sox`` command. + filepath (str): Path to save file. + src (torch.Tensor): Audio data to save. Must be 2D tensor. 
+ sample_rate (int): sampling rate + channels_first (bool): + If ``True``, the given tensor is interpreted as ``[channel, time]``, + otherwise ``[time, channel]``. + compression (Optional[float]): + Used for formats other than WAV. This corresponds to ``-C`` option of ``sox`` command. + + * | ``MP3``: Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or + | VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``. + * | ``FLAC``: compression level. Whole number from ``0`` to ``8``. + | ``8`` is default and highest compression. + * | ``OGG/VORBIS``: number from ``-1`` to ``10``; ``-1`` is the highest compression + | and lowest quality. Default: ``3``. + See the detail at http://sox.sourceforge.net/soxformat.html. - - MP3: Either bitrate [kbps] with quality factor, such as ``128.2`` or - VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5`` - - FLAC: compression level. Whole number from ``0`` to ``8``. - ``8`` is default and highest compression. - - OGG/VORBIS: number from -1 to 10; -1 is the highest compression and lowest - quality. Default: ``3``. """ if compression is None: ext = str(filepath)[-3:].lower() @@ -127,8 +168,22 @@ def save( compression = 3. else: raise RuntimeError(f'Unsupported file type: "{ext}"') - signal = torch.classes.torchaudio.TensorSignal(tensor, sample_rate, channels_first) + signal = torch.classes.torchaudio.TensorSignal(src, sample_rate, channels_first) torch.ops.torchaudio.sox_io_save_audio_file(filepath, signal, compression) -load_wav = load +@_mod_utils.requires_module('torchaudio._torchaudio') +def load_wav( + filepath: str, + frame_offset: int = 0, + num_frames: int = -1, + channels_first: bool = True, +) -> Tuple[torch.Tensor, int]: + """Load wave file. + + + This function is defined only for the purpose of compatibility against other backend + for simple usecases, such as ``torchaudio.load_wav(filepath)``. + The implementation is the same as :py:func:`load` with ``normalize=False``. 
+ """ + return load(filepath, frame_offset, num_frames, normalize=False, channels_first=channels_first) diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py index cb53b3e02f..84ec2fa3f5 100644 --- a/torchaudio/backend/utils.py +++ b/torchaudio/backend/utils.py @@ -19,7 +19,11 @@ def list_audio_backends() -> List[str]: - """List available backends""" + """List available backends + + Returns: + List[str]: The list of available backends. + """ backends = [] if is_module_available('soundfile'): backends.append('soundfile') @@ -29,12 +33,13 @@ def list_audio_backends() -> List[str]: return backends -def set_audio_backend(backend: Optional[str]) -> None: +def set_audio_backend(backend: Optional[str]): """Set the backend for I/O operation Args: - backend (str): Name of the backend. One of "sox" or "soundfile", - based on availability of the system. + backend (Optional[str]): Name of the backend. + One of ``"sox"``, ``"sox_io"`` or ``"soundfile"`` based on availability + of the system. If ``None`` is provided the current backend is unassigned. """ if backend is not None and backend not in list_audio_backends(): raise RuntimeError( @@ -68,7 +73,11 @@ def _init_audio_backend(): def get_audio_backend() -> Optional[str]: - """Get the name of the current backend""" + """Get the name of the current backend + + Returns: + Optional[str]: The name of the current backend or ``None`` if no backend is assigned. + """ if torchaudio.load == no_backend.load: return None if torchaudio.load == sox_backend.load: diff --git a/torchaudio/functional.py b/torchaudio/functional.py index 78c8c594c9..713544cc74 100644 --- a/torchaudio/functional.py +++ b/torchaudio/functional.py @@ -1646,11 +1646,11 @@ def compute_deltas( r"""Compute delta coefficients of a tensor, usually a spectrogram: .. 
math:: - d_t = \frac{\sum_{n=1}^{\text{N}} n (c_{t+n} - c_{t-n})}{2 \sum_{n=1}^{\text{N} n^2} + d_t = \frac{\sum_{n=1}^{\text{N}} n (c_{t+n} - c_{t-n})}{2 \sum_{n=1}^{\text{N}} n^2} where :math:`d_t` is the deltas at time :math:`t`, :math:`c_t` is the spectrogram coeffcients at time :math:`t`, - :math:`N` is (`win_length`-1)//2. + :math:`N` is ``(win_length-1)//2``. Args: specgram (Tensor): Tensor of audio of dimension (..., freq, time) diff --git a/torchaudio/models/wav2letter.py b/torchaudio/models/wav2letter.py index 3466e42dd2..acd4dfe4b3 100644 --- a/torchaudio/models/wav2letter.py +++ b/torchaudio/models/wav2letter.py @@ -1,5 +1,3 @@ -from typing import Optional - from torch import Tensor from torch import nn @@ -7,8 +5,9 @@ class Wav2Letter(nn.Module): - r"""Wav2Letter model architecture from the `"Wav2Letter: an End-to-End ConvNet-based Speech Recognition System" - <https://arxiv.org/abs/1609.03193>`_ paper. + r"""Wav2Letter model architecture from the `Wav2Letter an End-to-End ConvNet-based Speech Recognition System`_. + + .. _Wav2Letter an End-to-End ConvNet-based Speech Recognition System: https://arxiv.org/abs/1609.03193 :math:`\text{padding} = \frac{\text{ceil}(\text{kernel} - \text{stride})}{2}` @@ -63,7 +62,7 @@ def __init__(self, num_classes: int = 40, def forward(self, x: Tensor) -> Tensor: r""" Args: - x (Tensor): Tensor of dimension (batch_size, num_features, input_length). + x (torch.Tensor): Tensor of dimension (batch_size, num_features, input_length). Returns: Tensor: Predictor tensor of dimension (batch_size, number_of_classes, input_length). diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py index eab82fdf98..92157d0569 100644 --- a/torchaudio/sox_effects/sox_effects.py +++ b/torchaudio/sox_effects/sox_effects.py @@ -15,31 +15,29 @@ @_mod_utils.requires_module('torchaudio._torchaudio') -def init_sox_effects() -> None: - """Initialize resources required to use ``SoxEffectsChain`` - - You do not need to call this function manually. 
It is called automatically. - - Once initialized, you do not need to call this function again across the multiple call of - ``SoxEffectsChain.sox_build_flow_effects``, though it is safe to do so as long as - ``shutdown_sox_effects`` is not called yet. - Once ``shutdown_sox_effects`` is called, you can no longer use SoX effects and - initializing again will result in error. +def init_sox_effects(): + """Initialize resources required to use sox effects. Note: - This function is not required for simple loading. + You do not need to call this function manually. It is called automatically. + + Once initialized, you do not need to call this function again across the multiple uses of + sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet. + Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing + again will result in error. """ torch.ops.torchaudio.sox_effects_initialize_sox_effects() @_mod_utils.requires_module("torchaudio._torchaudio") -def shutdown_sox_effects() -> None: - """Clean up resources required to use ``SoxEffectsChain`` +def shutdown_sox_effects(): + """Clean up resources required to use sox effects. - You do not need to call this function manually. It is called automatically. + Note: + You do not need to call this function manually. It is called automatically. It is safe to call this function multiple times. - Once ``shutdown_sox_effects`` is called, you can no longer use SoX effects and + Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing again will result in error. """ torch.ops.torchaudio.sox_effects_shutdown_sox_effects() @@ -49,10 +47,12 @@ def shutdown_sox_effects() -> None: def effect_names() -> List[str]: """Gets list of valid sox effect names - Returns: list[str] + Returns: + List[str]: list of available effect names. 
Example - >>> EFFECT_NAMES = torchaudio.sox_effects.effect_names() + >>> torchaudio.sox_effects.effect_names() + ['allpass', 'band', 'bandpass', ... ] """ return list(list_effects().keys()) @@ -66,6 +66,13 @@ def apply_effects_tensor( ) -> Tuple[torch.Tensor, int]: """Apply sox effects to given Tensor + Note: + This function works in the way very similar to ``sox`` command, however there are slight + differences. For example, ``sox`` command adds certain effects automatically (such as + ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function + only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also + need to give ``rate`` effect with desired sampling rate.) + Args: tensor (torch.Tensor): Input 2D Tensor. sample_rate (int): Sample rate @@ -79,20 +86,15 @@ def apply_effects_tensor( the same channels order. The shape of the Tensor can be different based on the effects applied. Sample rate can also be different based on the effects applied. - Notes: - This function works in the way very similar to ``sox`` command, however there are slight - differences. For example, ``sox`` commnad adds certain effects automatically (such as - ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does - only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also - need to give ``rate`` effect with desired sampling rate.) - - Examples: + Example - Basic usage + >>> >>> # Defines the effects to apply >>> effects = [ ... ['gain', '-n'], # normalises to 0dB ... ['pitch', '5'], # 5 cent pitch shift ... ['rate', '8000'], # resample to 8000 Hz ... 
] + >>> >>> # Generate pseudo wave: >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second >>> sample_rate = 16000 @@ -102,9 +104,12 @@ def apply_effects_tensor( >>> waveform tensor([[ 0.3138, 0.7620, -0.9019, ..., -0.7495, -0.4935, 0.5442], [-0.0832, 0.0061, 0.8233, ..., -0.5176, -0.9140, -0.2434]]) + >>> >>> # Apply effects >>> waveform, sample_rate = apply_effects_tensor( ... wave_form, sample_rate, effects, channels_first=True) + >>> + >>> # Check the result >>> # The new waveform is sampling rate 8000, 1 second. >>> # normalization and channel order are preserved >>> waveform.shape @@ -114,6 +119,40 @@ def apply_effects_tensor( [ 0.1331, 0.0436, -0.3783, ..., -0.0035, 0.0012, 0.0008]]) >>> sample_rate 8000 + + Example - Torchscript-able transform + >>> + >>> # Use `apply_effects_tensor` in `torch.nn.Module` and dump it to file, + >>> # then run sox effect via Torchscript runtime. + >>> + >>> class SoxEffectTransform(torch.nn.Module): + ... effects: List[List[str]] + ... + ... def __init__(self, effects: List[List[str]]): + ... super().__init__() + ... self.effects = effects + ... + ... def forward(self, tensor: torch.Tensor, sample_rate: int): + ... return sox_effects.apply_effects_tensor( + ... tensor, sample_rate, self.effects) + ... + ... + >>> # Create transform object + >>> effects = [ + ... ["lowpass", "-1", "300"], # apply single-pole lowpass filter + ... ["rate", "8000"], # change sample rate to 8000 + ... 
] + >>> trans = SoxEffectTransform(effects) + >>> + >>> # Dump it to file and load + >>> path = 'sox_effect.zip' + >>> torch.jit.script(trans).save(path) + >>> trans = torch.jit.load(path) + >>> + >>> # Run transform + >>> waveform, input_sample_rate = torchaudio.load("input.wav") + >>> waveform, sample_rate = trans(waveform, input_sample_rate) + >>> assert sample_rate == 8000 """ in_signal = torch.classes.torchaudio.TensorSignal(tensor, sample_rate, channels_first) out_signal = torch.ops.torchaudio.sox_effects_apply_effects_tensor(in_signal, effects) @@ -129,12 +168,22 @@ def apply_effects_file( ) -> Tuple[torch.Tensor, int]: """Apply sox effects to the audio file and load the resulting data as Tensor + Note: + This function works in the way very similar to ``sox`` command, however there are slight + differences. For example, ``sox`` command adds certain effects automatically (such as + ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given + effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate`` + effect with desired sampling rate, because internally, ``speed`` effects only alter sampling + rate and leave samples untouched. + Args: path (str): Path to the audio file. effects (List[List[str]]): List of effects. - normalize (bool): When ``True``, this function always return ``float32``, and sample values are - normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change - the resulting Tensor type to integer type. This argument has no effect for formats other + normalize (bool): + When ``True``, this function always returns ``float32``, and sample values are + normalized to ``[-1.0, 1.0]``. + If input file is integer WAV, giving ``False`` will change the resulting Tensor type to + integer type. This argument has no effect for formats other than integer WAV type. 
channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``. Otherwise, the returned Tensor's dimension is ``[time, channel]``. @@ -147,23 +196,19 @@ def apply_effects_file( If ``channels_first=True``, the resulting Tensor has dimension ``[channel, time]``, otherwise ``[time, channel]``. - Notes: - This function works in the way very similar to ``sox`` command, however there are slight - differences. For example, ``sox`` commnad adds certain effects automatically (such as - ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given - effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate`` - effect with desired sampling rate, because internally, ``speed`` effects only alter sampling - rate and leave samples untouched. - - Examples: + Example - Basic usage + >>> >>> # Defines the effects to apply >>> effects = [ ... ['gain', '-n'], # normalises to 0dB ... ['pitch', '5'], # 5 cent pitch shift ... ['rate', '8000'], # resample to 8000 Hz ... ] + >>> >>> # Apply effects and load data with channels_first=True >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True) + >>> + >>> # Check the result >>> waveform.shape torch.Size([2, 8000]) >>> waveform @@ -173,6 +218,42 @@ def apply_effects_file( -5.6159e-07, 4.8103e-07]]) >>> sample_rate 8000 + + Example - Apply random speed perturbation to dataset + >>> + >>> # Load data from file, apply random speed perturbation + >>> class RandomPerturbationFile(torch.utils.data.Dataset): + ... \"\"\"Given flist, apply random speed perturbation + ... + ... Suppose all the input files are at least one second long. + ... \"\"\" + ... def __init__(self, flist: List[str], sample_rate: int): + ... super().__init__() + ... self.flist = flist + ... self.sample_rate = sample_rate + ... self.rng = None + ... + ... def __getitem__(self, index): + ... speed = self.rng.uniform(0.5, 2.0) + ... effects = [ + ... 
['gain', '-n', '-10'], # apply 10 dB attenuation + ... ['remix', '-'], # merge all the channels + ... ['speed', f'{speed:.5f}'], # duration is now 0.5 ~ 2.0 seconds. + ... ['rate', f'{self.sample_rate}'], + ... ['pad', '0', '1.5'], # add 1.5 seconds silence at the end + ... ['trim', '0', '2'], # get the first 2 seconds + ... ] + ... waveform, _ = torchaudio.sox_effects.apply_effects_file( + ... self.flist[index], effects) + ... return waveform + ... + ... def __len__(self): + ... return len(self.flist) + ... + >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000) + >>> loader = torch.utils.data.DataLoader(dataset, batch_size=32) + >>> for batch in loader: + ... pass """ signal = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first) return signal.get_tensor(), signal.get_sample_rate() @@ -183,7 +264,7 @@ def apply_effects_file( def SoxEffect(): r"""Create an object for passing sox effect information between python and c++ - Note: + Warning: This function is deprecated. Please migrate to :func:`apply_effects_file` or :func:`apply_effects_tensor`. @@ -198,50 +279,56 @@ def SoxEffect(): class SoxEffectsChain(object): r"""SoX effects chain class. - Note: + Warning: This class is deprecated. Please migrate to :func:`apply_effects_file` or :func:`apply_effects_tensor`. Args: - normalization (bool, number, or callable, optional): If boolean `True`, then output is divided by `1 << 31` - (assumes signed 32-bit audio), and normalizes to `[-1, 1]`. If `number`, then output is divided by that - number. If `callable`, then the output is passed as a parameter to the given function, then the - output is divided by the result. (Default: ``True``) - channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``) - out_siginfo (sox_signalinfo_t, optional): a sox_signalinfo_t type, which could be helpful if the - audio type cannot be automatically determined.
(Default: ``None``) - out_encinfo (sox_encodinginfo_t, optional): a sox_encodinginfo_t type, which could be set if the - audio type cannot be automatically determined. (Default: ``None``) - filetype (str, optional): a filetype or extension to be set if sox cannot determine it - automatically. . (Default: ``'raw'``) + normalization (bool, number, or callable, optional): + If boolean ``True``, then output is divided by ``1 << 31`` + (assumes signed 32-bit audio), and normalizes to ``[-1, 1]``. + If ``number``, then output is divided by that number. + If ``callable``, then the output is passed as a parameter to the given function, then + the output is divided by the result. (Default: ``True``) + channels_first (bool, optional): + Set channels first or length first in result. (Default: ``True``) + out_siginfo (sox_signalinfo_t, optional): + a sox_signalinfo_t type, which could be helpful if the audio type cannot be + automatically determined. (Default: ``None``) + out_encinfo (sox_encodinginfo_t, optional): + a sox_encodinginfo_t type, which could be set if the audio type cannot be + automatically determined. (Default: ``None``) + filetype (str, optional): + a filetype or extension to be set if sox cannot determine it automatically. + (Default: ``'raw'``) Returns: - Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number + Tuple[Tensor, int]: + An output Tensor of size ``[C x L]`` or ``[L x C]`` where L is the number of audio frames and C is the number of channels. 
An integer which is the sample rate of the audio (as listed in the metadata of the file) Example >>> class MyDataset(Dataset): - >>> def __init__(self, audiodir_path): - >>> self.data = [os.path.join(audiodir_path, fn) for fn in os.listdir(audiodir_path)] - >>> self.E = torchaudio.sox_effects.SoxEffectsChain() - >>> self.E.append_effect_to_chain("rate", [16000]) # resample to 16000hz - >>> self.E.append_effect_to_chain("channels", ["1"]) # mono signal - >>> def __getitem__(self, index): - >>> fn = self.data[index] - >>> self.E.set_input_file(fn) - >>> x, sr = self.E.sox_build_flow_effects() - >>> return x, sr - >>> - >>> def __len__(self): - >>> return len(self.data) - >>> - >>> torchaudio.initialize_sox() + ... def __init__(self, audiodir_path): + ... self.data = [ + ... os.path.join(audiodir_path, fn) + ... for fn in os.listdir(audiodir_path)] + ... self.E = torchaudio.sox_effects.SoxEffectsChain() + ... self.E.append_effect_to_chain("rate", [16000]) # resample to 16000hz + ... self.E.append_effect_to_chain("channels", ["1"]) # mono signal + ... def __getitem__(self, index): + ... fn = self.data[index] + ... self.E.set_input_file(fn) + ... x, sr = self.E.sox_build_flow_effects() + ... return x, sr + ... + ... def __len__(self): + ... return len(self.data) + ... >>> ds = MyDataset(path_to_audio_files) >>> for sig, sr in ds: - >>> [do something here] - >>> torchaudio.shutdown_sox() - + ... pass """ EFFECTS_UNIMPLEMENTED = {"spectrogram", "splice", "noiseprof", "fir"} @@ -298,9 +385,9 @@ def sox_build_flow_effects(self, out (Tensor, optional): Where the output will be written to. (Default: ``None``) Returns: - Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number - of audio frames and C is the number of channels. 
An integer which is the sample rate of the - audio (as listed in the metadata of the file) + Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where + L is the number of audio frames and C is the number of channels. + An integer which is the sample rate of the audio (as listed in the metadata of the file) """ # initialize output tensor if out is not None: diff --git a/torchaudio/transforms.py b/torchaudio/transforms.py index f34f5c468e..72e55c94c8 100644 --- a/torchaudio/transforms.py +++ b/torchaudio/transforms.py @@ -86,20 +86,8 @@ def forward(self, waveform: Tensor) -> Tensor: class GriffinLim(torch.nn.Module): r"""Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation. - Implementation ported from `librosa`. - .. [1] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto. - "librosa: Audio and music signal analysis in python." - In Proceedings of the 14th python in science conference, pp. 18-25. 2015. - - .. [2] Perraudin, N., Balazs, P., & Søndergaard, P. L. - "A fast Griffin-Lim algorithm," - IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (pp. 1-4), - Oct. 2013. - - .. [3] D. W. Griffin and J. S. Lim, - "Signal estimation from modified short-time Fourier transform," - IEEE Trans. ASSP, vol.32, no.2, pp.236–243, Apr. 1984. + Implementation ported from ``librosa`` [1]_, [2]_, [3]_. Args: n_fft (int, optional): Size of FFT, creates ``n_fft // 2 + 1`` bins. (Default: ``400``) @@ -117,6 +105,24 @@ class GriffinLim(torch.nn.Module): Values near 1 can lead to faster convergence, but above 1 may not converge. (Default: ``0.99``) length (int, optional): Array length of the expected output. (Default: ``None``) rand_init (bool, optional): Initializes phase randomly if True and to zero otherwise. (Default: ``True``) + + References: + .. 
[1] + | McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, + and Oriol Nieto. + | "librosa: Audio and music signal analysis in python." + | In Proceedings of the 14th python in science conference, pp. 18-25. 2015. + + .. [2] + | Perraudin, N., Balazs, P., & Søndergaard, P. L. + | "A fast Griffin-Lim algorithm," + | IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (pp. 1-4), + | Oct. 2013. + + .. [3] + | D. W. Griffin and J. S. Lim, + | "Signal estimation from modified short-time Fourier transform," + | IEEE Trans. ASSP, vol.32, no.2, pp.236–243, Apr. 1984. """ __constants__ = ['n_fft', 'n_iter', 'win_length', 'hop_length', 'power', 'normalized', 'length', 'momentum', 'rand_init'] @@ -153,8 +159,9 @@ def __init__(self, def forward(self, specgram: Tensor) -> Tensor: r""" Args: - specgram (Tensor): A magnitude-only STFT spectrogram of dimension (..., freq, frames) - where freq is ``n_fft // 2 + 1``. + specgram (Tensor): + A magnitude-only STFT spectrogram of dimension (..., freq, frames) + where freq is ``n_fft // 2 + 1``. Returns: Tensor: waveform of (..., time), where time equals the ``length`` parameter if given. diff --git a/torchaudio/utils/sox_utils.py b/torchaudio/utils/sox_utils.py index d9901472ec..326de42a05 100644 --- a/torchaudio/utils/sox_utils.py +++ b/torchaudio/utils/sox_utils.py @@ -55,8 +55,8 @@ def set_use_threads(use_threads: bool): """Set multithread option for sox effect chain Args: - use_threads (bool): When True, enables libsox's parallel effects channels processing. - To use mutlithread, the underlying libsox has to be compiled with OpenMP support. + use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing. + To use multithreading, the underlying ``libsox`` has to be compiled with OpenMP support.
See Also: http://sox.sourceforge.net/sox.html @@ -69,7 +69,7 @@ def list_effects() -> Dict[str, str]: """List the available sox effect names Returns: - Dict[str, str]: Mapping from "effect name" to "usage" + Dict[str, str]: Mapping from ``effect name`` to ``usage`` """ return dict(torch.ops.torchaudio.sox_utils_list_effects()) From 41ff487894f7b7667907828a4db0b68cc8972b92 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Thu, 16 Jul 2020 17:09:34 +0000 Subject: [PATCH 2/4] Fix typo and tweak backend.common --- docs/source/backend.rst | 2 +- torchaudio/backend/common.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/source/backend.rst b/docs/source/backend.rst index ffbdd63852..5166f2c648 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -3,7 +3,7 @@ torchaudio.backend ================== -:mod:`torchaudio.backend` module provides implemenrations for audio file I/O, using different backend libraries +:mod:`torchaudio.backend` module provides implementations for audio file I/O, using different backend libraries. To switch backend, use :py:func:`torchaudio.set_audio_backend`. To check the current backend use :py:func:`torchaudio.get_audio_backend`. .. warning:: diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 47249e739e..bc61df4746 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -4,8 +4,7 @@ class SignalInfo: """Data class returned ``info`` functions. - Used by :py:func:`torchaudio.backend.sox_backend.info` and - :py:func:`torchaudio.backend.soundfile_backend.info` + Used by :ref:`sox backend` and :ref:`soundfile backend` See https://fossies.org/dox/sox-14.4.2/structsox__signalinfo__t.html @@ -29,8 +28,7 @@ def __init__(self, class EncodingInfo: """Data class returned ``info`` functions. 
- Used by :py:func:`torchaudio.backend.sox_backend.info` and - :py:func:`torchaudio.backend.soundfile_backend.info` + Used by :ref:`sox backend` and :ref:`soundfile backend` See https://fossies.org/dox/sox-14.4.2/structsox__encodinginfo__t.html From a020c0c86cb39b94fae6f51cb1868cbc714f8fd8 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Thu, 16 Jul 2020 17:11:10 +0000 Subject: [PATCH 3/4] Fix typo --- docs/source/backend.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/backend.rst b/docs/source/backend.rst index 5166f2c648..5f3a648c95 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -7,7 +7,7 @@ torchaudio.backend To switch backend, use :py:func:`torchaudio.set_audio_backend`. To check the current backend use :py:func:`torchaudio.get_audio_backend`. .. warning:: - Although ``sox`` backend is default for backward compatibility reason, it has a number of issues, therefore it is highly recommended to use ``sox_io`` backend instead. Note, however, that due to the interface refinement, functions defined in ``sox`` backend and those defined in ``sox_io`` backend do not have the signatures. + Although ``sox`` backend is default for backward compatibility reason, it has a number of issues, therefore it is highly recommended to use ``sox_io`` backend instead. Note, however, that due to the interface refinement, functions defined in ``sox`` backend and those defined in ``sox_io`` backend do not have the same signatures. .. note:: Instead of calling functions in :mod:`torchaudio.backend` directly, please use ``torchaudio.info``, ``torhcaudio.load``, ``torchaudio.load_wav`` and ``torchaudio.save`` with proper backend set with :func:`torchaudio.get_audio_backend`. 
From 6c6512a5ec7d988dc7a245d912d6a3bf39782ec4 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Mon, 20 Jul 2020 15:02:02 -0700 Subject: [PATCH 4/4] Address reviews --- docs/source/backend.rst | 2 +- torchaudio/sox_effects/sox_effects.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/backend.rst b/docs/source/backend.rst index 5f3a648c95..6b0e074d5c 100644 --- a/docs/source/backend.rst +++ b/docs/source/backend.rst @@ -20,7 +20,7 @@ There are currently three implementations available. ``sox`` backend is the original backend which is built on ``libsox``. This module is currently default but is known to have number of issues, such as wrong handling of WAV files other than 16-bit signed integer. Users are encouraged to use ``sox_io`` backend. This backend requires C++ extension module and is not available on Windows system. -``sox_io`` backend is the new backend which is built on ``libsox`` and bound to Python with ``Torchscript``. This module is thoroughly tested and addresses all the known issues ``sox`` backend has. Function calls to this backend can be Torchscriptable. This backend requires C++ extension module and is not available on Windows system. +``sox_io`` backend is the new backend which is built on ``libsox`` and bound to Python with ``Torchscript``. This module addresses all the known issues ``sox`` backend has. Function calls to this backend can be Torchscriptable. This backend requires C++ extension module and is not available on Windows system. ``soundfile`` backend is built on ``PySoundFile``. You need to install ``PySoundFile`` separately. diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py index 92157d0569..9262786541 100644 --- a/torchaudio/sox_effects/sox_effects.py +++ b/torchaudio/sox_effects/sox_effects.py @@ -142,16 +142,16 @@ def apply_effects_tensor( ... ["lowpass", "-1", "300"], # apply single-pole lowpass filter ...
["rate", "8000"], # change sample rate to 8000 ... ] - >>> trans = SoxEffectTensorTransform(effects, input_sample_rate) + >>> transform = SoxEffectTensorTransform(effects, input_sample_rate) >>> >>> # Dump it to file and load >>> path = 'sox_effect.zip' - >>> torch.jit.script(trans).save(path) - >>> trans = torch.jit.load(path) + >>> torch.jit.script(transform).save(path) + >>> transform = torch.jit.load(path) >>> - >>>> # Run transform + >>> # Run transform >>> waveform, input_sample_rate = torchaudio.load("input.wav") - >>> waveform, sample_rate = trans(waveform, input_sample_rate) + >>> waveform, sample_rate = transform(waveform, input_sample_rate) >>> assert sample_rate == 8000 """ in_signal = torch.classes.torchaudio.TensorSignal(tensor, sample_rate, channels_first)