From 5742fa40ade9a7e29e0fac9f5512ff9fed168711 Mon Sep 17 00:00:00 2001 From: Tim Loderhose Date: Sun, 13 Sep 2020 16:00:27 +0200 Subject: [PATCH 1/4] Add pathlib.Path support to sox_io backend --- torchaudio/backend/sox_io_backend.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index 31e69c443e..d9d39a4fcd 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -1,4 +1,5 @@ -from typing import Tuple, Optional +from typing import Tuple, Optional, Union +from pathlib import Path import torch from torchaudio._internal import ( @@ -20,22 +21,24 @@ def __init__(self, sample_rate: int, num_frames: int, num_channels: int): @_mod_utils.requires_module('torchaudio._torchaudio') -def info(filepath: str) -> AudioMetaData: +def info(filepath: Union[str, Path]) -> AudioMetaData: """Get signal information of an audio file. Args: - filepath (str): Path to audio file + filepath (str/pathlib.Path): Path to audio file Returns: AudioMetaData: meta data of the given audio. """ + # Cast to str in case type is `pathlib.Path` + filepath = str(filepath) sinfo = torch.ops.torchaudio.sox_io_get_info(filepath) return AudioMetaData(sinfo.get_sample_rate(), sinfo.get_num_frames(), sinfo.get_num_channels()) @_mod_utils.requires_module('torchaudio._torchaudio') def load( - filepath: str, + filepath: Union[str, Path], frame_offset: int = 0, num_frames: int = -1, normalize: bool = True, @@ -80,7 +83,7 @@ def load( ``[-1.0, 1.0]``. Args: - filepath (str): + filepath (str/pathlib.Path): Path to audio file frame_offset (int): Number of frames to skip before start reading data. @@ -105,6 +108,8 @@ def load( integer type, else ``float32`` type. If ``channels_first=True``, it has ``[channel, time]`` else ``[time, channel]``. """ + # Cast to str in case type is `pathlib.Path` + filepath = str(filepath) signal = torch.ops.torchaudio.sox_io_load_audio_file( filepath, frame_offset, num_frames, normalize, channels_first) return signal.get_tensor(), signal.get_sample_rate() @@ -112,7 +117,7 @@ def load( @_mod_utils.requires_module('torchaudio._torchaudio') def save( - filepath: str, + filepath: Union[str, Path], src: torch.Tensor, sample_rate: int, channels_first: bool = True, @@ -140,7 +145,7 @@ def save( and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc. Args: - filepath (str): Path to save file. + filepath (str/pathlib.Path): Path to save file. tensor (torch.Tensor): Audio data to save. must be 2D tensor. sample_rate (int): sampling rate channels_first (bool): @@ -158,6 +163,8 @@ def save( See the detail at http://sox.sourceforge.net/soxformat.html. """ + # Cast to str in case type is `pathlib.Path` + filepath = str(filepath) if compression is None: ext = str(filepath).split('.')[-1].lower() if ext in ['wav', 'sph']: @@ -176,7 +183,7 @@ def save( @_mod_utils.requires_module('torchaudio._torchaudio') def load_wav( - filepath: str, + filepath: Union[str, Path], frame_offset: int = 0, num_frames: int = -1, channels_first: bool = True, From 93bb3da8915c86406fcac219a829edee959a3c69 Mon Sep 17 00:00:00 2001 From: Tim Loderhose Date: Tue, 15 Sep 2020 21:30:51 +0200 Subject: [PATCH 2/4] Remove Union type hints, clarify docstrings --- torchaudio/backend/common.py | 12 ++++++++---- torchaudio/backend/sox_io_backend.py | 21 +++++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index bc61df4746..3f603c6426 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -60,7 +60,8 @@ def __init__(self, _LOAD_DOCSTRING = r"""Loads an audio file from disk into a tensor Args: - filepath: Path to audio file + filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated + as ``str`` for TorchScript compiler compatibility. out: An optional output tensor to use instead of creating one. (Default: ``None``) @@ -112,7 +113,8 @@ def __init__(self, shifting the input right by 16 bits. Args: - filepath: Path to audio file + filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated + as ``str`` for TorchScript compiler compatibility. Returns: (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where L is the number @@ -123,7 +125,8 @@ def __init__(self, _SAVE_DOCSTRING = r"""Saves a Tensor on file as an audio file Args: - filepath: Path to audio file + filepath: Path to audio file. This function also handles ``pathlib.Path`` objects, but is + annotated as ``str`` for TorchScript compiler compatibility. src: An input 2D tensor of shape `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels sample_rate: An integer which is the sample rate of the @@ -137,7 +140,8 @@ def __init__(self, _INFO_DOCSTRING = r"""Gets metadata from an audio file without loading the signal. Args: - filepath: Path to audio file + filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated + as ``str`` for TorchScript compiler compatibility. Returns: (sox_signalinfo_t, sox_encodinginfo_t): A si (sox_signalinfo_t) signal diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index d9d39a4fcd..f8ab75db20 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -21,11 +21,13 @@ def __init__(self, sample_rate: int, num_frames: int, num_channels: int): @_mod_utils.requires_module('torchaudio._torchaudio') -def info(filepath: Union[str, Path]) -> AudioMetaData: +def info(filepath: str) -> AudioMetaData: """Get signal information of an audio file. Args: - filepath (str/pathlib.Path): Path to audio file + filepath (str or pathlib.Path): + Path to audio file. This function also handles ``pathlib.Path`` objects, but is annotated as + ``str`` for TorchScript compiler compatibility. Returns: AudioMetaData: meta data of the given audio. @@ -38,7 +40,7 @@ def info(filepath: Union[str, Path]) -> AudioMetaData: @_mod_utils.requires_module('torchaudio._torchaudio') def load( - filepath: Union[str, Path], + filepath: str, frame_offset: int = 0, num_frames: int = -1, normalize: bool = True, @@ -83,8 +85,9 @@ def load( ``[-1.0, 1.0]``. Args: - filepath (str/pathlib.Path): - Path to audio file + filepath (str or pathlib.Path): + Path to audio file. This function also handles ``pathlib.Path`` objects, but is + annotated as ``str`` for TorchScript compiler compatibility. frame_offset (int): Number of frames to skip before start reading data. num_frames (int): @@ -117,7 +120,7 @@ def load( @_mod_utils.requires_module('torchaudio._torchaudio') def save( - filepath: Union[str, Path], + filepath: str, src: torch.Tensor, sample_rate: int, channels_first: bool = True, @@ -145,7 +148,9 @@ def save( and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc. Args: - filepath (str/pathlib.Path): Path to save file. + filepath (str or pathlib.Path): + Path to save file. This function also handles ``pathlib.Path`` objects, but is annotated + as ``str`` for TorchScript compiler compatibility. tensor (torch.Tensor): Audio data to save. must be 2D tensor. sample_rate (int): sampling rate channels_first (bool): @@ -183,7 +188,7 @@ def save( @_mod_utils.requires_module('torchaudio._torchaudio') def load_wav( - filepath: Union[str, Path], + filepath: str, frame_offset: int = 0, num_frames: int = -1, channels_first: bool = True, From 5a029a6594ec79aabf32997a01ae44795ed1d5f3 Mon Sep 17 00:00:00 2001 From: Tim Loderhose Date: Tue, 15 Sep 2020 21:59:08 +0200 Subject: [PATCH 3/4] Revert change to common doc and unnecessary import --- torchaudio/backend/common.py | 3 +-- torchaudio/backend/sox_io_backend.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index 3f603c6426..fb7ef9a214 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -125,8 +125,7 @@ def __init__(self, _SAVE_DOCSTRING = r"""Saves a Tensor on file as an audio file Args: - filepath: Path to audio file. This function also handles ``pathlib.Path`` objects, but is - annotated as ``str`` for TorchScript compiler compatibility. + filepath: Path to audio file. src: An input 2D tensor of shape `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels sample_rate: An integer which is the sample rate of the diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py index f8ab75db20..ef5632ea57 100644 --- a/torchaudio/backend/sox_io_backend.py +++ b/torchaudio/backend/sox_io_backend.py @@ -1,5 +1,4 @@ -from typing import Tuple, Optional, Union -from pathlib import Path +from typing import Tuple, Optional import torch from torchaudio._internal import ( From 0226fc058cea8c95cf5f45efc2c3ade77134860d Mon Sep 17 00:00:00 2001 From: Tim Loderhose Date: Wed, 16 Sep 2020 21:27:10 +0200 Subject: [PATCH 4/4] Revert change to common documentation --- torchaudio/backend/common.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py index fb7ef9a214..bc61df4746 100644 --- a/torchaudio/backend/common.py +++ b/torchaudio/backend/common.py @@ -60,8 +60,7 @@ def __init__(self, _LOAD_DOCSTRING = r"""Loads an audio file from disk into a tensor Args: - filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated - as ``str`` for TorchScript compiler compatibility. + filepath: Path to audio file out: An optional output tensor to use instead of creating one. (Default: ``None``) @@ -113,8 +112,7 @@ def __init__(self, shifting the input right by 16 bits. Args: - filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated - as ``str`` for TorchScript compiler compatibility. + filepath: Path to audio file Returns: (Tensor, int): An output tensor of size `[C x L]` or `[L x C]` where L is the number @@ -125,7 +123,7 @@ def __init__(self, _SAVE_DOCSTRING = r"""Saves a Tensor on file as an audio file Args: - filepath: Path to audio file. + filepath: Path to audio file src: An input 2D tensor of shape `[C x L]` or `[L x C]` where L is the number of audio frames, C is the number of channels sample_rate: An integer which is the sample rate of the @@ -139,8 +137,7 @@ def __init__(self, _INFO_DOCSTRING = r"""Gets metadata from an audio file without loading the signal. Args: - filepath: Path to audio file. This function also handles `pathlib.Path` objects, but is annotated - as ``str`` for TorchScript compiler compatibility. + filepath: Path to audio file Returns: (sox_signalinfo_t, sox_encodinginfo_t): A si (sox_signalinfo_t) signal