|
from typing import Optional, Tuple

import scipy.io.wavfile
import torch
| 5 | + |
| 6 | + |
def get_test_name(func, _, params):
    """Build a test name from a function and its parameter set.

    The name is ``func.__name__`` followed by an underscore and the
    ``str()`` of every entry in ``params.args`` joined with underscores.
    The second positional argument is ignored.
    """
    suffix = "_".join(map(str, params.args))
    return f'{func.__name__}_{suffix}'
| 9 | + |
| 10 | + |
def normalize_wav(tensor: torch.Tensor) -> torch.Tensor:
    """Convert integer PCM samples to float32, scaled by the dtype's limits.

    int32, int16 and uint8 inputs are cast to float32 (uint8 is first
    re-centred by subtracting 128). Positive samples are divided by the
    dtype's positive limit and negative samples by the magnitude of its
    negative limit. Tensors of any other dtype, including float32, are
    returned unchanged (the very same object).
    """
    # dtype -> (offset removed after the float cast, positive divisor, negative divisor)
    scaling = {
        torch.int32: (0, 2147483647., 2147483648.),
        torch.int16: (0, 32767., 32768.),
        torch.uint8: (128, 127., 128.),
    }
    spec = scaling.get(tensor.dtype)
    if spec is None:
        return tensor

    offset, positive_limit, negative_limit = spec
    scaled = tensor.to(torch.float32)
    if offset:
        scaled = scaled - offset
    # The two halves use different divisors because the integer range is
    # asymmetric (one more negative value than positive).
    scaled[scaled > 0] /= positive_limit
    scaled[scaled < 0] /= negative_limit
    return scaled
| 27 | + |
| 28 | + |
def get_wav_data(
    dtype: str,
    num_channels: int,
    *,
    num_frames: Optional[int] = None,
    normalize: bool = True,
    channels_first: bool = True,
) -> torch.Tensor:
    """Generate a linear ramp signal of the given dtype and num_channels.

    Before normalization the data range is
        [-1.0, 1.0] for float32,
        [-2147483648, 2147483647] for int32,
        [-32768, 32767] for int16,
        [0, 255] for uint8.

    Args:
        dtype: Name of the torch dtype of the generated ramp. One of
            'float32', 'int32', 'int16' or 'uint8'.
        num_channels: Number of channels; every channel holds the same ramp.
        num_frames: Number of points in the ramp. Defaults to 256 for uint8
            and 1 << 16 otherwise, so that the whole int16 value range is
            covered.
        normalize: If True, the result is passed through ``normalize_wav``
            before being returned.
        channels_first: If True the result has shape
            (num_channels, num_frames), otherwise (num_frames, num_channels).

    Returns:
        The generated 2D tensor.

    Raises:
        AttributeError: If ``dtype`` is not the name of a torch attribute.
        ValueError: If ``dtype`` is not one of the supported dtypes.
    """
    dtype_ = getattr(torch, dtype)

    value_ranges = {
        'uint8': (0, 255),
        'float32': (-1., 1.),
        'int32': (-2147483648, 2147483647),
        'int16': (-32768, 32767),
    }
    if dtype not in value_ranges:
        # Fail loudly instead of hitting an unbound local further down.
        raise ValueError(
            f'Unsupported dtype: {dtype!r}. '
            f'Expected one of {sorted(value_ranges)}.'
        )

    if num_frames is None:
        num_frames = 256 if dtype == 'uint8' else 1 << 16

    low, high = value_ranges[dtype]
    base = torch.linspace(low, high, num_frames, dtype=dtype_)
    data = base.repeat([num_channels, 1])
    if not channels_first:
        data = data.transpose(1, 0)
    if normalize:
        data = normalize_wav(data)
    return data
| 71 | + |
| 72 | + |
def load_wav(
    path: str,
    normalize: bool = True,
    channels_first: bool = True,
) -> Tuple[torch.Tensor, int]:
    """Load wav file without torchaudio.

    The file is read with ``scipy.io.wavfile.read`` and converted to a
    2D tensor.

    Args:
        path: Path of the wav file to read.
        normalize: If True, the samples are passed through ``normalize_wav``.
        channels_first: If True the returned tensor is transposed so that
            the channel dimension comes first; otherwise the layout
            produced by scipy is kept.

    Returns:
        A ``(data, sample_rate)`` tuple.
    """
    sample_rate, data = scipy.io.wavfile.read(path)
    # NOTE(review): the copy presumably guarantees torch gets a writable,
    # owned buffer rather than whatever scipy handed back -- confirm.
    data = torch.from_numpy(data.copy())
    if data.ndim == 1:
        # Single-channel files come back 1D; add a trailing channel axis so
        # the rest of the function always deals with a 2D tensor.
        data = data.unsqueeze(1)
    if normalize:
        data = normalize_wav(data)
    if channels_first:
        data = data.transpose(1, 0)
    return data, sample_rate
| 84 | + |
| 85 | + |
def save_wav(path, data, sample_rate, channels_first=True):
    """Write a tensor to a wav file with scipy instead of torchaudio.

    When ``channels_first`` is True the first two dimensions of ``data``
    are swapped before writing; otherwise ``data`` is written as given.
    The tensor is converted with ``Tensor.numpy()`` and handed to
    ``scipy.io.wavfile.write`` together with ``sample_rate``.
    """
    frames = data.transpose(1, 0) if channels_first else data
    samples = frames.numpy()
    scipy.io.wavfile.write(path, sample_rate, samples)
0 commit comments