diff --git a/.jenkins/build.sh b/.jenkins/build.sh index d9a5b4b0d50..e398c5ab866 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -26,7 +26,7 @@ pip install -r $DIR/../requirements.txt # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link # pip uninstall -y torch torchvision torchaudio torchtext -# pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext # For Tensorboard. Until 1.14 moves to the release channel. pip install tb-nightly diff --git a/_static/img/perf_viz.png b/_static/img/perf_viz.png new file mode 100644 index 00000000000..85608557bcb Binary files /dev/null and b/_static/img/perf_viz.png differ diff --git a/_static/img/trace_img.png b/_static/img/trace_img.png index 172aeb1bef0..8c540ceb519 100644 Binary files a/_static/img/trace_img.png and b/_static/img/trace_img.png differ diff --git a/beginner_source/audio_preprocessing_tutorial.py b/beginner_source/audio_preprocessing_tutorial.py index 8b29b07bb42..d094ddf48af 100644 --- a/beginner_source/audio_preprocessing_tutorial.py +++ b/beginner_source/audio_preprocessing_tutorial.py @@ -26,7 +26,7 @@ ###################################################################### # Preparing data and utility functions (skip this section) # -------------------------------------------------------- -# +# #@title Prepare data and utility functions. {display-mode: "form"} #@markdown @@ -52,6 +52,8 @@ import requests import matplotlib import matplotlib.pyplot as plt +import pandas as pd +import time from IPython.display import Audio, display [width, height] = matplotlib.rcParams['figure.figsize'] @@ -117,7 +119,10 @@ def _get_sample(path, resample=None): ["remix", "1"] ] if resample: - effects.append(["rate", f'{resample}']) + effects.extend([ + ["lowpass", f"{resample // 2}"], + ["rate", f'{resample}'], + ]) return torchaudio.sox_effects.apply_effects_file(path, effects=effects) def get_speech_sample(*, resample=None): @@ -138,18 +143,6 @@ def get_rir_sample(*, resample=None, processed=False): def get_noise_sample(*, resample=None): return _get_sample(SAMPLE_NOISE_PATH, resample=resample) -def print_metadata(metadata, src=None): - if src: - print("-" * 10) - print("Source:", src) - print("-" * 10) - print(" - sample_rate:", metadata.sample_rate) - print(" - num_channels:", metadata.num_channels) - print(" - num_frames:", metadata.num_frames) - print(" - bits_per_sample:", metadata.bits_per_sample) - print(" - encoding:", metadata.encoding) - print() - def print_stats(waveform, sample_rate=None, src=None): if src: print("-" * 10) @@ -222,7 +215,7 @@ def inspect_file(path): print("Source:", path) print("-" * 10) print(f" - File size: {os.path.getsize(path)} bytes") - print_metadata(torchaudio.info(path)) + print(f" - {torchaudio.info(path)}") def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None): fig, axs = plt.subplots(1, 1) @@ -300,38 +293,137 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): axis.legend(lns, labels, loc=0) plt.show(block=False) +DEFAULT_OFFSET = 201 +SWEEP_MAX_SAMPLE_RATE = 48000 +DEFAULT_LOWPASS_FILTER_WIDTH = 6 +DEFAULT_ROLLOFF = 0.99 +DEFAULT_RESAMPLING_METHOD = 'sinc_interpolation' + +def _get_log_freq(sample_rate, max_sweep_rate, offset): + """Get freqs evenly spaced out in log-scale, between [0, 
max_sweep_rate // 2] + + offset is used to avoid negative infinity `log(offset + x)`. + + """ + half = sample_rate // 2 + start, stop = math.log(offset), math.log(offset + max_sweep_rate // 2) + return torch.exp(torch.linspace(start, stop, sample_rate, dtype=torch.double)) - offset + +def _get_inverse_log_freq(freq, sample_rate, offset): + """Find the time where the given frequency is given by _get_log_freq""" + half = sample_rate // 2 + return sample_rate * (math.log(1 + freq / offset) / math.log(1 + half / offset)) + +def _get_freq_ticks(sample_rate, offset, f_max): + # Given the original sample rate used for generating the sweep, + # find the x-axis value where the log-scale major frequency values fall in + time, freq = [], [] + for exp in range(2, 5): + for v in range(1, 10): + f = v * 10 ** exp + if f < sample_rate // 2: + t = _get_inverse_log_freq(f, sample_rate, offset) / sample_rate + time.append(t) + freq.append(f) + t_max = _get_inverse_log_freq(f_max, sample_rate, offset) / sample_rate + time.append(t_max) + freq.append(f_max) + return time, freq + +def plot_sweep(waveform, sample_rate, title, max_sweep_rate=SWEEP_MAX_SAMPLE_RATE, offset=DEFAULT_OFFSET): + x_ticks = [100, 500, 1000, 5000, 10000, 20000, max_sweep_rate // 2] + y_ticks = [1000, 5000, 10000, 20000, sample_rate//2] + + time, freq = _get_freq_ticks(max_sweep_rate, offset, sample_rate // 2) + freq_x = [f if f in x_ticks and f <= max_sweep_rate // 2 else None for f in freq] + freq_y = [f for f in freq if f >= 1000 and f in y_ticks and f <= sample_rate // 2] + + figure, axis = plt.subplots(1, 1) + axis.specgram(waveform[0].numpy(), Fs=sample_rate) + plt.xticks(time, freq_x) + plt.yticks(freq_y, freq_y) + axis.set_xlabel('Original Signal Frequency (Hz, log scale)') + axis.set_ylabel('Waveform Frequency (Hz)') + axis.xaxis.grid(True, alpha=0.67) + axis.yaxis.grid(True, alpha=0.67) + figure.suptitle(f'{title} (sample rate: {sample_rate} Hz)') + plt.show(block=True) + +def get_sine_sweep(sample_rate, offset=DEFAULT_OFFSET): + max_sweep_rate = sample_rate + freq = _get_log_freq(sample_rate, max_sweep_rate, offset) + delta = 2 * math.pi * freq / sample_rate + cummulative = torch.cumsum(delta, dim=0) + signal = torch.sin(cummulative).unsqueeze(dim=0) + return signal + +def benchmark_resample( + method, + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=DEFAULT_LOWPASS_FILTER_WIDTH, + rolloff=DEFAULT_ROLLOFF, + resampling_method=DEFAULT_RESAMPLING_METHOD, + beta=None, + librosa_type=None, + iters=5 +): + if method == "functional": + begin = time.time() + for _ in range(iters): + F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=lowpass_filter_width, + rolloff=rolloff, resampling_method=resampling_method) + elapsed = time.time() - begin + return elapsed / iters + elif method == "transforms": + resampler = T.Resample(sample_rate, resample_rate, lowpass_filter_width=lowpass_filter_width, + rolloff=rolloff, resampling_method=resampling_method, dtype=waveform.dtype) + begin = time.time() + for _ in range(iters): + resampler(waveform) + elapsed = time.time() - begin + return elapsed / iters + elif method == "librosa": + waveform_np = waveform.squeeze().numpy() + begin = time.time() + for _ in range(iters): + librosa.resample(waveform_np, sample_rate, resample_rate, res_type=librosa_type) + elapsed = time.time() - begin + return elapsed / iters + ###################################################################### # Audio I/O # ========= -# +# # torchaudio integrates ``libsox`` and provides a rich 
set of audio I/O. -# +# ###################################################################### # Quering audio metadata # ---------------------- -# +# # ``torchaudio.info`` function fetches metadata of audio. You can provide # a path-like object or file-like object. -# +# metadata = torchaudio.info(SAMPLE_WAV_PATH) -print_metadata(metadata, src=SAMPLE_WAV_PATH) +print(metadata) ###################################################################### # Where -# +# # - ``sample_rate`` is the sampling rate of the audio # - ``num_channels`` is the number of channels # - ``num_frames`` is the number of frames per channel # - ``bits_per_sample`` is bit depth # - ``encoding`` is the sample coding format -# +# # The values ``encoding`` can take are one of the following -# +# # - ``"PCM_S"``: Signed integer linear PCM # - ``"PCM_U"``: Unsigned integer linear PCM # - ``"PCM_F"``: Floating point linear PCM @@ -351,34 +443,35 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): # - ``"GSM"``: GSM-FR # [`wikipedia `__] # - ``"UNKNOWN"`` None of avobe -# +# ###################################################################### # **Note** -# +# # - ``bits_per_sample`` can be ``0`` for formats with compression and/or # variable bit rate. (such as mp3) # - ``num_frames`` can be ``0`` for GSM-FR format. -# +# metadata = torchaudio.info(SAMPLE_MP3_PATH) -print_metadata(metadata, src=SAMPLE_MP3_PATH) +print(metadata) metadata = torchaudio.info(SAMPLE_GSM_PATH) -print_metadata(metadata, src=SAMPLE_GSM_PATH) +print(metadata) ###################################################################### # Querying file-like object # ~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # ``info`` function works on file-like object as well. -# +# +print("Source:", SAMPLE_WAV_URL) with requests.get(SAMPLE_WAV_URL, stream=True) as response: metadata = torchaudio.info(response.raw) -print_metadata(metadata, src=SAMPLE_WAV_URL) +print(metadata) ###################################################################### @@ -387,35 +480,36 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): # Therefore, depending on the audio format, it cannot get the correct # metadata, including the format itself. The following example illustrates # this. -# +# # - Use ``format`` argument to tell what audio format it is. # - The returned metadata has ``num_frames = 0`` -# +# +print("Source:", SAMPLE_MP3_URL) with requests.get(SAMPLE_MP3_URL, stream=True) as response: metadata = torchaudio.info(response.raw, format="mp3") print(f"Fetched {response.raw.tell()} bytes.") -print_metadata(metadata, src=SAMPLE_MP3_URL) +print(metadata) ###################################################################### # Loading audio data into Tensor # ------------------------------ -# +# # To load audio data, you can use ``torchaudio.load``. -# +# # This function accepts path-like object and file-like object. -# +# # The returned value is a tuple of waveform (``Tensor``) and sample rate # (``int``). -# +# # By default, the resulting tensor object has ``dtype=torch.float32`` and # its value range is normalized within ``[-1.0, 1.0]``. -# +# # For the list of supported format, please refer to `the torchaudio # documentation `__. 
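# As a hedged aside before the basic example below (this snippet is not part of
# the original tutorial code): for WAV files, ``torchaudio.load`` also accepts a
# ``normalize`` argument. Passing ``normalize=False`` skips the conversion to
# ``float32`` in ``[-1.0, 1.0]`` and returns the raw integer PCM samples instead.

unnormalized_waveform, unnormalized_sr = torchaudio.load(SAMPLE_WAV_PATH, normalize=False)
print(unnormalized_waveform.dtype)  # e.g. torch.int16 for 16-bit signed PCM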
-# +# waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH) @@ -429,11 +523,11 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Loading from file-like object # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # ``torchaudio``\ ’s I/O functions now support file-like object. This # allows to fetch audio data and decode at the same time from the location # other than local file system. The following examples illustrates this. -# +# # Load audio data as HTTP request with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: @@ -457,22 +551,22 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Tips on slicing # ~~~~~~~~~~~~~~~ -# +# # Providing ``num_frames`` and ``frame_offset`` arguments will slice the # resulting Tensor object while decoding. -# +# # The same result can be achieved using the regular Tensor slicing, # (i.e. ``waveform[:, frame_offset:frame_offset+num_frames]``) however, # providing ``num_frames`` and ``frame_offset`` arguments is more # efficient. -# +# # This is because the function will stop data acquisition and decoding # once it finishes decoding the requested frames. This is advantageous # when the audio data are transfered via network as the data transfer will # stop as soon as the necessary amount of data is fetched. -# +# # The following example illustrates this; -# +# # Illustration of two different decoding methods. # The first one will fetch all the data and decode them, while @@ -502,26 +596,26 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Saving audio to file # -------------------- -# +# # To save audio data in the formats intepretable by common applications, # you can use ``torchaudio.save``. -# +# # This function accepts path-like object and file-like object. -# +# # When passing file-like object, you also need to provide ``format`` # argument so that the function knows which format it should be using. In # case of path-like object, the function will detemine the format based on # the extension. If you are saving to a file without extension, you need # to provide ``format`` argument. -# +# # When saving as WAV format, the default encoding for ``float32`` Tensor # is 32-bit floating-point PCM. You can provide ``encoding`` and # ``bits_per_sample`` argument to change this. For example, to save data # in 16 bit signed integer PCM, you can do the following. -# +# # **Note** Saving data in encodings with lower bit depth reduces the # resulting file size but loses precision. -# +# waveform, sample_rate = get_sample() print_stats(waveform, sample_rate=sample_rate) @@ -545,9 +639,9 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # ``torchaudio.save`` can also handle other formats. To name a few; -# +# -waveform, sample_rate = get_sample() +waveform, sample_rate = get_sample(resample=8000) formats = [ "mp3", @@ -569,11 +663,11 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Saving to file-like object # ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # Similar to the other I/O functions, you can save audio into file-like # object. When saving to file-like object, ``format`` argument is # required. 
-# +# waveform, sample_rate = get_sample() @@ -585,48 +679,315 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): print(buffer_.read(16)) +###################################################################### +# Resampling +# ========== +# +# To resample an audio waveform from one freqeuncy to another, you can use +# ``transforms.Resample`` or ``functional.resample``. +# ``transforms.Resample`` precomputes and caches the kernel used for +# resampling, while ``functional.resample`` computes it on the fly, so +# using ``transforms.Resample`` will result in a speedup if resampling +# multiple waveforms using the same parameters (see Benchmarking section). +# +# Both resampling methods use `bandlimited sinc +# interpolation `__ to compute +# signal values at arbitrary time steps. The implementation involves +# convolution, so we can take advantage of GPU / multithreading for +# performance improvements. When using resampling in multiple +# subprocesses, such as data loading with multiple worker processes, your +# application might create more threads than your system can handle +# efficiently. Setting ``torch.set_num_threads(1)`` might help in this +# case. +# +# Because a finite number of samples can only represent a finite number of +# frequencies, resampling does not produce perfect results, and a variety +# of parameters can be used to control for its quality and computational +# speed. We demonstrate these properties through resampling a logarithmic +# sine sweep, which is a sine wave that increases exponentially in +# frequency over time. +# +# The spectrograms below show the frequency representation of the signal, +# where the x-axis labels correspond to the frequency of the original +# waveform (in log scale), the y-axis corresponds to the frequency of the +# plotted waveform, and the color intensity refers to amplitude. +# + +sample_rate = 48000 +resample_rate = 32000 + +waveform = get_sine_sweep(sample_rate) +plot_sweep(waveform, sample_rate, title="Original Waveform") +play_audio(waveform, sample_rate) + +resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype) +resampled_waveform = resampler(waveform) +plot_sweep(resampled_waveform, resample_rate, title="Resampled Waveform") +play_audio(waveform, sample_rate) + + +###################################################################### +# Controling resampling quality with parameters +# --------------------------------------------- +# +# Lowpass filter width +# ~~~~~~~~~~~~~~~~~~~~ +# +# Because the filter used for interpolation extends infinitely, the +# ``lowpass_filter_width`` parameter is used to control for the width of +# the filter to use to window the interpolation. It is also referred to as +# the number of zero crossings, since the interpolation passes through +# zero at every time unit. Using a larger ``lowpass_filter_width`` +# provides a sharper, more precise filter, but is more computationally +# expensive. 
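# To make the "zero crossings" intuition concrete before looking at the
# resampled spectrograms below, here is a small illustrative sketch (not part of
# the original tutorial; the 2048-point length is an arbitrary choice):
# ``torch.sinc`` crosses zero at every non-zero integer, so keeping the range
# ``[-width, width]`` keeps ``width`` zero crossings on each side of the kernel
# center before the window is applied.

def plot_windowed_sinc(width, num_points=2048):
    t = torch.linspace(-width, width, num_points)
    # windowed sinc: the ideal interpolation filter truncated by a Hann window
    kernel = torch.sinc(t) * torch.hann_window(num_points, periodic=False)
    plt.figure()
    plt.plot(t.numpy(), kernel.numpy())
    plt.title(f"Windowed sinc, lowpass_filter_width={width}")
    plt.show(block=False)

plot_windowed_sinc(6)
plot_windowed_sinc(128)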
+# + +sample_rate = 48000 +resample_rate = 32000 + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=6) +plot_sweep(resampled_waveform, resample_rate, title="lowpass_filter_width=6") + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=128) +plot_sweep(resampled_waveform, resample_rate, title="lowpass_filter_width=128") + + +###################################################################### +# Rolloff +# ~~~~~~~ +# +# The ``rolloff`` parameter is represented as a fraction of the Nyquist +# frequency, which is the maximal frequency representable by a given +# finite sample rate. ``rolloff`` determines the lowpass filter cutoff and +# controls the degree of aliasing, which takes place when frequencies +# higher than the Nyquist are mapped to lower frequencies. A lower rolloff +# will therefore reduce the amount of aliasing, but it will also reduce +# some of the higher frequencies. +# + +sample_rate = 48000 +resample_rate = 32000 + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, rolloff=0.99) +plot_sweep(resampled_waveform, resample_rate, title="rolloff=0.99") + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, rolloff=0.8) +plot_sweep(resampled_waveform, resample_rate, title="rolloff=0.8") + + +###################################################################### +# Window function +# ~~~~~~~~~~~~~~~ +# +# By default, torchaudio’s resample uses the Hann window filter, which is +# a weighted cosine function. It additionally supports the Kaiser window, +# which is a near optimal window function that contains an additional +# ``beta`` parameter that allows for the design of the smoothness of the +# filter and width of impulse. This can be controlled using the +# ``resampling_method`` parameter. 
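# In addition to selecting the window via ``resampling_method`` (compared in the
# next cell), the Kaiser window's ``beta`` parameter can be passed explicitly.
# The value below is arbitrary and for illustration only; this snippet is an
# aside, not part of the original tutorial.

resampled_waveform = F.resample(
    waveform, sample_rate, resample_rate,
    resampling_method="kaiser_window", beta=8.0)
plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window (beta=8.0)")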
+# + +sample_rate = 48000 +resample_rate = 32000 + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, resampling_method="sinc_interpolation") +plot_sweep(resampled_waveform, resample_rate, title="Hann Window Default") + +resampled_waveform = F.resample(waveform, sample_rate, resample_rate, resampling_method="kaiser_window") +plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Default") + + +###################################################################### +# Comparison against librosa +# -------------------------- +# +# torchaudio’s resample function can be used to produce results similar to +# that of librosa (resampy)’s kaiser window resampling, with some noise +# + +sample_rate = 48000 +resample_rate = 32000 + +### kaiser_best +resampled_waveform = F.resample( + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492 +) +plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)") + +librosa_resampled_waveform = torch.from_numpy( + librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type='kaiser_best')).unsqueeze(0) +plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)") + +mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item() +print("torchaudio and librosa kaiser best MSE:", mse) + +### kaiser_fast +resampled_waveform = F.resample( + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=16, + rolloff=0.85, + resampling_method="kaiser_window", + beta=8.555504641634386 +) +plot_specgram(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)") + +librosa_resampled_waveform = torch.from_numpy( + librosa.resample(waveform.squeeze().numpy(), sample_rate, resample_rate, res_type='kaiser_fast')).unsqueeze(0) +plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)") + +mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item() +print("torchaudio and librosa kaiser fast MSE:", mse) + + +###################################################################### +# Performance Benchmarking +# ------------------------ +# +# Below are benchmarks for downsampling and upsampling waveforms between +# two pairs of sampling rates. We demonstrate the performance implications +# that the ``lowpass_filter_wdith``, window type, and sample rates can +# have. Additionally, we provide a comparison against ``librosa``\ ’s +# ``kaiser_best`` and ``kaiser_fast`` using their corresponding parameters +# in ``torchaudio``. +# +# To elaborate on the results: +# - a larger ``lowpass_filter_width`` results in a larger resampling kernel, +# and therefore increases computation time for both the kernel computation +# and convolution +# - using ``kaiser_window`` results in longer computation times than the default +# ``sinc_interpolation`` because it is more complex to compute the intermediate +# window values - a large GCD between the sample and resample rate will result +# in a simplification that allows for a smaller kernel and faster kernel computation. 
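# A quick illustration of the GCD point above (an aside, not part of the
# original benchmark code): the resampling kernel effectively works on the rate
# ratio reduced by the greatest common divisor, so rate pairs that share a large
# divisor need far fewer distinct filter phases. ``math.gcd`` is from the
# standard library.

for orig_freq, new_freq in [(48000, 44100), (16000, 8000), (44100, 48000), (8000, 16000)]:
    g = math.gcd(orig_freq, new_freq)
    print(f"{orig_freq} -> {new_freq}: gcd={g}, reduced ratio {orig_freq // g}:{new_freq // g}")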
+# + +configs = { + "downsample (48 -> 44.1 kHz)": [48000, 44100], + "downsample (16 -> 8 kHz)": [16000, 8000], + "upsample (44.1 -> 48 kHz)": [44100, 48000], + "upsample (8 -> 16 kHz)": [8000, 1600], +} + +for label in configs: + times, rows = [], [] + sample_rate = configs[label][0] + resample_rate = configs[label][1] + waveform = get_sine_sweep(sample_rate) + + # sinc 64 zero-crossings + f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=64) + t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=64) + times.append([None, 1000 * f_time, 1000 * t_time]) + rows.append(f"sinc (width 64)") + + # sinc 6 zero-crossings + f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=16) + t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=16) + times.append([None, 1000 * f_time, 1000 * t_time]) + rows.append(f"sinc (width 16)") + + # kaiser best + lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_best") + f_time = benchmark_resample( + "functional", + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492) + t_time = benchmark_resample( + "transforms", + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=64, + rolloff=0.9475937167399596, + resampling_method="kaiser_window", + beta=14.769656459379492) + times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time]) + rows.append(f"kaiser_best") + + # kaiser fast + lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_fast") + f_time = benchmark_resample( + "functional", + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=16, + rolloff=0.85, + resampling_method="kaiser_window", + beta=8.555504641634386) + t_time = benchmark_resample( + "transforms", + waveform, + sample_rate, + resample_rate, + lowpass_filter_width=16, + rolloff=0.85, + resampling_method="kaiser_window", + beta=8.555504641634386) + times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time]) + rows.append(f"kaiser_fast") + + df = pd.DataFrame(times, + columns=["librosa", "functional", "transforms"], + index=rows) + df.columns = pd.MultiIndex.from_product([[f"{label} time (ms)"],df.columns]) + display(df.round(2)) + + ###################################################################### # Data Augmentation # ================= -# +# # ``torchaudio`` provides a variety of ways to augment audio data. -# +# ###################################################################### # Applying effects and filtering # ------------------------------ -# +# # ``torchaudio.sox_effects`` module provides ways to apply filiters like # ``sox`` command on Tensor objects and file-object audio sources # directly. -# +# # There are two functions for this; -# +# # - ``torchaudio.sox_effects.apply_effects_tensor`` for applying effects # on Tensor # - ``torchaudio.sox_effects.apply_effects_file`` for applying effects on # other audio source -# +# # Both function takes effects in the form of ``List[List[str]]``. This # mostly corresponds to how ``sox`` command works, but one caveat is that # ``sox`` command adds some effects automatically, but torchaudio’s # implementation does not do that. -# +# # For the list of available effects, please refer to `the sox # documentation `__. 
-# +# # **Tip** If you need to load and resample your audio data on-the-fly, # then you can use ``torchaudio.sox_effects.apply_effects_file`` with # ``"rate"`` effect. -# +# # **Note** ``apply_effects_file`` accepts file-like object or path-like # object. Similar to ``torchaudio.load``, when the audio format cannot be # detected from either file extension or header, you can provide # ``format`` argument to tell what format the audio source is. -# +# # **Note** This process is not differentiable. -# +# # Load the data waveform1, sample_rate1 = get_sample(resample=16000) @@ -635,7 +996,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): effects = [ ["lowpass", "-1", "300"], # apply single-pole lowpass filter ["speed", "0.8"], # reduce the speed - # This only changes sample rate, so it is necessary to + # This only changes sample rate, so it is necessary to # add `rate` effect with original sample rate after this. ["rate", f"{sample_rate1}"], ["reverb", "-w"], # Reverbration gives some dramatic feeling @@ -656,7 +1017,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): # Note that the number of frames and number of channels are different from # the original after the effects. Let’s listen to the audio. Doesn’t it # sound more dramatic? -# +# plot_specgram(waveform1, sample_rate1, title="Original", xlim=(0, 3.04)) play_audio(waveform1, sample_rate1) @@ -667,19 +1028,19 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Simulating room reverbration # ---------------------------- -# +# # `Convolution # reverb `__ is a # technique used to make a clean audio data sound like in a different # environment. -# +# # Using Room Impulse Response (RIR), we can make a clean speech sound like # uttered in a conference room. -# +# # For this process, we need RIR data. The following data are from VOiCES # dataset, but you can record one by your self. Just turn on microphone # and clap you hands. -# +# sample_rate = 8000 @@ -693,7 +1054,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # First, we need to clean up the RIR. We extract the main impulse, # normalize the signal power, then flip the time axis. -# +# rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] rir = rir / torch.norm(rir, p=2) @@ -706,7 +1067,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Then we convolve the speech signal with the RIR filter. -# +# speech, _ = get_speech_sample(resample=sample_rate) @@ -726,22 +1087,22 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Adding background noise # ----------------------- -# +# # To add background noise to audio data, you can simply add audio Tensor # and noise Tensor. A commonly way to adjust the intensity of noise is to # change Signal-to-Noise Ratio (SNR). # [`wikipedia `__] -# +# # .. math:: -# -# +# +# # \mathrm{SNR} = \frac{P_\mathrm{signal}}{P_\mathrm{noise}} -# +# # .. 
math:: -# -# +# +# # {\mathrm {SNR_{{dB}}}}=10\log _{{10}}\left({\mathrm {SNR}}\right) -# +# sample_rate = 8000 speech, _ = get_speech_sample(resample=sample_rate) @@ -769,11 +1130,11 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Applying codec to Tensor object # ------------------------------- -# +# # ``torchaudio.functional.apply_codec`` can apply codecs to Tensor object. -# +# # **Note** This process is not differentiable. -# +# waveform, sample_rate = get_speech_sample(resample=8000) @@ -796,11 +1157,11 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Simulating a phone recoding # --------------------------- -# +# # Combining the previous techniques, we can simulate audio that sounds # like a person talking over a phone in a echoey room with people talking # in the background. -# +# sample_rate = 16000 speech, _ = get_speech_sample(resample=sample_rate) @@ -817,7 +1178,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): play_audio(speech, sample_rate) # Add background noise -# Because the noise is recorded in the actual environment, we consider that +# Because the noise is recorded in the actual environment, we consider that # the noise contains the acoustic feature of the environment. Therefore, we add # the noise after RIR application. noise, _ = get_noise_sample(resample=sample_rate) @@ -855,34 +1216,34 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Feature Extractions # =================== -# +# # ``torchaudio`` implements feature extractions commonly used in audio # domain. They are available in ``torchaudio.functional`` and # ``torchaudio.transforms``. -# +# # ``functional`` module implements features as a stand alone functions. # They are stateless. -# +# # ``transforms`` module implements features in object-oriented manner, # using implementations from ``functional`` and ``torch.nn.Module``. -# +# # Because all the transforms are subclass of ``torch.nn.Module``, they can # be serialized using TorchScript. -# +# # For the complete list of available features, please refer to the # documentation. In this tutorial, we will look into conversion between # time domain and frequency domain (``Spectrogram``, ``GriffinLim``, # ``MelSpectrogram``) and augmentation technique called SpecAugment. -# +# ###################################################################### # Spectrogram # ----------- -# +# # To get the frequency representation of audio signal, you can use # ``Spectrogram`` transform. -# +# waveform, sample_rate = get_speech_sample() @@ -910,9 +1271,9 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # GriffinLim # ---------- -# +# # To recover a waveform from spectrogram, you can use ``GriffinLim``. -# +# torch.random.manual_seed(0) waveform, sample_rate = get_speech_sample() @@ -944,13 +1305,13 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Mel Filter Bank # --------------- -# +# # ``torchaudio.functional.create_fb_matrix`` can generate the filter bank # to convert frequency bins to Mel-scale bins. -# +# # Since this function does not require input audio/features, there is no # equivalent transform in ``torchaudio.transforms``. 
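# To make concrete what the filter bank below is used for (an illustrative
# sketch, not part of the original tutorial; all shapes here are arbitrary):
# converting a linear-frequency spectrogram to the mel scale is a matrix
# multiplication over the frequency axis with the generated filter bank.

demo_fb = F.create_fb_matrix(n_freqs=129, f_min=0., f_max=4000., n_mels=64, sample_rate=8000)
demo_spec = torch.rand(129, 100)  # (freq, time), e.g. from a Spectrogram transform
demo_melspec = torch.matmul(demo_spec.transpose(-1, -2), demo_fb).transpose(-1, -2)
print(demo_melspec.shape)  # torch.Size([64, 100]) -> (mel, time)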
-# +# n_fft = 256 n_mels = 64 @@ -971,13 +1332,10 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Comparison against librosa # ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # As a comparison, here is the equivalent way to get the mel filter bank # with ``librosa``. -# -# **Note** Currently, the result matches only when ``htk=True``. -# ``torchaudio`` does not support the equivalent of ``htk=False`` option. -# +# mel_filters_librosa = librosa.filters.mel( sample_rate, @@ -998,11 +1356,11 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # MelSpectrogram # -------------- -# +# # Mel-scale spectrogram is a combination of Spectrogram and mel scale # conversion. In ``torchaudio``, there is a transform ``MelSpectrogram`` # which is composed of ``Spectrogram`` and ``MelScale``. -# +# waveform, sample_rate = get_speech_sample() @@ -1022,6 +1380,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): norm='slaney', onesided=True, n_mels=n_mels, + mel_scale="htk", ) melspec = mel_spectrogram(waveform) @@ -1033,13 +1392,10 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Comparison against librosa # ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # As a comparison, here is the equivalent way to get Mel-scale spectrogram # with ``librosa``. -# -# **Note** Currently, the result matches only when ``htk=True``. -# ``torchaudio`` does not support the equivalent of ``htk=False`` option. -# +# melspec_librosa = librosa.feature.melspectrogram( waveform.numpy()[0], @@ -1064,7 +1420,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # MFCC # ---- -# +# waveform, sample_rate = get_speech_sample() @@ -1076,7 +1432,14 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): mfcc_transform = T.MFCC( sample_rate=sample_rate, - n_mfcc=n_mfcc, melkwargs={'n_fft': n_fft, 'n_mels': n_mels, 'hop_length': hop_length}) + n_mfcc=n_mfcc, + melkwargs={ + 'n_fft': n_fft, + 'n_mels': n_mels, + 'hop_length': hop_length, + 'mel_scale': 'htk', + } +) mfcc = mfcc_transform(waveform) @@ -1087,7 +1450,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Comparing against librosa # ~~~~~~~~~~~~~~~~~~~~~~~~~ -# +# melspec = librosa.feature.melspectrogram( y=waveform.numpy()[0], sr=sample_rate, n_fft=n_fft, @@ -1107,7 +1470,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Pitch # ----- -# +# waveform, sample_rate = get_speech_sample() @@ -1119,22 +1482,22 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Kaldi Pitch (beta) # ------------------ -# +# # Kaldi Pitch feature [1] is pitch detection mechanism tuned for ASR # application. This is a beta feature in torchaudio, and only # ``functional`` form is available. -# +# # 1. A pitch extraction algorithm tuned for automatic speech recognition -# +# # Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. # Khudanpur -# +# # 2014 IEEE International Conference on Acoustics, Speech and Signal # Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi: # 10.1109/ICASSP.2014.6854049. 
# [`abstract `__], # [`paper `__] -# +# waveform, sample_rate = get_speech_sample(resample=16000) @@ -1148,25 +1511,25 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Feature Augmentation # ==================== -# +# ###################################################################### # SpecAugment # ----------- -# +# # `SpecAugment `__ # is a popular augmentation technique applied on spectrogram. -# +# # ``torchaudio`` implements ``TimeStrech``, ``TimeMasking`` and # ``FrequencyMasking``. -# +# ###################################################################### # TimeStrech # ~~~~~~~~~~ -# +# spec = get_spectrogram(power=None) strech = T.TimeStretch() @@ -1185,7 +1548,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # TimeMasking # ~~~~~~~~~~~ -# +# torch.random.manual_seed(4) @@ -1201,7 +1564,7 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # FrequencyMasking # ~~~~~~~~~~~~~~~~ -# +# torch.random.manual_seed(4) @@ -1217,13 +1580,13 @@ def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): ###################################################################### # Datasets # ======== -# +# # ``torchaudio`` provides easy access to common, publicly accessible # datasets. Please checkout the official documentation for the list of # available datasets. -# +# # Here, we take ``YESNO`` dataset and look into how to use it. -# +# YESNO_DOWNLOAD_PROCESS.join() diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py index 2dd88929821..a556192bab0 100644 --- a/beginner_source/text_sentiment_ngrams_tutorial.py +++ b/beginner_source/text_sentiment_ngrams_tutorial.py @@ -49,32 +49,35 @@ # # We have revisited the very basic components of the torchtext library, including vocab, word vectors, tokenizer. Those are the basic data processing building blocks for raw text string. # -# Here is an example for typical NLP data processing with tokenizer and vocabulary. The first step is to build a vocabulary with the raw training dataset. Users can have a customized vocab by setting up arguments in the constructor of the Vocab class. For example, the minimum frequency ``min_freq`` for the tokens to be included. +# Here is an example for typical NLP data processing with tokenizer and vocabulary. The first step is to build a vocabulary with the raw training dataset. Here we use built in +# factory function `build_vocab_from_iterator` which accepts iterator that yield list or iterator of tokens. Users can also pass any special symbols to be added to the +# vocabulary. from torchtext.data.utils import get_tokenizer -from collections import Counter -from torchtext.vocab import Vocab +from torchtext.vocab import build_vocab_from_iterator tokenizer = get_tokenizer('basic_english') train_iter = AG_NEWS(split='train') -counter = Counter() -for (label, line) in train_iter: - counter.update(tokenizer(line)) -vocab = Vocab(counter, min_freq=1) +def yield_tokens(data_iter): + for _, text in data_iter: + yield tokenizer(text) + +vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=[""]) +vocab.set_default_index(vocab[""]) ###################################################################### # The vocabulary block converts a list of tokens into integers. 
# # :: # -# [vocab[token] for token in ['here', 'is', 'an', 'example']] -# >>> [476, 22, 31, 5298] +# vocab(['here', 'is', 'an', 'example']) +# >>> [475, 21, 30, 5286] # # Prepare the text processing pipeline with the tokenizer and vocabulary. The text and label pipelines will be used to process the raw data strings from the dataset iterators. -text_pipeline = lambda x: [vocab[token] for token in tokenizer(x)] +text_pipeline = lambda x: vocab(tokenizer(x)) label_pipeline = lambda x: int(x) - 1 @@ -246,6 +249,7 @@ def evaluate(dataloader): from torch.utils.data.dataset import random_split +from torchtext.data.functional import to_map_style_dataset # Hyperparameters EPOCHS = 10 # epoch LR = 5 # learning rate @@ -256,8 +260,8 @@ def evaluate(dataloader): scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1) total_accu = None train_iter, test_iter = AG_NEWS() -train_dataset = list(train_iter) -test_dataset = list(test_iter) +train_dataset = to_map_style_dataset(train_iter) +test_dataset = to_map_style_dataset(test_iter) num_train = int(len(train_dataset) * 0.95) split_train_, split_valid_ = \ random_split(train_dataset, [num_train, len(train_dataset) - num_train]) @@ -285,72 +289,6 @@ def evaluate(dataloader): print('-' * 59) -###################################################################### -# Running the model on GPU with the following printout: -# -# :: -# -# | epoch 1 | 500/ 1782 batches | accuracy 0.684 -# | epoch 1 | 1000/ 1782 batches | accuracy 0.852 -# | epoch 1 | 1500/ 1782 batches | accuracy 0.877 -# ----------------------------------------------------------- -# | end of epoch 1 | time: 8.33s | valid accuracy 0.867 -# ----------------------------------------------------------- -# | epoch 2 | 500/ 1782 batches | accuracy 0.895 -# | epoch 2 | 1000/ 1782 batches | accuracy 0.900 -# | epoch 2 | 1500/ 1782 batches | accuracy 0.903 -# ----------------------------------------------------------- -# | end of epoch 2 | time: 8.18s | valid accuracy 0.890 -# ----------------------------------------------------------- -# | epoch 3 | 500/ 1782 batches | accuracy 0.914 -# | epoch 3 | 1000/ 1782 batches | accuracy 0.914 -# | epoch 3 | 1500/ 1782 batches | accuracy 0.916 -# ----------------------------------------------------------- -# | end of epoch 3 | time: 8.20s | valid accuracy 0.897 -# ----------------------------------------------------------- -# | epoch 4 | 500/ 1782 batches | accuracy 0.926 -# | epoch 4 | 1000/ 1782 batches | accuracy 0.924 -# | epoch 4 | 1500/ 1782 batches | accuracy 0.921 -# ----------------------------------------------------------- -# | end of epoch 4 | time: 8.18s | valid accuracy 0.895 -# ----------------------------------------------------------- -# | epoch 5 | 500/ 1782 batches | accuracy 0.938 -# | epoch 5 | 1000/ 1782 batches | accuracy 0.935 -# | epoch 5 | 1500/ 1782 batches | accuracy 0.937 -# ----------------------------------------------------------- -# | end of epoch 5 | time: 8.16s | valid accuracy 0.902 -# ----------------------------------------------------------- -# | epoch 6 | 500/ 1782 batches | accuracy 0.939 -# | epoch 6 | 1000/ 1782 batches | accuracy 0.939 -# | epoch 6 | 1500/ 1782 batches | accuracy 0.938 -# ----------------------------------------------------------- -# | end of epoch 6 | time: 8.16s | valid accuracy 0.906 -# ----------------------------------------------------------- -# | epoch 7 | 500/ 1782 batches | accuracy 0.941 -# | epoch 7 | 1000/ 1782 batches | accuracy 0.939 -# | epoch 7 | 1500/ 1782 batches | 
accuracy 0.939 -# ----------------------------------------------------------- -# | end of epoch 7 | time: 8.19s | valid accuracy 0.903 -# ----------------------------------------------------------- -# | epoch 8 | 500/ 1782 batches | accuracy 0.942 -# | epoch 8 | 1000/ 1782 batches | accuracy 0.941 -# | epoch 8 | 1500/ 1782 batches | accuracy 0.942 -# ----------------------------------------------------------- -# | end of epoch 8 | time: 8.16s | valid accuracy 0.904 -# ----------------------------------------------------------- -# | epoch 9 | 500/ 1782 batches | accuracy 0.942 -# | epoch 9 | 1000/ 1782 batches | accuracy 0.941 -# | epoch 9 | 1500/ 1782 batches | accuracy 0.942 -# ----------------------------------------------------------- -# end of epoch 9 | time: 8.16s | valid accuracy 0.904 -# ----------------------------------------------------------- -# | epoch 10 | 500/ 1782 batches | accuracy 0.940 -# | epoch 10 | 1000/ 1782 batches | accuracy 0.942 -# | epoch 10 | 1500/ 1782 batches | accuracy 0.942 -# ----------------------------------------------------------- -# | end of epoch 10 | time: 8.15s | valid accuracy 0.904 -# ----------------------------------------------------------- - ###################################################################### # Evaluate the model with test dataset @@ -366,12 +304,7 @@ def evaluate(dataloader): accu_test = evaluate(test_dataloader) print('test accuracy {:8.3f}'.format(accu_test)) -################################################ -# -# :: -# -# test accuracy 0.906 -# + ###################################################################### @@ -409,10 +342,3 @@ def predict(text, text_pipeline): print("This is a %s news" %ag_news_label[predict(ex_text_str, text_pipeline)]) - -################################################ -# -# :: -# -# This is a Sports news -# diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 680e9dc4b62..81a25c9b5c9 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -1,10 +1,10 @@ """ -Sequence-to-Sequence Modeling with nn.Transformer and TorchText +Language Modeling with nn.Transformer and TorchText =============================================================== This is a tutorial on how to train a sequence-to-sequence model that uses the -`nn.Transformer `__ module. +`nn.Transformer `__ module. PyTorch 1.2 release includes a standard transformer module based on the paper `Attention is All You @@ -12,9 +12,9 @@ has been proved to be superior in quality for many sequence-to-sequence problems while being more parallelizable. The ``nn.Transformer`` module relies entirely on an attention mechanism (another module recently -implemented as `nn.MultiheadAttention `__) to draw global dependencies +implemented as `nn.MultiheadAttention `__) to draw global dependencies between input and output. The ``nn.Transformer`` module is now highly -modularized such that a single component (like `nn.TransformerEncoder `__ +modularized such that a single component (like `nn.TransformerEncoder `__ in this tutorial) can be easily adapted/composed. .. image:: ../_static/img/transformer_architecture.jpg @@ -35,7 +35,7 @@ # layer first, followed by a positional encoding layer to account for the order # of the word (see the next paragraph for more details). The # ``nn.TransformerEncoder`` consists of multiple layers of -# `nn.TransformerEncoderLayer `__. Along with the input sequence, a square +# `nn.TransformerEncoderLayer `__. 
Along with the input sequence, a square # attention mask is required because the self-attention layers in # ``nn.TransformerEncoder`` are only allowed to attend the earlier positions in # the sequence. For the language modeling task, any tokens on the future @@ -144,23 +144,18 @@ def forward(self, x): # efficient batch processing. # -import io import torch from torchtext.datasets import WikiText2 from torchtext.data.utils import get_tokenizer -from collections import Counter -from torchtext.vocab import Vocab +from torchtext.vocab import build_vocab_from_iterator train_iter = WikiText2(split='train') tokenizer = get_tokenizer('basic_english') -counter = Counter() -for line in train_iter: - counter.update(tokenizer(line)) -vocab = Vocab(counter) +vocab = build_vocab_from_iterator(map(tokenizer, train_iter), specials=[""]) +vocab.set_default_index(vocab[""]) def data_process(raw_text_iter): - data = [torch.tensor([vocab[token] for token in tokenizer(item)], - dtype=torch.long) for item in raw_text_iter] + data = [torch.tensor(vocab(tokenizer(item)), dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) train_iter, val_iter, test_iter = WikiText2() @@ -225,7 +220,7 @@ def get_batch(source, i): # equal to the length of the vocab object. # -ntokens = len(vocab.stoi) # the size of vocabulary +ntokens = len(vocab) # the size of vocabulary emsize = 200 # embedding dimension nhid = 200 # the dimension of the feedforward network model in nn.TransformerEncoder nlayers = 2 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder diff --git a/intermediate_source/pipeline_tutorial.py b/intermediate_source/pipeline_tutorial.py index 49b37b1f564..bb5ac9339c5 100644 --- a/intermediate_source/pipeline_tutorial.py +++ b/intermediate_source/pipeline_tutorial.py @@ -148,27 +148,24 @@ def forward(self, x): # efficient batch processing. # -import io import torch -from torchtext.utils import download_from_url, extract_archive +from torchtext.datasets import WikiText2 from torchtext.data.utils import get_tokenizer from torchtext.vocab import build_vocab_from_iterator -url = 'https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip' -test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url)) +train_iter = WikiText2(split='train') tokenizer = get_tokenizer('basic_english') -vocab = build_vocab_from_iterator(map(tokenizer, - iter(io.open(train_filepath, - encoding="utf8")))) +vocab = build_vocab_from_iterator(map(tokenizer, train_iter), specials=[""]) +vocab.set_default_index(vocab[""]) def data_process(raw_text_iter): - data = [torch.tensor([vocab[token] for token in tokenizer(item)], - dtype=torch.long) for item in raw_text_iter] + data = [torch.tensor(vocab(tokenizer(item)), dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) -train_data = data_process(iter(io.open(train_filepath, encoding="utf8"))) -val_data = data_process(iter(io.open(valid_filepath, encoding="utf8"))) -test_data = data_process(iter(io.open(test_filepath, encoding="utf8"))) +train_iter, val_iter, test_iter = WikiText2() +train_data = data_process(train_iter) +val_data = data_process(val_iter) +test_data = data_process(test_iter) device = torch.device("cuda") @@ -244,7 +241,7 @@ def get_batch(source, i): # allows the Pipe to work with only two partitions and avoid any # cross-partition overheads. 
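# A short aside on the vocabulary API change applied just below (illustrative,
# not part of the tutorial itself): the vocabulary returned by
# ``build_vocab_from_iterator`` is sized with ``len(vocab)`` and is called
# directly on a list of tokens, replacing the older ``vocab.stoi`` /
# ``vocab[token]`` pattern, for example:
#
#     len(vocab)                  # vocabulary size (was len(vocab.stoi))
#     vocab(tokenizer("a line"))  # list of token ids (was [vocab[t] for t in ...])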
-ntokens = len(vocab.stoi) # the size of vocabulary +ntokens = len(vocab) # the size of vocabulary emsize = 4096 # embedding dimension nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder nlayers = 12 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder @@ -330,7 +327,7 @@ def train(): model.train() # Turn on the train mode total_loss = 0. start_time = time.time() - ntokens = len(vocab.stoi) + ntokens = len(vocab) # Train only for 50 batches to keep script execution time low. nbatches = min(50 * bptt, train_data.size(0) - 1) @@ -366,7 +363,7 @@ def train(): def evaluate(eval_model, data_source): eval_model.eval() # Turn on the evaluation mode total_loss = 0. - ntokens = len(vocab.stoi) + ntokens = len(vocab) # Evaluate only for 50 batches to keep script execution time low. nbatches = min(50 * bptt, data_source.size(0) - 1) with torch.no_grad(): @@ -418,39 +415,3 @@ def evaluate(eval_model, data_source): print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( test_loss, math.exp(test_loss))) print('=' * 89) - - -###################################################################### -# Output -# ------ -# - - -###################################################################### -#.. code-block:: py -# -# Total parameters in model: 1,847,087,215 -# | epoch 1 | 10/ 50 batches | lr 5.00 | ms/batch 2387.45 | loss 42.16 | ppl 2036775646369743616.00 -# | epoch 1 | 20/ 50 batches | lr 5.00 | ms/batch 2150.93 | loss 48.24 | ppl 891334049215401558016.00 -# | epoch 1 | 30/ 50 batches | lr 5.00 | ms/batch 2155.23 | loss 34.66 | ppl 1125676483188404.62 -# | epoch 1 | 40/ 50 batches | lr 5.00 | ms/batch 2158.42 | loss 38.87 | ppl 76287208340888368.00 -# ----------------------------------------------------------------------------------------- -# | end of epoch 1 | time: 119.65s | valid loss 2.95 | valid ppl 19.15 -# ----------------------------------------------------------------------------------------- -# | epoch 2 | 10/ 50 batches | lr 4.51 | ms/batch 2376.16 | loss 34.92 | ppl 1458001430957104.00 -# | epoch 2 | 20/ 50 batches | lr 4.51 | ms/batch 2160.96 | loss 34.75 | ppl 1232463826541886.50 -# | epoch 2 | 30/ 50 batches | lr 4.51 | ms/batch 2160.66 | loss 28.10 | ppl 1599598251136.51 -# | epoch 2 | 40/ 50 batches | lr 4.51 | ms/batch 2160.07 | loss 20.25 | ppl 621174306.77 -# ----------------------------------------------------------------------------------------- -# | end of epoch 2 | time: 119.76s | valid loss 0.87 | valid ppl 2.38 -# ----------------------------------------------------------------------------------------- -# | epoch 3 | 10/ 50 batches | lr 4.29 | ms/batch 2376.49 | loss 13.20 | ppl 537727.23 -# | epoch 3 | 20/ 50 batches | lr 4.29 | ms/batch 2160.12 | loss 10.98 | ppl 58548.58 -# | epoch 3 | 30/ 50 batches | lr 4.29 | ms/batch 2160.05 | loss 12.01 | ppl 164152.79 -# | epoch 3 | 40/ 50 batches | lr 4.29 | ms/batch 2160.03 | loss 10.63 | ppl 41348.00 -# ----------------------------------------------------------------------------------------- -# | end of epoch 3 | time: 119.76s | valid loss 0.78 | valid ppl 2.17 -# ----------------------------------------------------------------------------------------- -# ========================================================================================= -# | End of training | test loss 0.69 | test ppl 1.99 -# ========================================================================================= diff --git a/prototype_source/numeric_suite_tutorial.py 
b/prototype_source/numeric_suite_tutorial.py index df386f4efd2..35052f4b2f4 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -168,7 +168,7 @@ def forward(self, x): # And then we can pass this logger into above APIs such as: data = img_data[0][0] -act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data, Logger=MyOutputLogger) +act_compare_dict = ns.compare_model_outputs(float_model, qmodel, data, logger_cls=MyOutputLogger) ############################################################################## # or: @@ -260,7 +260,7 @@ def forward(self, x, y): # And then we can pass this logger into above APIs such as: data = img_data[0][0] -ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, data, Logger=MyShadowLogger) +ob_dict = ns.compare_model_stub(float_model, qmodel, module_swap_list, data, logger_cls=MyShadowLogger) ############################################################################## # or: diff --git a/recipes_source/recipes/profiler_recipe.py b/recipes_source/recipes/profiler_recipe.py index 4c400107633..d399c90ca7e 100644 --- a/recipes_source/recipes/profiler_recipe.py +++ b/recipes_source/recipes/profiler_recipe.py @@ -30,9 +30,12 @@ # # 1. Import all necessary libraries # 2. Instantiate a simple Resnet model -# 3. Use profiler to analyze execution time -# 4. Use profiler to analyze memory consumption +# 3. Using profiler to analyze execution time +# 4. Using profiler to analyze memory consumption # 5. Using tracing functionality +# 6. Examining stack traces +# 7. Visualizing data as a flamegraph +# 8. Using profiler to analyze long-running jobs # # 1. Import all necessary libraries # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -43,7 +46,7 @@ import torch import torchvision.models as models -import torch.autograd.profiler as profiler +from torch.profiler import profile, record_function, ProfilerActivity ###################################################################### @@ -58,27 +61,36 @@ inputs = torch.randn(5, 3, 224, 224) ###################################################################### -# 3. Use profiler to analyze execution time +# 3. Using profiler to analyze execution time # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # PyTorch profiler is enabled through the context manager and accepts # a number of parameters, some of the most useful are: # +# - ``activities`` - a list of activities to profile: +# - ``ProfilerActivity.CPU`` - PyTorch operators, TorchScript functions and +# user-defined code labels (see ``record_function`` below); +# - ``ProfilerActivity.CUDA`` - on-device CUDA kernels; # - ``record_shapes`` - whether to record shapes of the operator inputs; # - ``profile_memory`` - whether to report amount of memory consumed by # model's Tensors; # - ``use_cuda`` - whether to measure execution time of CUDA kernels. # +# Note: when using CUDA, profiler also shows the runtime CUDA events +# occuring on the host. 
+ +###################################################################### # Let's see how we can use profiler to analyze the execution time: -with profiler.profile(record_shapes=True) as prof: - with profiler.record_function("model_inference"): +with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof: + with record_function("model_inference"): model(inputs) ###################################################################### # Note that we can use ``record_function`` context manager to label # arbitrary code ranges with user provided names # (``model_inference`` is used as a label in the example above). +# # Profiler allows one to check which operators were called during the # execution of a code range wrapped with a profiler context manager. # If multiple profiler ranges are active at the same time (e.g. in @@ -95,50 +107,96 @@ ###################################################################### # The output will look like (omitting some columns): -# ------------------------- -------------- ---------- ------------ --------- -# Name Self CPU total CPU total CPU time avg # Calls -# ------------------------- -------------- ---------- ------------ --------- -# model_inference 3.541ms 69.571ms 69.571ms 1 -# conv2d 69.122us 40.556ms 2.028ms 20 -# convolution 79.100us 40.487ms 2.024ms 20 -# _convolution 349.533us 40.408ms 2.020ms 20 -# mkldnn_convolution 39.822ms 39.988ms 1.999ms 20 -# batch_norm 105.559us 15.523ms 776.134us 20 -# _batch_norm_impl_index 103.697us 15.417ms 770.856us 20 -# native_batch_norm 9.387ms 15.249ms 762.471us 20 -# max_pool2d 29.400us 7.200ms 7.200ms 1 -# max_pool2d_with_indices 7.154ms 7.170ms 7.170ms 1 -# ------------------------- -------------- ---------- ------------ --------- +# --------------------------------- ------------ ------------ ------------ ------------ +# Name Self CPU CPU total CPU time avg # of Calls +# --------------------------------- ------------ ------------ ------------ ------------ +# model_inference 5.509ms 57.503ms 57.503ms 1 +# aten::conv2d 231.000us 31.931ms 1.597ms 20 +# aten::convolution 250.000us 31.700ms 1.585ms 20 +# aten::_convolution 336.000us 31.450ms 1.573ms 20 +# aten::mkldnn_convolution 30.838ms 31.114ms 1.556ms 20 +# aten::batch_norm 211.000us 14.693ms 734.650us 20 +# aten::_batch_norm_impl_index 319.000us 14.482ms 724.100us 20 +# aten::native_batch_norm 9.229ms 14.109ms 705.450us 20 +# aten::mean 332.000us 2.631ms 125.286us 21 +# aten::select 1.668ms 2.292ms 8.988us 255 +# --------------------------------- ------------ ------------ ------------ ------------ +# Self CPU time total: 57.549ms ###################################################################### # Here we see that, as expected, most of the time is spent in convolution (and specifically in ``mkldnn_convolution`` # for PyTorch compiled with MKL-DNN support). # Note the difference between self cpu time and cpu time - operators can call other operators, self cpu time exludes time -# spent in children operator calls, while total cpu time includes it. +# spent in children operator calls, while total cpu time includes it. You can choose to sort by the self cpu time by passing +# ``sort_by="self_cpu_time_total"`` into the ``table`` call. 
 #
-# To get a finer granularity of results and include operator input shapes, pass ``group_by_input_shape=True``:
+# To get a finer granularity of results and include operator input shapes, pass ``group_by_input_shape=True``
+# (note: this requires running the profiler with ``record_shapes=True``):

print(prof.key_averages(group_by_input_shape=True).table(sort_by="cpu_time_total", row_limit=10))

# (omitting some columns)
-# -------------------------  -----------  --------  -------------------------------------
-# Name                       CPU total    # Calls   Input Shapes
-# -------------------------  -----------  --------  -------------------------------------
-# model_inference            69.571ms     1         []
-# conv2d                     9.019ms      4         [[5, 64, 56, 56], [64, 64, 3, 3], []]
-# convolution                9.006ms      4         [[5, 64, 56, 56], [64, 64, 3, 3], []]
-# _convolution               8.982ms      4         [[5, 64, 56, 56], [64, 64, 3, 3], []]
-# mkldnn_convolution         8.894ms      4         [[5, 64, 56, 56], [64, 64, 3, 3], []]
-# max_pool2d                 7.200ms      1         [[5, 64, 112, 112]]
-# conv2d                     7.189ms      3         [[5, 512, 7, 7], [512, 512, 3, 3], []]
-# convolution                7.180ms      3         [[5, 512, 7, 7], [512, 512, 3, 3], []]
-# _convolution               7.171ms      3         [[5, 512, 7, 7], [512, 512, 3, 3], []]
-# max_pool2d_with_indices    7.170ms      1         [[5, 64, 112, 112]]
-# -------------------------  -----------  --------  --------------------------------------
-
-
-######################################################################
-# 4. Use profiler to analyze memory consumption
+# ---------------------------------  ------------  -------------------------------------------
+# Name                               CPU total     Input Shapes
+# ---------------------------------  ------------  -------------------------------------------
+# model_inference                    57.503ms      []
+# aten::conv2d                       8.008ms       [[5,64,56,56], [64,64,3,3], [], ..., []]
+# aten::convolution                  7.956ms       [[5,64,56,56], [64,64,3,3], [], ..., []]
+# aten::_convolution                 7.909ms       [[5,64,56,56], [64,64,3,3], [], ..., []]
+# aten::mkldnn_convolution           7.834ms       [[5,64,56,56], [64,64,3,3], [], ..., []]
+# aten::conv2d                       6.332ms       [[5,512,7,7], [512,512,3,3], [], ..., []]
+# aten::convolution                  6.303ms       [[5,512,7,7], [512,512,3,3], [], ..., []]
+# aten::_convolution                 6.273ms       [[5,512,7,7], [512,512,3,3], [], ..., []]
+# aten::mkldnn_convolution           6.233ms       [[5,512,7,7], [512,512,3,3], [], ..., []]
+# aten::conv2d                       4.751ms       [[5,256,14,14], [256,256,3,3], [], ..., []]
+# ---------------------------------  ------------  -------------------------------------------
+# Self CPU time total: 57.549ms
+
+######################################################################
+# Note the occurrence of ``aten::convolution`` twice with different input shapes.
+
+######################################################################
+# Profiler can also be used to analyze performance of models executed on GPUs:
+
+model = models.resnet18().cuda()
+inputs = torch.randn(5, 3, 224, 224).cuda()
+
+with profile(activities=[
+        ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof:
+    with record_function("model_inference"):
+        model(inputs)
+
+print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
+
+######################################################################
+# (Note: the first use of CUDA profiling may bring an extra overhead.)
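If that first-use overhead matters for a measurement, one common mitigation (a sketch, not part of the patch) is a warm-up pass executed before the profiled region::

    model(inputs)              # run once so one-time CUDA setup is not measured
    torch.cuda.synchronize()   # wait for the warm-up work to finish before profiling

The table shown next is the output of the patch's GPU example above.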
+
+######################################################################
+# The resulting table output:
+
+# (omitting some columns)
+# -------------------------------------------------------  ------------  ------------
+# Name                                                      Self CUDA     CUDA total
+# -------------------------------------------------------  ------------  ------------
+# model_inference                                           0.000us       11.666ms
+# aten::conv2d                                              0.000us       10.484ms
+# aten::convolution                                         0.000us       10.484ms
+# aten::_convolution                                        0.000us       10.484ms
+# aten::_convolution_nogroup                                0.000us       10.484ms
+# aten::thnn_conv2d                                         0.000us       10.484ms
+# aten::thnn_conv2d_forward                                 10.484ms      10.484ms
+# void at::native::im2col_kernel(long, float co...          3.844ms       3.844ms
+# sgemm_32x32x32_NN                                         3.206ms       3.206ms
+# sgemm_32x32x32_NN_vec                                     3.093ms       3.093ms
+# -------------------------------------------------------  ------------  ------------
+# Self CPU time total: 23.015ms
+# Self CUDA time total: 11.666ms
+
+######################################################################
+# Note the occurrence of on-device kernels in the output (e.g. ``sgemm_32x32x32_NN``).
+
+######################################################################
+# 4. Using profiler to analyze memory consumption
 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # PyTorch profiler can also show the amount of memory (used by the model's tensors)
@@ -147,44 +205,50 @@
 # by the operator, excluding the children calls to the other operators.
 # To enable memory profiling functionality pass ``profile_memory=True``.

-with profiler.profile(profile_memory=True, record_shapes=True) as prof:
+model = models.resnet18()
+inputs = torch.randn(5, 3, 224, 224)
+
+with profile(activities=[ProfilerActivity.CPU],
+        profile_memory=True, record_shapes=True) as prof:
    model(inputs)

print(prof.key_averages().table(sort_by="self_cpu_memory_usage", row_limit=10))

# (omitting some columns)
-# ---------------------------  ---------------  ---------------  ---------------
-# Name                         CPU Mem          Self CPU Mem     Number of Calls
-# ---------------------------  ---------------  ---------------  ---------------
-# empty                        94.79 Mb         94.79 Mb         123
-# resize_                      11.48 Mb         11.48 Mb         2
-# addmm                        19.53 Kb         19.53 Kb         1
-# empty_strided                4 b              4 b              1
-# conv2d                       47.37 Mb         0 b              20
-# ---------------------------  ---------------  ---------------  ---------------
+# ---------------------------------  ------------  ------------  ------------
+# Name                               CPU Mem       Self CPU Mem  # of Calls
+# ---------------------------------  ------------  ------------  ------------
+# aten::empty                        94.79 Mb      94.79 Mb      121
+# aten::max_pool2d_with_indices      11.48 Mb      11.48 Mb      1
+# aten::addmm                        19.53 Kb      19.53 Kb      1
+# aten::empty_strided                572 b         572 b         25
+# aten::resize_                      240 b         240 b         6
+# aten::abs                          480 b         240 b         4
+# aten::add                          160 b         160 b         20
+# aten::masked_select                120 b         112 b         1
+# aten::ne                           122 b         53 b          6
+# aten::eq                           60 b          30 b          2
+# ---------------------------------  ------------  ------------  ------------
+# Self CPU time total: 53.064ms

print(prof.key_averages().table(sort_by="cpu_memory_usage", row_limit=10))

# (omitting some columns)
-# ---------------------------  ---------------  ---------------  ---------------
-# Name                         CPU Mem          Self CPU Mem     Number of Calls
-# ---------------------------  ---------------  ---------------  ---------------
-# empty                        94.79 Mb         94.79 Mb         123
-# batch_norm                   47.41 Mb         0 b              20
-# _batch_norm_impl_index       47.41 Mb         0 b              20
-# native_batch_norm            47.41 Mb         0 b              20
-# conv2d                       47.37 Mb         0 b              20
-# convolution                  47.37 Mb         0 b              20
-# _convolution                 47.37 Mb         0 b              20
-# mkldnn_convolution           47.37 Mb         0 b              20
-# empty_like                   47.37 Mb         0 b              20
-# max_pool2d                   11.48 Mb         0
b 1 -# max_pool2d_with_indices 11.48 Mb 0 b 1 -# resize_ 11.48 Mb 11.48 Mb 2 -# addmm 19.53 Kb 19.53 Kb 1 -# adaptive_avg_pool2d 10.00 Kb 0 b 1 -# mean 10.00 Kb 0 b 1 -# --------------------------- --------------- --------------- --------------- +# --------------------------------- ------------ ------------ ------------ +# Name CPU Mem Self CPU Mem # of Calls +# --------------------------------- ------------ ------------ ------------ +# aten::empty 94.79 Mb 94.79 Mb 121 +# aten::batch_norm 47.41 Mb 0 b 20 +# aten::_batch_norm_impl_index 47.41 Mb 0 b 20 +# aten::native_batch_norm 47.41 Mb 0 b 20 +# aten::conv2d 47.37 Mb 0 b 20 +# aten::convolution 47.37 Mb 0 b 20 +# aten::_convolution 47.37 Mb 0 b 20 +# aten::mkldnn_convolution 47.37 Mb 0 b 20 +# aten::max_pool2d 11.48 Mb 0 b 1 +# aten::max_pool2d_with_indices 11.48 Mb 11.48 Mb 1 +# --------------------------------- ------------ ------------ ------------ +# Self CPU time total: 53.064ms ###################################################################### # 5. Using tracing functionality @@ -192,19 +256,164 @@ # # Profiling results can be outputted as a .json trace file: -with profiler.profile() as prof: - with profiler.record_function("model_inference"): - model(inputs) +model = models.resnet18().cuda() +inputs = torch.randn(5, 3, 224, 224).cuda() + +with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof: + model(inputs) prof.export_chrome_trace("trace.json") ###################################################################### -# User can examine the sequence of profiled operators after loading the trace file -# in Chrome (``chrome://tracing``): +# You can examine the sequence of profiled operators and CUDA kernels +# in Chrome trace viewer (``chrome://tracing``): # # .. image:: ../../_static/img/trace_img.png # :scale: 25 % +###################################################################### +# 6. Examining stack traces +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Profiler can be used to analyze Python and TorchScript stack traces: + +with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + with_stack=True, +) as prof: + model(inputs) + +# Print aggregated stats +print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cuda_time_total", row_limit=2)) + +# (omitting some columns) +# ------------------------- ----------------------------------------------------------- +# Name Source Location +# ------------------------- ----------------------------------------------------------- +# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward +# .../torch/nn/modules/conv.py(443): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# .../site-packages/torchvision/models/resnet.py(63): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# +# aten::thnn_conv2d_forward .../torch/nn/modules/conv.py(439): _conv_forward +# .../torch/nn/modules/conv.py(443): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# .../site-packages/torchvision/models/resnet.py(59): forward +# .../torch/nn/modules/module.py(1051): _call_impl +# +# ------------------------- ----------------------------------------------------------- +# Self CPU time total: 34.016ms +# Self CUDA time total: 11.659ms + +###################################################################### +# Note the two convolutions and the two callsites in ``torchvision/models/resnet.py`` script. +# +# (Warning: stack tracing adds an extra profiling overhead.) 
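Because of that overhead, it can be convenient to keep stack collection behind a flag; the same analysis also works on a CPU-only machine by sorting on the CPU metric. A minimal sketch, not part of the patch, reusing only the imports and model from the earlier steps::

    collect_stacks = True   # flip to False for low-overhead profiling runs

    model = models.resnet18()
    inputs = torch.randn(5, 3, 224, 224)

    with profile(
        activities=[ProfilerActivity.CPU],
        with_stack=collect_stacks,
    ) as prof:
        model(inputs)

    # Group by the top five stack frames; no GPU metric is needed here.
    print(prof.key_averages(group_by_stack_n=5).table(sort_by="self_cpu_time_total", row_limit=2))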
+ + +###################################################################### +# 7. Visualizing data as a flamegraph +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# Execution time (``self_cpu_time_total`` and ``self_cuda_time_total`` metrics) and stack traces +# can also be visualized as a flame graph. To do this, first export the raw data using ``export_stacks`` (requires ``with_stack=True``): + +prof.export_stacks("/tmp/profiler_stacks.txt", "self_cuda_time_total") + +###################################################################### +# We recommend using e.g. `Flamegraph tool `_ to generate an +# interactive SVG: + +# git clone https://github.com/brendangregg/FlameGraph +# cd FlameGraph +# ./flamegraph.pl --title "CUDA time" --countname "us." /tmp/profiler_stacks.txt > perf_viz.svg + +###################################################################### +# +# .. image:: ../../_static/img/perf_viz.png +# :scale: 25 % + + +###################################################################### +# 8. Using profiler to analyze long-running jobs +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# PyTorch profiler offers an additional API to handle long-running jobs +# (such as training loops). Tracing all of the execution can be +# slow and result in very large trace files. To avoid this, use optional +# arguments: +# +# - ``schedule`` - specifies a function that takes an integer argument (step number) +# as an input and returns an action for the profiler, the best way to use this parameter +# is to use ``torch.profiler.schedule`` helper function that can generate a schedule for you; +# - ``on_trace_ready`` - specifies a function that takes a reference to the profiler as +# an input and is called by the profiler each time the new trace is ready. +# +# To illustrate how the API works, let's first consider the following example with +# ``torch.profiler.schedule`` helper function: + +from torch.profiler import schedule + +my_schedule = schedule( + skip_first=10, + wait=5, + warmup=1, + active=3, + repeat=2) + +###################################################################### +# Profiler assumes that the long-running job is composed of steps, numbered +# starting from zero. The example above defines the following sequence of actions +# for the profiler: +# +# 1. Parameter ``skip_first`` tells profiler that it should ignore the first 10 steps +# (default value of ``skip_first`` is zero); +# 2. After the first ``skip_first`` steps, profiler starts executing profiler cycles; +# 3. Each cycle consists of three phases: +# +# - idling (``wait=5`` steps), during this phase profiler is not active; +# - warming up (``warmup=1`` steps), during this phase profiler starts tracing, but +# the results are discarded; this phase is used to discard the samples obtained by +# the profiler at the beginning of the trace since they are usually skewed by an extra +# overhead; +# - active tracing (``active=3`` steps), during this phase profiler traces and records data; +# 4. An optional ``repeat`` parameter specifies an upper bound on the number of cycles. +# By default (zero value), profiler will execute cycles as long as the job runs. + +###################################################################### +# Thus, in the example above, profiler will skip the first 15 steps, spend the next step on the warm up, +# actively record the next 3 steps, skip another 5 steps, spend the next step on the warm up, actively +# record another 3 steps. 
Since the ``repeat=2`` parameter value is specified, the profiler will stop +# the recording after the first two cycles. +# +# At the end of each cycle profiler calls the specified ``on_trace_ready`` function and passes itself as +# an argument. This function is used to process the new trace - either by obtaining the table output or +# by saving the output on disk as a trace file. +# +# To send the signal to the profiler that the next step has started, call ``prof.step()`` function. +# The current profiler step is stored in ``prof.step_num``. +# +# The following example shows how to use all of the concepts above: + +def trace_handler(p): + output = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=10) + print(output) + p.export_chrome_trace("/tmp/trace_" + str(p.step_num) + ".json") + +with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + schedule=torch.profiler.schedule( + wait=1, + warmup=1, + active=2), + on_trace_ready=trace_handler +) as p: + for idx in range(8): + model(inputs) + p.step() + + ###################################################################### # Learn More # ---------- @@ -212,5 +421,6 @@ # Take a look at the following recipes/tutorials to continue your learning: # # - `PyTorch Benchmark `_ +# - `PyTorch Profiler with TensorBoard `_ tutorial # - `Visualizing models, data, and training with TensorBoard `_ tutorial #