Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
01b512f
Test: add backend parameter
Sep 14, 2022
0f8e126
VideoReader object now works on backend
Sep 14, 2022
3c91963
Frame reading now passes
Sep 14, 2022
febcdfc
Keyframe seek now passes
Sep 14, 2022
d4d623f
Pyav backend now supports metadata
Sep 16, 2022
a288c49
changes in test to reflect GPU decoder change
Sep 16, 2022
9ed4644
Merge branch 'main' into bkorbar/pyavapi
bjuncek Sep 16, 2022
fc6d44e
Linter?
Sep 16, 2022
f62e955
Test GPU output
Oct 6, 2022
3b01816
Addressing Joao's comments
Oct 6, 2022
e3f8bbc
lint
Oct 6, 2022
2ae40b2
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 6, 2022
8d66cec
lint
Oct 6, 2022
e37b14a
Revert "Test GPU output"
Oct 6, 2022
35ecde2
lint?
Oct 6, 2022
92c3699
lint
Oct 6, 2022
6320cac
lint
Oct 6, 2022
a73300d
Address issues in build?
Oct 6, 2022
55aeb6f
hopefully doc fix
Oct 11, 2022
2496f3f
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 11, 2022
7b79335
Arrgh
Oct 11, 2022
d299173
arrgh
Oct 11, 2022
89f1dba
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 13, 2022
6dd4238
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 20, 2022
222f088
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 20, 2022
c78ce7e
Merge branch 'main' into bkorbar/pyavapi
jdsgomes Oct 21, 2022
e7a1840
fix typos
jdsgomes Oct 21, 2022
3e04eb6
fix input options
jdsgomes Oct 21, 2022
17082df
remove read from memory option in pyav
jdsgomes Oct 21, 2022
9d45094
skip read from mem test for gpu and pyav backend
jdsgomes Oct 21, 2022
7a1d95d
fix test
jdsgomes Oct 21, 2022
8b24b2d
remove unused import
jdsgomes Oct 21, 2022
6cfcf16
Merge branch 'main' into bkorbar/pyavapi
jdsgomes Oct 21, 2022
231358b
Hack to get reading from memory work with pyav
Oct 21, 2022
c1dcc16
patch audio test
Oct 21, 2022
f900c62
Merge branch 'main' into bkorbar/pyavapi
bjuncek Oct 21, 2022
d1cc724
Merge branch 'main' into bkorbar/pyavapi
jdsgomes Oct 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions test/test_video_gpu_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

import pytest
import torch
from torchvision.io import _HAS_GPU_VIDEO_DECODER, VideoReader
import torchvision
from torchvision import _HAS_GPU_VIDEO_DECODER
from torchvision.io import VideoReader

try:
import av
Expand All @@ -29,8 +31,9 @@ class TestVideoGPUDecoder:
],
)
def test_frame_reading(self, video_file):
torchvision.set_video_backend("cuda")
full_path = os.path.join(VIDEO_DIR, video_file)
decoder = VideoReader(full_path, device="cuda")
decoder = VideoReader(full_path)
with av.open(full_path) as container:
for av_frame in container.decode(container.streams.video[0]):
av_frames = torch.tensor(av_frame.to_rgb(src_colorspace="ITU709").to_ndarray())
Expand All @@ -54,7 +57,8 @@ def test_frame_reading(self, video_file):
],
)
def test_seek_reading(self, keyframes, full_path, duration):
decoder = VideoReader(full_path, device="cuda")
torchvision.set_video_backend("cuda")
decoder = VideoReader(full_path)
time = duration / 2
decoder.seek(time, keyframes_only=keyframes)
with av.open(full_path) as container:
Expand All @@ -79,8 +83,9 @@ def test_seek_reading(self, keyframes, full_path, duration):
],
)
def test_metadata(self, video_file):
torchvision.set_video_backend("cuda")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we run this just once for all tests in this file, since they are all supposed to run with the cuda backend?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. Let me try it out :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some reason that failed (putting it in a class header), so I'll keep it as is and redo the tests later.

full_path = os.path.join(VIDEO_DIR, video_file)
decoder = VideoReader(full_path, device="cuda")
decoder = VideoReader(full_path)
video_metadata = decoder.get_metadata()["video"]
with av.open(full_path) as container:
video = container.streams.video[0]
Expand Down
98 changes: 58 additions & 40 deletions test/test_videoapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ def fate(name, path="."):
class TestVideoApi:
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
@pytest.mark.parametrize("test_video", test_videos.keys())
def test_frame_reading(self, test_video):
@pytest.mark.parametrize("backend", ["video_reader", "pyav"])
def test_frame_reading(self, test_video, backend):
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)
with av.open(full_path) as av_reader:
if av_reader.streams.video:
Expand Down Expand Up @@ -117,58 +119,70 @@ def test_frame_reading(self, test_video):

@pytest.mark.parametrize("stream", ["video", "audio"])
@pytest.mark.parametrize("test_video", test_videos.keys())
def test_frame_reading_mem_vs_file(self, test_video, stream):
@pytest.mark.parametrize("backend", ["video_reader", "pyav"])
def test_frame_reading_mem_vs_file(self, test_video, stream, backend):
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)

# Test video reading from file vs from memory
vr_frames, vr_frames_mem = [], []
vr_pts, vr_pts_mem = [], []
# get vr frames
video_reader = VideoReader(full_path, stream)
for vr_frame in video_reader:
vr_frames.append(vr_frame["data"])
vr_pts.append(vr_frame["pts"])

# get vr frames = read from memory
f = open(full_path, "rb")
fbytes = f.read()
f.close()
video_reader_from_mem = VideoReader(fbytes, stream)

for vr_frame_from_mem in video_reader_from_mem:
vr_frames_mem.append(vr_frame_from_mem["data"])
vr_pts_mem.append(vr_frame_from_mem["pts"])

# same number of frames
assert len(vr_frames) == len(vr_frames_mem)
assert len(vr_pts) == len(vr_pts_mem)

# compare the frames and ptss
for i in range(len(vr_frames)):
assert vr_pts[i] == vr_pts_mem[i]
mean_delta = torch.mean(torch.abs(vr_frames[i].float() - vr_frames_mem[i].float()))
# on average the difference is very small and caused
# by decoding (around 1%)
# TODO: assess empirically how to set this? atm it's 1%
# averaged over all frames
assert mean_delta.item() < 2.55

del vr_frames, vr_pts, vr_frames_mem, vr_pts_mem
reader = VideoReader(full_path)
reader_md = reader.get_metadata()

if stream in reader_md:
# Test video reading from file vs from memory
vr_frames, vr_frames_mem = [], []
vr_pts, vr_pts_mem = [], []
# get vr frames
video_reader = VideoReader(full_path, stream)
for vr_frame in video_reader:
vr_frames.append(vr_frame["data"])
vr_pts.append(vr_frame["pts"])

# get vr frames = read from memory
f = open(full_path, "rb")
fbytes = f.read()
f.close()
video_reader_from_mem = VideoReader(fbytes, stream)

for vr_frame_from_mem in video_reader_from_mem:
vr_frames_mem.append(vr_frame_from_mem["data"])
vr_pts_mem.append(vr_frame_from_mem["pts"])

# same number of frames
assert len(vr_frames) == len(vr_frames_mem)
assert len(vr_pts) == len(vr_pts_mem)

# compare the frames and ptss
for i in range(len(vr_frames)):
assert vr_pts[i] == vr_pts_mem[i]
mean_delta = torch.mean(torch.abs(vr_frames[i].float() - vr_frames_mem[i].float()))
# on average the difference is very small and caused
# by decoding (around 1%)
# TODO: assess empirically how to set this? atm it's 1%
# averaged over all frames
assert mean_delta.item() < 2.55

del vr_frames, vr_pts, vr_frames_mem, vr_pts_mem
else:
del reader, reader_md

@pytest.mark.parametrize("test_video,config", test_videos.items())
def test_metadata(self, test_video, config):
@pytest.mark.parametrize("backend", ["video_reader", "pyav"])
def test_metadata(self, test_video, config, backend):
"""
Test that the metadata returned via pyav corresponds to the one returned
by the new video decoder API
"""
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)
reader = VideoReader(full_path, "video")
reader_md = reader.get_metadata()
assert config.video_fps == approx(reader_md["video"]["fps"][0], abs=0.0001)
assert config.duration == approx(reader_md["video"]["duration"][0], abs=0.5)

@pytest.mark.parametrize("test_video", test_videos.keys())
def test_seek_start(self, test_video):
@pytest.mark.parametrize("backend", ["video_reader", "pyav"])
def test_seek_start(self, test_video, backend):
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)
video_reader = VideoReader(full_path, "video")
num_frames = 0
Expand All @@ -194,7 +208,9 @@ def test_seek_start(self, test_video):
assert start_num_frames == num_frames

@pytest.mark.parametrize("test_video", test_videos.keys())
def test_accurateseek_middle(self, test_video):
@pytest.mark.parametrize("backend", ["video_reader"])
def test_accurateseek_middle(self, test_video, backend):
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)
stream = "video"
video_reader = VideoReader(full_path, stream)
Expand Down Expand Up @@ -233,7 +249,9 @@ def test_fate_suite(self):

@pytest.mark.skipif(av is None, reason="PyAV unavailable")
@pytest.mark.parametrize("test_video,config", test_videos.items())
def test_keyframe_reading(self, test_video, config):
@pytest.mark.parametrize("backend", ["pyav", "video_reader"])
def test_keyframe_reading(self, test_video, config, backend):
torchvision.set_video_backend(backend)
full_path = os.path.join(VIDEO_DIR, test_video)

av_reader = av.open(full_path)
Expand Down
21 changes: 17 additions & 4 deletions torchvision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import os
import warnings
from modulefinder import Module

import torch
from torchvision import datasets, io, models, ops, transforms, utils

from .extension import _HAS_OPS
from .extension import _HAS_OPS, _load_library

try:
from .version import __version__ # noqa: F401
except ImportError:
pass

try:
_load_library("Decoder")
_HAS_GPU_VIDEO_DECODER = True
except (ImportError, OSError, ModuleNotFoundError):
_HAS_GPU_VIDEO_DECODER = False


# Check if torchvision is being imported within the root folder
if not _HAS_OPS and os.path.dirname(os.path.realpath(__file__)) == os.path.join(
os.path.realpath(os.getcwd()), "torchvision"
Expand Down Expand Up @@ -66,11 +74,16 @@ def set_video_backend(backend):
backend, please compile torchvision from source.
"""
global _video_backend
if backend not in ["pyav", "video_reader"]:
raise ValueError("Invalid video backend '%s'. Options are 'pyav' and 'video_reader'" % backend)
if backend not in ["pyav", "video_reader", "cuda"]:
raise ValueError("Invalid video backend '%s'. Options are 'pyav', 'video_reader' and 'cuda'" % backend)
if backend == "video_reader" and not io._HAS_VIDEO_OPT:
# TODO: better messages
message = "video_reader video backend is not available. Please compile torchvision from source and try again"
warnings.warn(message)
raise RuntimeError(message)
elif backend == "cuda" and not _HAS_GPU_VIDEO_DECODER:
# TODO: better messages
message = "cuda video backend is not available."
raise RuntimeError(message)
else:
_video_backend = backend

Expand Down
5 changes: 0 additions & 5 deletions torchvision/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@

from ..utils import _log_api_usage_once

try:
from ._load_gpu_decoder import _HAS_GPU_VIDEO_DECODER
except ModuleNotFoundError:
_HAS_GPU_VIDEO_DECODER = False
from ._video_opt import (
_HAS_VIDEO_OPT,
_probe_video_from_file,
Expand Down Expand Up @@ -47,7 +43,6 @@
"_read_video_timestamps_from_memory",
"_probe_video_from_memory",
"_HAS_VIDEO_OPT",
"_HAS_GPU_VIDEO_DECODER",
"_read_video_clip_from_memory",
"_read_video_meta_data",
"VideoMetaData",
Expand Down
8 changes: 0 additions & 8 deletions torchvision/io/_load_gpu_decoder.py

This file was deleted.

Loading