@@ -56,37 +56,68 @@ def test_frame_reading(self):
5656 for test_video , config in test_videos .items ():
5757 full_path = os .path .join (VIDEO_DIR , test_video )
5858
59- av_reader = av .open (full_path )
60-
61- if av_reader .streams .video :
62- video_reader = VideoReader (full_path , "video" )
63- for av_frame in av_reader .decode (av_reader .streams .video [0 ]):
64- vr_frame = next (video_reader )
65-
66- assert float (av_frame .pts * av_frame .time_base ) == approx (vr_frame ["pts" ], abs = 0.1 )
67-
68- av_array = torch .tensor (av_frame .to_rgb ().to_ndarray ()).permute (2 , 0 , 1 )
69- vr_array = vr_frame ["data" ]
70- mean_delta = torch .mean (torch .abs (av_array .float () - vr_array .float ()))
71- # on average the difference is very small and caused
72- # by decoding (around 1%)
73- # TODO: asses empirically how to set this? atm it's 1%
74- # averaged over all frames
75- assert mean_delta .item () < 2.5
76-
77- av_reader = av .open (full_path )
78- if av_reader .streams .audio :
79- video_reader = VideoReader (full_path , "audio" )
80- for av_frame in av_reader .decode (av_reader .streams .audio [0 ]):
81- vr_frame = next (video_reader )
82- assert float (av_frame .pts * av_frame .time_base ) == approx (vr_frame ["pts" ], abs = 0.1 )
83-
84- av_array = torch .tensor (av_frame .to_ndarray ()).permute (1 , 0 )
85- vr_array = vr_frame ["data" ]
86-
87- max_delta = torch .max (torch .abs (av_array .float () - vr_array .float ()))
88- # we assure that there is never more than 1% difference in signal
89- assert max_delta .item () < 0.001
59+ with av .open (full_path ) as av_reader :
60+ is_video = True if av_reader .streams .video else False
61+
62+ if is_video :
63+ av_frames , vr_frames = [], []
64+ av_pts , vr_pts = [], []
65+ # get av frames
66+ for av_frame in av_reader .decode (av_reader .streams .video [0 ]):
67+ av_frames .append (torch .tensor (av_frame .to_rgb ().to_ndarray ()).permute (2 , 0 , 1 ))
68+ av_pts .append (av_frame .pts * av_frame .time_base )
69+
70+ # get vr frames
71+ video_reader = VideoReader (full_path , "video" )
72+ for vr_frame in video_reader :
73+ vr_frames .append (vr_frame ["data" ])
74+ vr_pts .append (vr_frame ["pts" ])
75+
76+ # same number of frames
77+ assert len (vr_frames ) == len (av_frames )
78+ assert len (vr_pts ) == len (av_pts )
79+
80+ # compare the frames and their pts values
81+ for i in range (len (vr_frames )):
82+ assert float (av_pts [i ]) == approx (vr_pts [i ], abs = 0.1 )
83+ mean_delta = torch .mean (torch .abs (av_frames [i ].float () - vr_frames [i ].float ()))
84+ # on average the difference is very small and caused
85+ # by decoding (around 1%)
86+ # TODO: assess empirically how to set this? atm it's 1%
87+ # averaged over all frames
88+ assert mean_delta .item () < 2.55
89+
90+ del vr_frames , av_frames , vr_pts , av_pts
91+
92+ # test audio reading compared to PYAV
93+ with av .open (full_path ) as av_reader :
94+ is_audio = True if av_reader .streams .audio else False
95+
96+ if is_audio :
97+ av_frames , vr_frames = [], []
98+ av_pts , vr_pts = [], []
99+ # get av frames
100+ for av_frame in av_reader .decode (av_reader .streams .audio [0 ]):
101+ av_frames .append (torch .tensor (av_frame .to_ndarray ()).permute (1 , 0 ))
102+ av_pts .append (av_frame .pts * av_frame .time_base )
103+ av_reader .close ()
104+
105+ # get vr frames
106+ video_reader = VideoReader (full_path , "audio" )
107+ for vr_frame in video_reader :
108+ vr_frames .append (vr_frame ["data" ])
109+ vr_pts .append (vr_frame ["pts" ])
110+
111+ # same number of frames
112+ assert len (vr_frames ) == len (av_frames )
113+ assert len (vr_pts ) == len (av_pts )
114+
115+ # compare the frames and their pts values
116+ for i in range (len (vr_frames )):
117+ assert float (av_pts [i ]) == approx (vr_pts [i ], abs = 0.1 )
118+ max_delta = torch .max (torch .abs (av_frames [i ].float () - vr_frames [i ].float ()))
119+ # we ensure that there is never more than 1% difference in signal
120+ assert max_delta .item () < 0.001
90121
91122 def test_metadata (self ):
92123 """
0 commit comments