From 9b07c43ed23aaf2f9a0f48ef8b820a155a043daa Mon Sep 17 00:00:00 2001
From: Chandresh Kanani <kananichandresh@gmail.com>
Date: Mon, 9 Sep 2019 12:53:36 +0530
Subject: [PATCH 1/2] modified code of io.video.read_video to interpret
 start_pts and end_pts in seconds

---
 torchvision/io/video.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/torchvision/io/video.py b/torchvision/io/video.py
index bd25c224ecb..9c9c6700f25 100644
--- a/torchvision/io/video.py
+++ b/torchvision/io/video.py
@@ -2,6 +2,7 @@
 import gc
 import torch
 import numpy as np
+import math
 
 try:
     import av
@@ -184,14 +185,22 @@ def read_video(filename, start_pts=0, end_pts=None):
 
     video_frames = []
     if container.streams.video:
+        video_stream = container.streams.video[0]
+        start_pts = math.floor(start_pts*(1/video_stream.time_base))
+        if end_pts != float("inf"):
+            end_pts = math.ceil(end_pts*(1/video_stream.time_base))
         video_frames = _read_from_stream(container, start_pts, end_pts,
-                                         container.streams.video[0], {'video': 0})
-        info["video_fps"] = float(container.streams.video[0].average_rate)
+                                         video_stream, {'video': 0})
+        info["video_fps"] = float(video_stream.average_rate)
     audio_frames = []
     if container.streams.audio:
+        audio_stream = container.streams.audio[0]
+        start_pts = math.floor(start_pts*(1/audio_stream.time_base))
+        if end_pts != float("inf"):
+            end_pts = math.ceil(end_pts*(1/audio_stream.time_base))
         audio_frames = _read_from_stream(container, start_pts, end_pts,
-                                         container.streams.audio[0], {'audio': 0})
-        info["audio_fps"] = container.streams.audio[0].rate
+                                         audio_stream , {'audio': 0})
+        info["audio_fps"] = audio_stream.rate
 
     container.close()
 

From 85db823f02def0c2ac3a297d233f94019b209ed6 Mon Sep 17 00:00:00 2001
From: Chandresh Kanani <kananichandresh@gmail.com>
Date: Thu, 12 Sep 2019 12:35:35 +0530
Subject: [PATCH 2/2] added pts_unit parameter in read_video and
 read_video_timestamps methods

---
 torchvision/io/video.py | 49 ++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/torchvision/io/video.py b/torchvision/io/video.py
index 9c9c6700f25..7b340cd9766 100644
--- a/torchvision/io/video.py
+++ b/torchvision/io/video.py
@@ -3,6 +3,7 @@
 import torch
 import numpy as np
 import math
+import warnings
 
 try:
     import av
@@ -146,7 +147,7 @@ def _align_audio_frames(aframes, audio_frames, ref_start, ref_end):
     return aframes[:, s_idx:e_idx]
 
 
-def read_video(filename, start_pts=0, end_pts=None):
+def read_video(filename, start_pts=0, end_pts=None, pts_unit='sec'):
     """
     Reads a video from a file, returning both the video frames as well as
     the audio frames
@@ -159,6 +160,8 @@ def read_video(filename, start_pts=0, end_pts=None):
         the start presentation time of the video
     end_pts : int, optional
         the end presentation time
+    pts_unit : str, optional
+        unit in which start_pts and end_pts values will be interpreted, either 'pts' or 'sec'. Defaults to 'sec'.
 
     Returns
     -------
@@ -186,19 +189,31 @@ def read_video(filename, start_pts=0, end_pts=None):
     video_frames = []
     if container.streams.video:
         video_stream = container.streams.video[0]
-        start_pts = math.floor(start_pts*(1/video_stream.time_base))
-        if end_pts != float("inf"):
-            end_pts = math.ceil(end_pts*(1/video_stream.time_base))
-        video_frames = _read_from_stream(container, start_pts, end_pts,
+        if pts_unit == 'pts':
+            warnings.warn("The pts_unit 'pts' produces wrong results and will be removed
+                in a follow-up version. Please use pts_unit 'sec'.")
+            video_start_pts = start_pts
+            video_end_pts = end_pts
+        else:
+            video_start_pts = math.floor(start_pts*(1/video_stream.time_base))
+            if video_end_pts != float("inf"):
+                video_end_pts = math.ceil(end_pts*(1/video_stream.time_base))
+        video_frames = _read_from_stream(container, video_start_pts, video_end_pts,
                                          video_stream, {'video': 0})
         info["video_fps"] = float(video_stream.average_rate)
     audio_frames = []
     if container.streams.audio:
         audio_stream = container.streams.audio[0]
-        start_pts = math.floor(start_pts*(1/audio_stream.time_base))
-        if end_pts != float("inf"):
-            end_pts = math.ceil(end_pts*(1/audio_stream.time_base))
-        audio_frames = _read_from_stream(container, start_pts, end_pts,
+        if pts_unit == 'pts':
+            warnings.warn("The pts_unit 'pts' produces wrong results and will be removed
+                in a follow-up version. Please use pts_unit 'sec'.")
+            audio_start_pts = start_pts
+            audio_end_pts = end_pts
+        else:
+            audio_start_pts = math.floor(start_pts*(1/audio_stream.time_base))
+            if end_pts != float("inf"):
+                audio_end_pts = math.ceil(end_pts*(1/audio_stream.time_base))
+        audio_frames = _read_from_stream(container, audio_start_pts, audio_end_pts,
                                          audio_stream , {'audio': 0})
         info["audio_fps"] = audio_stream.rate
 
@@ -226,7 +241,7 @@ def _can_read_timestamps_from_packets(container):
     return False
 
 
-def read_video_timestamps(filename):
+def read_video_timestamps(filename, pts_unit='pts'):
     """
     List the video frames timestamps.
 
@@ -236,6 +251,8 @@ def read_video_timestamps(filename):
     ----------
     filename : str
         path to the video file
+    pts_unit : str, optional
+        unit in which timestamp values will be returned, either 'pts' or 'sec'. Defaults to 'pts'.
 
     Returns
     -------
@@ -251,12 +268,18 @@ def read_video_timestamps(filename):
     video_frames = []
     video_fps = None
     if container.streams.video:
+        video_stream = container.streams.video[0]
         if _can_read_timestamps_from_packets(container):
             # fast path
             video_frames = [x for x in container.demux(video=0) if x.pts is not None]
         else:
             video_frames = _read_from_stream(container, 0, float("inf"),
-                                             container.streams.video[0], {'video': 0})
-        video_fps = float(container.streams.video[0].average_rate)
+                                             video_stream, {'video': 0})
+        video_fps = float(video_stream.average_rate)
     container.close()
-    return [x.pts for x in video_frames], video_fps
+    if pts_unit == 'pts':
+        warnings.warn("The pts_unit 'pts' produces wrong results and will be removed in a 
+            follow-up version. Please use pts_unit 'sec'.")
+        return [x.pts for x in video_frames], video_fps
+    else:
+        return [x.pts*video_stream.time_base for x in video_frames], video_fps