Skip to content

Commit

Permalink
Use our own index to seek more accurately when it is available (#180)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadsharif1 authored Aug 15, 2024
1 parent b0bc30d commit 0a06c3d
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 6 deletions.
12 changes: 12 additions & 0 deletions src/torchcodec/decoders/_core/VideoDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,18 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
int firstActiveStreamIndex = *activeStreamIndices_.begin();
const auto& firstStreamInfo = streams_[firstActiveStreamIndex];
int64_t desiredPts = *maybeDesiredPts_ * firstStreamInfo.timeBase.den;

// For some encodings like H265, FFMPEG sometimes seeks past the point we
// set as the max_ts. So we use our own index to give it the exact pts of
// the key frame that we want to seek to.
// See https://github.com/pytorch/torchcodec/issues/179 for more details.
// See https://trac.ffmpeg.org/ticket/11137 for the underlying ffmpeg bug.
if (!firstStreamInfo.keyFrames.empty()) {
int desiredKeyFrameIndex =
getKeyFrameIndexForPts(firstStreamInfo, desiredPts);
desiredPts = firstStreamInfo.keyFrames[desiredKeyFrameIndex].pts;
}

int ffmepgStatus = avformat_seek_file(
formatContext_.get(),
firstStreamInfo.streamIndex,
Expand Down
7 changes: 2 additions & 5 deletions test/decoders/test_simple_video_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,9 @@ def test_get_frame_displayed_at(self):
assert isinstance(decoder.get_frame_displayed_at(6.02).duration_seconds, float)

def test_get_frame_displayed_at_h265(self):
# Non-regression test for https://github.com/pytorch/torchcodec/issues/179
decoder = SimpleVideoDecoder(H265_VIDEO.path)
# Note that for H265, FFMPEG's seeking is not precise. Even though we ask to
# seek with a max_ts=0.5, FFMPEG will seek beyond that point.
# TODO: Revert to using frame5 in the test below once it's fixed upstream:
# https://trac.ffmpeg.org/ticket/11137
ref_frame6 = H265_VIDEO.get_frame_by_name("frame000006")
ref_frame6 = H265_VIDEO.get_frame_by_name("frame000005")
assert_tensor_equal(ref_frame6, decoder.get_frame_displayed_at(0.5).data)

def test_get_frame_displayed_at_fails(self):
Expand Down
2 changes: 1 addition & 1 deletion test/generate_reference_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ffmpeg -y -i "$VIDEO_PATH" -b:a 192K -vn "$VIDEO_PATH.audio.mp3"
# ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz
# ffmpeg -f lavfi -i color=size=128x128:duration=1:rate=10:color=blue -vf "drawtext=fontsize=30:fontcolor=white:x=(w-text_w)/2:y=(h-text_h)/2:text='Frame %{frame_num}'" -vcodec libx265 -pix_fmt yuv420p -g 2 -crf 10 h265_video.mp4 -y
VIDEO_PATH=$RESOURCES_DIR/h265_video.mp4
FRAMES=(6)
FRAMES=(5)
for frame in "${FRAMES[@]}"; do
frame_name=$(printf "%06d" "$frame")
ffmpeg -y -i "$VIDEO_PATH" -vf select="eq(n\,$frame)" -vsync vfr -q:v 2 "$VIDEO_PATH.frame$frame_name.bmp"
Expand Down
Binary file not shown.

0 comments on commit 0a06c3d

Please sign in to comment.