Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DALI_DEPS_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
425555244cbc9aaeca233ddd42bd650306000165
ToDo
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 [Bug] Version pin files set to literal ToDo placeholder.

Both DALI_DEPS_VERSION and DALI_EXTRA_VERSION now contain the string ToDo instead of a commit SHA. Any CI job that reads these files to fetch the matching dali_deps / dali_extra artefacts will either fail outright or pick up an incorrect/stale revision, breaking reproducibility for the entire build. These files should be updated to the real commit SHAs before this PR merges (or the dependent PRs should land first).

Fix in Claude Code

2 changes: 1 addition & 1 deletion DALI_EXTRA_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
21dd6148f0cf4557531d54b810379c681c898e91
ToDo
3 changes: 2 additions & 1 deletion dali/operators/video/frames_decoder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,8 @@ void FramesDecoderBase::SeekFrame(int frame_id) {
return; // No need to seek
}

if (next_frame_idx_ < 0) {
if (next_frame_idx_ < 0 || (HasIndex() && next_frame_idx_ >= NumFrames())) {
LOG_LINE << "Resetting decoder because next_frame_idx_ is out of bounds" << std::endl;
Reset();
}
assert(next_frame_idx_ >= 0);
Expand Down
22 changes: 12 additions & 10 deletions dali/operators/video/frames_decoder_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,19 @@ bool FramesDecoderCpu::ReadRegularFrame(uint8_t *data) {
LOG_LINE << (copy_to_output ? "Read" : "Skip") << " frame (ReadRegularFrame), index "
<< next_frame_idx_ << ", timestamp " << std::setw(5) << frame_->pts
<< std::endl;
if (!copy_to_output) {
++next_frame_idx_;
return true;
if (copy_to_output) {
CopyToOutput(data);
}

CopyToOutput(data);
++next_frame_idx_;
if (next_frame_idx_ >= NumFrames()) {
next_frame_idx_ = -1;
LOG_LINE << "Next frame index out of bounds (regular), setting to -1" << std::endl;
}
return true;
Comment on lines 160 to 165
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 The new EOS guard calls NumFrames() unconditionally on every decoded frame, which can invoke ParseNumFrames() when no index is built and nb_frames is zero in the container. ParseNumFrames() reads all remaining demuxer packets to completion, so the very first frame's increment will exhaust the packet stream and cause all subsequent av_read_frame calls to return EOF — silently dropping every frame after the first. The existing guard in ReadFlushFrame has the same limitation (documented with a TODO) but that function runs only after the demuxer is already exhausted. The fix is to guard the check with HasIndex(), mirroring the SeekFrame condition added in this same PR.

Suggested change
++next_frame_idx_;
if (next_frame_idx_ >= NumFrames()) {
next_frame_idx_ = -1;
LOG_LINE << "Next frame index out of bounds (regular), setting to -1" << std::endl;
}
return true;
++next_frame_idx_;
// TODO(awolant): Figure out how to handle this during index building
// Or when NumFrames is unavailable
if (HasIndex() && next_frame_idx_ >= NumFrames()) {
next_frame_idx_ = -1;
LOG_LINE << "Next frame index out of bounds (regular), setting to -1" << std::endl;
}
return true;

Fix in Claude Code

}

ret = avcodec_send_packet(codec_ctx_, nullptr);
DALI_ENFORCE(ret >= 0,
DALI_ENFORCE(ret >= 0 || ret == AVERROR_EOF,
make_string("Failed to send packet to decoder: ", av_error_string(ret)));
flush_state_ = true;

Expand All @@ -176,6 +177,7 @@ bool FramesDecoderCpu::ReadFlushFrame(uint8_t *data) {
bool copy_to_output = data != nullptr;
if (avcodec_receive_frame(codec_ctx_, frame_) < 0) {
flush_state_ = false;
next_frame_idx_ = -1;
return false;
}

Expand All @@ -189,7 +191,7 @@ bool FramesDecoderCpu::ReadFlushFrame(uint8_t *data) {
++next_frame_idx_;

// TODO(awolant): Figure out how to handle this during index building
// Or when NumFrames in unavailible
// Or when NumFrames in unavailable
if (next_frame_idx_ >= NumFrames()) {
next_frame_idx_ = -1;
LOG_LINE << "Next frame index out of bounds, setting to -1" << std::endl;
Expand All @@ -213,15 +215,15 @@ bool FramesDecoderCpu::SelectVideoStream(int stream_id) {
assert(codec_params_);
AVCodecID codec_id = codec_params_->codec_id;

static constexpr std::array<AVCodecID, 7> codecs = {
AVCodecID::AV_CODEC_ID_H264,
AVCodecID::AV_CODEC_ID_HEVC,
static constexpr std::array<AVCodecID, 3> codecs = {
AVCodecID::AV_CODEC_ID_VP8,
AVCodecID::AV_CODEC_ID_VP9,
AVCodecID::AV_CODEC_ID_MJPEG,
// Those are not supported by our compiled version of libavcodec,
// AVCodecID::AV_CODEC_ID_AV1,
// AVCodecID::AV_CODEC_ID_MPEG4,
// AVCodecID::AV_CODEC_ID_H264,
// AVCodecID::AV_CODEC_ID_HEVC,
};

if (std::find(codecs.begin(), codecs.end(), codec_id) == codecs.end()) {
Expand Down
25 changes: 0 additions & 25 deletions dali/operators/video/frames_decoder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,24 +200,12 @@ TEST_F(FramesDecoderTest_CpuOnlyTests, ConstantFrameRate) {
RunTest(decoder, cfr_videos_[0]);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, ConstantFrameRateHevc) {
FramesDecoderCpu decoder(cfr_hevc_videos_paths_[0]);
decoder.BuildIndex();
RunTest(decoder, cfr_videos_[0]);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, VariableFrameRate) {
FramesDecoderCpu decoder(vfr_videos_paths_[1]);
decoder.BuildIndex();
RunTest(decoder, vfr_videos_[1]);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, VariableFrameRateHevc) {
FramesDecoderCpu decoder(vfr_hevc_videos_paths_[0]);
decoder.BuildIndex();
RunTest(decoder, vfr_hevc_videos_[0]);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, InvalidSeek) {
FramesDecoderCpu decoder(cfr_videos_paths_[0]);
decoder.BuildIndex();
Expand Down Expand Up @@ -284,13 +272,6 @@ TEST_F(FramesDecoderGpuTest, InMemoryVfrVideo) {
RunTest(decoder, vfr_videos_[0]);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryVfrHevcVideo) {
auto memory_video = MemoryVideo(vfr_videos_paths_[0]);
FramesDecoderCpu decoder(memory_video.data(), memory_video.size());
decoder.BuildIndex();
RunTest(decoder, vfr_videos_[0]);
}

TEST_F(FramesDecoderGpuTest, InMemoryVfrHevcVideo) {
if (!FramesDecoderGpu::SupportsHevc()) {
GTEST_SKIP();
Expand All @@ -307,12 +288,6 @@ TEST_F(FramesDecoderTest_CpuOnlyTests, VariableFrameRateNoIndex) {
RunTest(decoder, vfr_videos_[0], false);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, VariableFrameRateHevcNoIndex) {
auto memory_video = MemoryVideo(vfr_hevc_videos_paths_[1]);
FramesDecoderCpu decoder(memory_video.data(), memory_video.size());
RunTest(decoder, vfr_hevc_videos_[1], false);
}

TEST_F(FramesDecoderTest_CpuOnlyTests, NoIndexSeek) {
auto memory_video = MemoryVideo(vfr_videos_paths_[0]);
FramesDecoderCpu decoder(memory_video.data(), memory_video.size());
Expand Down
6 changes: 3 additions & 3 deletions dali/operators/video/input/video_input_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,17 +228,17 @@ class VideoInputNextOutputDataIdTest : public ::testing::Test {
const std::string video_input_name_ = "VIDEO_INPUT";
const std::vector<TestFileDescriptor> test_files_ = {
{
make_string(testing::dali_extra_path(), "/db/video/cfr/test_1.mp4"),
make_string(testing::dali_extra_path(), "/db/video/cfr/test_1_vp9.mp4"),
50,
"there will be cake"
},
{
make_string(testing::dali_extra_path(), "/db/video/cfr/test_2.mp4"),
make_string(testing::dali_extra_path(), "/db/video/cfr/test_2_vp9.mp4"),
60,
"cake is a lie"
},
{
make_string(testing::dali_extra_path(), "/db/video/cfr/test_2.mp4"),
make_string(testing::dali_extra_path(), "/db/video/cfr/test_2_vp9.mp4"),
60,
"" // No data_id for this file.
},
Expand Down
4 changes: 2 additions & 2 deletions dali/operators/video/legacy/reader/video_reader_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ TEST_F(VIDEO_READER_TEST_CLASS, MultipleVideoResolution) {
.AddArg("sequence_length", sequence_length)
.AddArg("random_shuffle", true)
.AddArg("initial_fill", initial_fill)
.AddArg("file_root", std::string{testing::dali_extra_path() + "/db/video_resolution/"})
.AddArg("file_root", std::string{testing::dali_extra_path() + "/db/video_resolution/vp9/"})
.AddOutput("frames", StorageDevice::GPU)
.AddOutput("labels", StorageDevice::GPU));

Expand Down Expand Up @@ -413,7 +413,7 @@ TEST_F(VIDEO_READER_TEST_CLASS, HEVC) {
"Decoder hardware does not support this video codec"
" and/or chroma format";

// richer FFmpeg configuration leads to different behaviour of VFR heuristics so dissable it for
// richer FFmpeg configuration leads to different behaviour of VFR heuristics so disable it for
// this video
pipe.AddOperator(OpSpec(VIDEO_READER_OP_STR)
.AddArg("device", "gpu")
Expand Down
35 changes: 22 additions & 13 deletions dali/operators/video/video_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,24 +81,33 @@ void SaveFrame(uint8_t *frame, int frame_id, int sample_id, int batch_id,

void TestVideo::CompareFrame(int frame_id, const uint8_t *frame, int eps) {
auto &ground_truth = frames_[frame_id];
bool frames_match = true;
// Tolerate a tiny number of isolated subpixel deviations exceeding eps. The CPU VP9
// decode path occasionally produces a single byte that differs by ~32 (suspected SIMD
// glitch in libavcodec/sws_scale that Valgrind cannot instrument). A genuine regression
// produces orders of magnitude more bad subpixels, so this budget still catches real
// breakage while suppressing the flake.
static constexpr int max_bad_subpixels = 16;
std::vector<int> bad_per_thread(detail::ThreadCount(), 0);

detail::parallel_for(FrameSize(), detail::ThreadCount(), [&](int start, int end, int id){
int count = 0;
for (int j = start; j < end; ++j) {
if (std::abs(frame[j] - ground_truth.data[j]) > eps) {
frames_match = false;
break;
++count;
}
}
bad_per_thread[id] = count;
});
int total_bad = std::accumulate(bad_per_thread.begin(), bad_per_thread.end(), 0);

if (!frames_match) {
if (total_bad > max_bad_subpixels) {
SaveFrame(const_cast<uint8_t *>(frame), frame_id, 0, 0, "test_frame", Width(),
Height());
SaveFrame(ground_truth.data, frame_id, 0, 0, "ground_truth", Width(),
Height());

FAIL() << "Frames do not match (eps=" << eps
FAIL() << "Frames do not match (eps=" << eps << ", " << total_bad
<< " subpixels exceed threshold, budget=" << max_bad_subpixels
<< "). Debug frames saved to test_frame_*.png and ground_truth_*.png";
}
}
Expand All @@ -125,24 +134,24 @@ void CompareFrameAvgError(int frame_id, size_t frame_size, size_t width, size_t
}

std::vector<std::string> VideoTestBase::cfr_videos_frames_paths_{
testing::dali_extra_path() + "/db/video/cfr/frames_1/",
testing::dali_extra_path() + "/db/video/cfr/frames_2/"};
testing::dali_extra_path() + "/db/video/cfr/frames_1_vp9/",
testing::dali_extra_path() + "/db/video/cfr/frames_2_vp9/"};

std::vector<std::string> VideoTestBase::vfr_videos_frames_paths_{
testing::dali_extra_path() + "/db/video/vfr/frames_1/",
testing::dali_extra_path() + "/db/video/vfr/frames_2/"};
testing::dali_extra_path() + "/db/video/vfr/frames_1_vp9/",
testing::dali_extra_path() + "/db/video/vfr/frames_2_vp9/"};

std::vector<std::string> VideoTestBase::vfr_hevc_videos_frames_paths_{
testing::dali_extra_path() + "/db/video/vfr/frames_1_hevc/",
testing::dali_extra_path() + "/db/video/vfr/frames_2_hevc/"};

std::vector<std::string> VideoTestBase::cfr_videos_paths_{
testing::dali_extra_path() + "/db/video/cfr/test_1.mp4",
testing::dali_extra_path() + "/db/video/cfr/test_2.mp4"};
testing::dali_extra_path() + "/db/video/cfr/test_1_vp9.mp4",
testing::dali_extra_path() + "/db/video/cfr/test_2_vp9.mp4"};

std::vector<std::string> VideoTestBase::vfr_videos_paths_{
testing::dali_extra_path() + "/db/video/vfr/test_1.mp4",
testing::dali_extra_path() + "/db/video/vfr/test_2.mp4"};
testing::dali_extra_path() + "/db/video/vfr/test_1_vp9.mp4",
testing::dali_extra_path() + "/db/video/vfr/test_2_vp9.mp4"};

std::vector<std::string> VideoTestBase::cfr_hevc_videos_paths_{
testing::dali_extra_path() + "/db/video/cfr/test_1_hevc.mp4",
Expand Down
3 changes: 2 additions & 1 deletion dali/test/python/checkpointing/test_dali_checkpointing.py
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,8 @@ def test_experimental_video_reader(
video: VideoConfig,
):
files = [
os.path.join(get_dali_extra_path(), "db", "video", "vfr", f"test_{i}.mp4") for i in (1, 2)
os.path.join(get_dali_extra_path(), "db", "video", "vfr", f"test_{i}_vp9.mp4")
for i in (1, 2)
]

check_reader_checkpointing(
Expand Down
Loading