Fix CPU decode_jpeg error-path leak on malformed JPEGs (setjmp/longjmp) (pytorch#9423)

MPSFuzz · pytorchstu · NicolasHug · NicolasHug · commit c9ffdfec0253 · 2026-03-10T10:51:32.000Z
Co-authored-by: MPSFuzz <2286770808@qq.com>
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
Co-authored-by: Nicolas Hug <nh.nicolas.hug@gmail.com>
diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
@@ -3,6 +3,8 @@
 #include "common_jpeg.h"
 #include "exif.h"
 
+#include <optional>
+
 namespace vision {
 namespace image {
 
@@ -141,12 +143,23 @@ torch::Tensor decode_jpeg(
   struct jpeg_decompress_struct cinfo;
   struct torch_jpeg_error_mgr jerr;
 
+  // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> tensor;
+  std::optional<torch::Tensor> cmyk_line_tensor;
+
   auto datap = data.data_ptr<uint8_t>();
   // Setup decompression structure
   cinfo.err = jpeg_std_error(&jerr.pub);
   jerr.pub.error_exit = torch_jpeg_error_exit;
   /* Establish the setjmp return context for my_error_exit to use. */
   if (setjmp(jerr.setjmp_buffer)) {
+    // Release any tensors that may have been allocated after setjmp.
+    cmyk_line_tensor.reset();
+    tensor.reset();
+
     /* If we get here, the JPEG code has signaled an error.
      * We need to clean up the JPEG object.
      */
@@ -209,10 +222,10 @@ torch::Tensor decode_jpeg(
   int width = cinfo.output_width;
 
   int stride = width * channels;
-  auto tensor =
+  tensor =
       torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8);
-  auto ptr = tensor.data_ptr<uint8_t>();
-  torch::Tensor cmyk_line_tensor;
+  auto ptr = tensor->data_ptr<uint8_t>();
+
   if (cmyk_to_rgb_or_gray) {
     cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8);
   }
@@ -223,7 +236,7 @@ torch::Tensor decode_jpeg(
      * more than one scanline at a time if that's more convenient.
      */
     if (cmyk_to_rgb_or_gray) {
-      auto cmyk_line_ptr = cmyk_line_tensor.data_ptr<uint8_t>();
+      auto cmyk_line_ptr = cmyk_line_tensor->data_ptr<uint8_t>();
       jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1);
 
       if (channels == 3) {
@@ -239,7 +252,7 @@ torch::Tensor decode_jpeg(
 
   jpeg_finish_decompress(&cinfo);
   jpeg_destroy_decompress(&cinfo);
-  auto output = tensor.permute({2, 0, 1});
+  auto output = tensor->permute({2, 0, 1});
 
   if (apply_exif_orientation) {
     return exif_orientation_transform(output, exif_orientation);
diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp
@@ -3,6 +3,8 @@
 #include "common_png.h"
 #include "exif.h"
 
+#include <optional>
+
 namespace vision {
 namespace image {
 
@@ -45,7 +47,14 @@ torch::Tensor decode_png(
   auto datap = accessor.data();
   auto datap_len = accessor.size(0);
 
+  // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> tensor;
+
   if (setjmp(png_jmpbuf(png_ptr)) != 0) {
+    tensor.reset();
     png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
     TORCH_CHECK(false, "Internal error.");
   }
@@ -196,19 +205,19 @@ torch::Tensor decode_png(
 
   auto num_pixels_per_row = width * channels;
   auto is_16_bits = bit_depth == 16;
-  auto tensor = torch::empty(
+  tensor = torch::empty(
       {int64_t(height), int64_t(width), channels},
       is_16_bits ? at::kUInt16 : torch::kU8);
   if (is_little_endian()) {
     png_set_swap(png_ptr);
   }
-  auto t_ptr = (uint8_t*)tensor.data_ptr();
+  auto t_ptr = (uint8_t*)tensor->data_ptr();
   for (int pass = 0; pass < number_of_passes; pass++) {
     for (png_uint_32 i = 0; i < height; ++i) {
       png_read_row(png_ptr, t_ptr, nullptr);
       t_ptr += num_pixels_per_row * (is_16_bits ? 2 : 1);
     }
-    t_ptr = (uint8_t*)tensor.data_ptr();
+    t_ptr = (uint8_t*)tensor->data_ptr();
   }
 
   int exif_orientation = -1;
@@ -218,7 +227,7 @@ torch::Tensor decode_png(
 
   png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
 
-  auto output = tensor.permute({2, 0, 1});
+  auto output = tensor->permute({2, 0, 1});
   if (apply_exif_orientation) {
     return exif_orientation_transform(output, exif_orientation);
   }
diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp
@@ -1,5 +1,6 @@
 #include "encode_jpeg.h"
 
+#include <optional>
 #include "common_jpeg.h"
 
 namespace vision {
@@ -35,6 +36,12 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   JpegSizeType jpegSize = 0;
   uint8_t* jpegBuf = nullptr;
 
+  // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> input;
+
   cinfo.err = jpeg_std_error(&jerr.pub);
   jerr.pub.error_exit = torch_jpeg_error_exit;
 
@@ -43,6 +50,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
     /* If we get here, the JPEG code has signaled an error.
      * We need to clean up the JPEG object and the buffer.
      */
+    input.reset();
     jpeg_destroy_compress(&cinfo);
     if (jpegBuf != nullptr) {
       free(jpegBuf);
@@ -64,7 +72,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   int channels = data.size(0);
   int height = data.size(1);
   int width = data.size(2);
-  auto input = data.permute({1, 2, 0}).contiguous();
+  input = data.permute({1, 2, 0}).contiguous();
 
   TORCH_CHECK(
       channels == 1 || channels == 3,
@@ -90,7 +98,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   jpeg_start_compress(&cinfo, TRUE);
 
   auto stride = width * channels;
-  auto ptr = input.data_ptr<uint8_t>();
+  auto ptr = input->data_ptr<uint8_t>();
 
   // Encode JPEG file
   while (cinfo.next_scanline < cinfo.image_height) {
diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp
@@ -1,5 +1,7 @@
 #include "encode_jpeg.h"
 
+#include <optional>
+
 #include "common_png.h"
 
 namespace vision {
@@ -76,11 +78,19 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   buf_info.buffer = nullptr;
   buf_info.size = 0;
 
+  // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> input;
+
   /* Establish the setjmp return context for my_error_exit to use. */
   if (setjmp(err_ptr.setjmp_buffer)) {
     /* If we get here, the PNG code has signaled an error.
      * We need to clean up the PNG object and the buffer.
      */
+    input.reset();
+
     if (info_ptr != nullptr) {
       png_destroy_info_struct(png_write, &info_ptr);
     }
@@ -114,7 +124,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   int channels = data.size(0);
   int height = data.size(1);
   int width = data.size(2);
-  auto input = data.permute({1, 2, 0}).contiguous();
+  input = data.permute({1, 2, 0}).contiguous();
 
   TORCH_CHECK(
       channels == 1 || channels == 3,
@@ -150,7 +160,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   png_write_info(png_write, info_ptr);
 
   auto stride = width * channels;
-  auto ptr = input.data_ptr<uint8_t>();
+  auto ptr = input->data_ptr<uint8_t>();
 
   // Encode PNG file
   for (int y = 0; y < height; ++y) {