Skip to content

Instantly share code, notes, and snippets.

@hjanuschka
Created March 12, 2026 07:51
Show Gist options
  • Select an option

  • Save hjanuschka/76c4bd303876ac22675c1f3c58b4f6bd to your computer and use it in GitHub Desktop.

Select an option

Save hjanuschka/76c4bd303876ac22675c1f3c58b4f6bd to your computer and use it in GitHub Desktop.
JXL decoder: use frame scanner + seeking for animations, progressive flush
diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
index 5cadd5f76b2b7..995f304d6fffa 100644
--- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
+++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
@@ -16,20 +16,21 @@
namespace blink {
using jxl_rs::jxl_rs_decoder_create;
+using jxl_rs::jxl_rs_frame_scanner_create;
using jxl_rs::jxl_rs_signature_check;
using jxl_rs::JxlRsBasicInfo;
using jxl_rs::JxlRsDecoder;
using jxl_rs::JxlRsFrameHeader;
+using jxl_rs::JxlRsFrameScanner;
using jxl_rs::JxlRsPixelFormat;
using jxl_rs::JxlRsProcessResult;
using jxl_rs::JxlRsStatus;
+using jxl_rs::JxlRsVisibleFrameInfo;
namespace {
-// The maximum number of decoded samples we allow. This helps prevent resource
-// exhaustion from malicious files. The jxl-rs API counts pixels * channels,
-// so an RGBA image counts 4 samples per pixel. JPEG XL codestream level 5
-// limits specify ~268M pixels, so we allow ~1B samples to support that.
+// The maximum number of decoded samples (pixels * channels) we allow. JPEG XL
+// codestream level 5 limits specify ~268M pixels; ~1B samples covers RGBA.
constexpr uint64_t kMaxDecodedPixels = 1024ULL * 1024 * 1024;
} // namespace
@@ -72,6 +73,118 @@ bool JXLImageDecoder::MatchesJXLSignature(
rust::Slice<const uint8_t>(data.data(), data.size()));
}
+// ---------------------------------------------------------------------------
+// Frame scanning (no pixel decoding)
+// ---------------------------------------------------------------------------
+
+void JXLImageDecoder::ScanFrames() {
+ if (scanner_done_) {
+ return;
+ }
+
+ if (!scanner_.has_value()) {
+ scanner_ = jxl_rs_frame_scanner_create(kMaxDecodedPixels);
+ }
+
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+ size_t remaining = data_size - scanner_input_offset_;
+
+ if (remaining == 0 && !IsAllDataReceived()) {
+ return;
+ }
+
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span = remaining > 0
+ ? reader.GetConsecutiveData(scanner_input_offset_,
+ remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+
+ bool all_input =
+ IsAllDataReceived() && (scanner_input_offset_ + remaining >= data_size);
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result = (*scanner_)->feed(input_slice, all_input);
+
+ if (result.status == JxlRsStatus::Error) {
+ SetFailed();
+ return;
+ }
+
+ scanner_input_offset_ += result.bytes_consumed;
+
+ if (result.status == JxlRsStatus::Success) {
+ scanner_done_ = true;
+ }
+
+ // Extract basic info from scanner if not yet available.
+ if (!have_basic_info_ && (*scanner_)->has_basic_info()) {
+ basic_info_ = (*scanner_)->get_basic_info();
+
+ if (!SetSize(basic_info_.width, basic_info_.height)) {
+ return;
+ }
+
+ if (basic_info_.bits_per_sample > 8) {
+ is_high_bit_depth_ = true;
+ }
+
+ decode_to_half_float_ =
+ ImageIsHighBitDepth() &&
+ high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat;
+
+ if (!IgnoresColorSpace()) {
+ auto icc_data = (*scanner_)->get_icc_profile();
+ if (!icc_data.empty()) {
+ auto profile = ColorProfile::Create(icc_data);
+ if (profile) {
+ SetEmbeddedColorProfile(std::move(profile));
+ }
+ }
+ }
+
+ if (basic_info_.bits_per_sample == 8 && !basic_info_.is_grayscale &&
+ !basic_info_.have_animation && !basic_info_.has_alpha) {
+ static constexpr char kType[] = "Jxl";
+ update_bpp_histogram_callback_ =
+ CrossThreadBindOnce(&UpdateBppHistogram<kType>);
+ }
+
+ have_basic_info_ = true;
+ }
+
+ // Update frame_seek_info_ from the scanner's discovered frames.
+ size_t scanned_count = (*scanner_)->frame_count();
+ base::TimeDelta cumulative_time;
+
+ if (!frame_seek_info_.empty()) {
+ const auto& last = frame_seek_info_.back();
+ cumulative_time = last.timestamp + last.duration;
+ }
+
+ for (size_t i = frame_seek_info_.size(); i < scanned_count; i++) {
+ JxlRsVisibleFrameInfo info = (*scanner_)->get_frame_info(i);
+ FrameSeekInfo seek;
+ seek.duration = base::Milliseconds(info.duration_ms);
+ seek.timestamp = cumulative_time;
+ seek.is_keyframe = info.is_keyframe;
+ seek.decode_start_file_offset = info.decode_start_file_offset;
+ seek.remaining_in_box = info.remaining_in_box;
+ seek.visible_frames_to_skip = info.visible_frames_to_skip;
+
+ cumulative_time += seek.duration;
+ frame_seek_info_.push_back(seek);
+ }
+}
+
+// ---------------------------------------------------------------------------
+// ImageDecoder overrides
+// ---------------------------------------------------------------------------
+
void JXLImageDecoder::DecodeSize() {
Decode(0, /*only_size=*/true);
}
@@ -86,21 +199,12 @@ wtf_size_t JXLImageDecoder::DecodeFrameCount() {
return 1;
}
- // If we have received all the data, we must produce the correct
- // frame count. Thus, we always decode all the data we have.
- // TODO(veluca): for long animations, this will currently decode
- // the entire file, using a large amount of memory and CPU time.
- // Avoid doing that once jxl-rs supports seeking and/or frame
- // skipping.
- while (decoder_state_ != DecoderState::kDone) {
- size_t offset_pre = input_offset_;
- size_t decoded_frames_pre = num_decoded_frames_;
- Decode(num_decoded_frames_, /*only_size=*/false);
- // Exit the loop if the image is corrupted or we didn't make any progress.
- if (Failed() || (offset_pre == input_offset_ &&
- num_decoded_frames_ == decoded_frames_pre)) {
- break;
- }
+ // Use the lightweight scanner to discover frames without decoding pixels.
+ ScanFrames();
+
+ // Resize the frame buffer cache to match discovered frames.
+ if (frame_seek_info_.size() > frame_buffer_cache_.size()) {
+ frame_buffer_cache_.resize(frame_seek_info_.size());
}
return frame_buffer_cache_.size();
@@ -120,10 +224,8 @@ void JXLImageDecoder::InitializeNewFrame(wtf_size_t index) {
buffer.SetOriginalFrameRect(gfx::Rect(Size()));
buffer.SetRequiredPreviousFrameIndex(kNotFound);
- // Set duration/timestamp if the frame header has been parsed.
- // This is available before the frame is fully decoded.
- if (index < frame_info_.size()) {
- const FrameInfo& info = frame_info_[index];
+ if (index < frame_seek_info_.size()) {
+ const FrameSeekInfo& info = frame_seek_info_[index];
buffer.SetDuration(info.duration);
buffer.SetTimestamp(info.timestamp);
}
@@ -150,40 +252,36 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
}
}
- FastSharedBufferReader reader(data_.get());
- size_t data_size = reader.size();
-
- // Handle animation loop rewind.
- if (decoder_.has_value() && !only_size && basic_info_.have_animation) {
- bool frame_already_cached =
- index < frame_buffer_cache_.size() &&
- frame_buffer_cache_[index].GetStatus() == ImageFrame::kFrameComplete;
-
- if (!frame_already_cached && index < num_decoded_frames_) {
- (*decoder_)->rewind();
- decoder_state_ = DecoderState::kInitial;
- num_decoded_frames_ = 0;
- input_offset_ = 0;
- // Keep basic_info_ and have_basic_info_ since the stream hasn't changed.
+ // For animation frames that need seeking (not the next sequential frame),
+ // use the seek path.
+ if (!only_size && have_basic_info_ && basic_info_.have_animation &&
+ index != num_decoded_frames_) {
+ // Ensure we have seek info for this frame.
+ if (index >= frame_seek_info_.size()) {
+ ScanFrames();
+ if (Failed() || index >= frame_seek_info_.size()) {
+ return;
+ }
}
+ SeekAndDecodeFrame(index);
+ return;
}
- // Create decoder if needed. Pass premultiply_alpha_ so jxl-rs handles
- // premultiplication natively (faster and handles alpha_associated correctly).
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+
+ // Create decoder if needed.
if (!decoder_.has_value()) {
decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_);
}
// Process until we get what we need.
for (;;) {
- size_t remaining_size = data_size - input_offset_;
- // When all data is received, process it all at once for efficiency.
- // Only use smaller chunks for true progressive loading (streaming data).
+ size_t remaining_size = data_size - decoder_input_offset_;
size_t chunk_size;
if (IsAllDataReceived()) {
- chunk_size = remaining_size; // Process all available data
+ chunk_size = remaining_size;
} else {
- // Progressive streaming: use smaller chunks to allow partial rendering
constexpr size_t kMaxChunkSize = 64 * 1024;
chunk_size = std::min(remaining_size, kMaxChunkSize);
}
@@ -192,12 +290,13 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
Vector<uint8_t> chunk_buffer;
if (chunk_size > 0) {
chunk_buffer.resize(chunk_size);
- data_span = reader.GetConsecutiveData(input_offset_, chunk_size,
+ data_span = reader.GetConsecutiveData(decoder_input_offset_, chunk_size,
base::span(chunk_buffer));
}
bool all_input =
- IsAllDataReceived() && (input_offset_ + chunk_size >= data_size);
+ IsAllDataReceived() &&
+ (decoder_input_offset_ + chunk_size >= data_size);
rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
switch (decoder_state_) {
@@ -210,16 +309,16 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
if (all_input) {
SetFailed();
}
return;
}
- // Success - got basic info
+ // Success - got basic info.
basic_info_ = (*decoder_)->get_basic_info();
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
if (!SetSize(basic_info_.width, basic_info_.height)) {
return;
@@ -233,12 +332,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
ImageIsHighBitDepth() &&
high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat;
- // Set pixel format on decoder.
- // Use native 8-bit ordering for kN32, and RGBA F16 for half float.
#if SK_PMCOLOR_BYTE_ORDER(B, G, R, A)
- constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8;
+ constexpr JxlRsPixelFormat kNativePixelFormat =
+ JxlRsPixelFormat::Bgra8;
#elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A)
- constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8;
+ constexpr JxlRsPixelFormat kNativePixelFormat =
+ JxlRsPixelFormat::Rgba8;
#else
#error "Unsupported Skia pixel order"
#endif
@@ -248,7 +347,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
(*decoder_)->set_pixel_format(pixel_format,
basic_info_.num_extra_channels);
- // Extract ICC color profile.
if (!IgnoresColorSpace()) {
auto icc_data = (*decoder_)->get_icc_profile();
if (!icc_data.empty()) {
@@ -259,8 +357,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
}
}
- // Record bpp information only for 8-bit, color, still images without
- // alpha.
if (!have_basic_info_ && basic_info_.bits_per_sample == 8 &&
!basic_info_.is_grayscale && !basic_info_.have_animation &&
!basic_info_.has_alpha) {
@@ -287,31 +383,27 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
return;
}
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
- // Successfully parsed a frame header - increment discovered count.
JxlRsFrameHeader header = (*decoder_)->get_frame_header();
if (basic_info_.have_animation) {
wtf_size_t frame_idx = num_decoded_frames_;
- FrameInfo info;
- info.duration = base::Milliseconds(header.duration_ms);
- info.timestamp = base::TimeDelta();
-
- if (frame_idx > 0 && frame_idx - 1 < frame_info_.size()) {
- const FrameInfo& prev = frame_info_[frame_idx - 1];
- info.timestamp = prev.timestamp + prev.duration;
- }
- if (frame_idx < frame_info_.size()) {
- frame_info_[frame_idx] = info;
- } else {
- CHECK_EQ(frame_idx, frame_info_.size());
- frame_info_.push_back(info);
+ // Update frame_seek_info_ if we don't have it yet from the scanner.
+ if (frame_idx >= frame_seek_info_.size()) {
+ FrameSeekInfo info;
+ info.duration = base::Milliseconds(header.duration_ms);
+ info.timestamp = base::TimeDelta();
+ if (frame_idx > 0 && frame_idx - 1 < frame_seek_info_.size()) {
+ const FrameSeekInfo& prev = frame_seek_info_[frame_idx - 1];
+ info.timestamp = prev.timestamp + prev.duration;
+ }
+ frame_seek_info_.push_back(info);
}
}
@@ -322,23 +414,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
case DecoderState::kHaveFrameHeader: {
wtf_size_t frame_index = num_decoded_frames_;
- // Ensure frame buffer cache is large enough.
if (frame_buffer_cache_.size() <= frame_index) {
frame_buffer_cache_.resize(frame_index + 1);
}
ImageFrame& frame = frame_buffer_cache_[frame_index];
if (frame.GetStatus() == ImageFrame::kFrameEmpty) {
- // We call InitializeNewFrame manually here because JXLImageDecoder,
- // unlike other image decoder classes, handles the frame buffer cache
- // in the decode loop. This happens because decoding the frame count
- // also fully renders the frames - when we switch to lightweight
- // decoding for frame count + decoding individual frames via seeking,
- // we will likely be able to remove this call.
- //
- // IMPORTANT: InitializeNewFrame() must run before InitFrameBuffer(),
- // so the base class allocates the correct backing store (e.g.
- // RGBA_F16 for high bit depth + half float).
InitializeNewFrame(frame_index);
if (!InitFrameBuffer(frame_index)) {
SetFailed();
@@ -351,7 +432,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
const uint32_t width = basic_info_.width;
const uint32_t height = basic_info_.height;
- // Get direct access to the frame buffer's backing store.
const SkBitmap& bitmap = frame.Bitmap();
uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels());
size_t row_stride = bitmap.rowBytes();
@@ -361,12 +441,9 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
- // Calculate buffer size for the decoder.
size_t buffer_size = row_stride * height;
rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size);
- // Decode directly into the frame buffer.
- // Premultiplication is handled by jxl-rs based on premultiply_alpha_.
JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride(
input_slice, all_input, output_slice, width, height, row_stride);
@@ -375,56 +452,244 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- // Update offset with consumed bytes for progressive decoding.
- input_offset_ += result.bytes_consumed;
-
- // Signal that pixels may have changed for progressive rendering.
- // TODO(veluca): set the frame status to kFramePartial if and only
- // if jxl-rs signals that some data has been painted (jxl-rs
- // does not yet expose this functionality, nor does it do
- // progressive rendering properly).
- frame.SetStatus(ImageFrame::kFramePartial);
- frame.SetPixelsChanged(true);
+ decoder_input_offset_ += result.bytes_consumed;
+
+ // Progressive flush: render whatever pixels are available.
+ JxlRsProcessResult flush_result = (*decoder_)->flush_pixels(
+ output_slice, width, height, row_stride);
+ if (flush_result.status == JxlRsStatus::Success) {
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFramePartial);
+ }
+
if (all_input) {
SetFailed();
}
return;
}
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
frame.SetPixelsChanged(true);
frame.SetStatus(ImageFrame::kFrameComplete);
- if (frame_index < frame_info_.size()) {
- const FrameInfo& info = frame_info_[frame_index];
+ if (frame_index < frame_seek_info_.size()) {
+ const FrameSeekInfo& info = frame_seek_info_[frame_index];
frame.SetDuration(info.duration);
frame.SetTimestamp(info.timestamp);
}
num_decoded_frames_++;
- // Record bpp histogram for still images when fully decoded.
if (IsAllDataReceived() && update_bpp_histogram_callback_) {
std::move(update_bpp_histogram_callback_).Run(Size(), data_->size());
}
if ((*decoder_)->has_more_frames()) {
- // Go back to waiting for next frame header.
decoder_state_ = DecoderState::kHaveBasicInfo;
} else {
decoder_state_ = DecoderState::kDone;
}
- // Check if we've decoded the requested frame.
if (frame_index >= index) {
return;
}
break;
}
case DecoderState::kDone:
- break;
+ return;
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Seek-based animation frame decode
+// ---------------------------------------------------------------------------
+
+void JXLImageDecoder::SeekAndDecodeFrame(wtf_size_t index) {
+ CHECK_LT(index, frame_seek_info_.size());
+ const FrameSeekInfo& seek = frame_seek_info_[index];
+
+ // Create a fresh decoder for seeking. The decoder needs to have basic info
+ // parsed before we can seek.
+ if (!decoder_.has_value()) {
+ decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_);
+ decoder_state_ = DecoderState::kInitial;
+ decoder_input_offset_ = 0;
+ }
+
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+ bool all_input = IsAllDataReceived();
+
+ // Ensure decoder has basic info.
+ if (decoder_state_ == DecoderState::kInitial) {
+ size_t remaining = data_size - decoder_input_offset_;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(decoder_input_offset_, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->parse_basic_info(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ decoder_input_offset_ += result.bytes_consumed;
+
+ // Configure pixel format.
+#if SK_PMCOLOR_BYTE_ORDER(B, G, R, A)
+ constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8;
+#elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A)
+ constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8;
+#else
+#error "Unsupported Skia pixel order"
+#endif
+ JxlRsPixelFormat pixel_format =
+ decode_to_half_float_ ? JxlRsPixelFormat::RgbaF16
+ : kNativePixelFormat;
+ (*decoder_)->set_pixel_format(pixel_format, basic_info_.num_extra_channels);
+ decoder_state_ = DecoderState::kHaveBasicInfo;
+ }
+
+ // Seek to the frame's decode start position.
+ (*decoder_)->seek_to_frame(seek.remaining_in_box);
+ size_t input_offset = seek.decode_start_file_offset;
+
+ // Skip preceding visible frames if needed.
+ for (size_t i = 0; i < seek.visible_frames_to_skip; i++) {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->skip_visible_frame(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
+ }
+
+ // Ensure frame buffer cache is large enough.
+ if (frame_buffer_cache_.size() <= index) {
+ frame_buffer_cache_.resize(index + 1);
+ }
+
+ ImageFrame& frame = frame_buffer_cache_[index];
+ if (frame.GetStatus() == ImageFrame::kFrameEmpty) {
+ InitializeNewFrame(index);
+ if (!InitFrameBuffer(index)) {
+ SetFailed();
+ return;
+ }
+ }
+
+ frame.SetHasAlpha(basic_info_.has_alpha);
+
+ const uint32_t width = basic_info_.width;
+ const uint32_t height = basic_info_.height;
+
+ const SkBitmap& bitmap = frame.Bitmap();
+ uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels());
+ size_t row_stride = bitmap.rowBytes();
+
+ if (!frame_pixels) {
+ SetFailed();
+ return;
+ }
+
+ size_t buffer_size = row_stride * height;
+ rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size);
+
+ // Parse frame header.
+ {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->parse_frame_header(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
+ }
+
+ // Decode pixels.
+ {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
}
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride(
+ input_slice, all_input, output_slice, width, height, row_stride);
+
+ if (result.status == JxlRsStatus::Error) {
+ SetFailed();
+ return;
+ }
+ if (result.status == JxlRsStatus::NeedMoreInput) {
+ JxlRsProcessResult flush_result =
+ (*decoder_)->flush_pixels(output_slice, width, height, row_stride);
+ if (flush_result.status == JxlRsStatus::Success) {
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFramePartial);
+ }
+ if (all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
}
+
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFrameComplete);
+ frame.SetDuration(seek.duration);
+ frame.SetTimestamp(seek.timestamp);
+
+  // After seeking, the decoder's sequential-decode position is indeterminate.
+  // NOTE(review): decoder_input_offset_ is not updated here -- confirm that a
+  // later sequential Decode() re-seeks rather than resuming at a stale offset.
+ decoder_state_ = DecoderState::kHaveBasicInfo;
}
bool JXLImageDecoder::CanReusePreviousFrameBuffer(
@@ -441,20 +706,15 @@ bool JXLImageDecoder::FrameIsReceivedAtIndex(wtf_size_t index) const {
std::optional<base::TimeDelta> JXLImageDecoder::FrameTimestampAtIndex(
wtf_size_t index) const {
- // Use frame_info_ which is populated at header parsing time,
- // not frame_buffer_cache_ which is only set after decoding.
- if (index < frame_info_.size()) {
- return frame_info_[index].timestamp;
+ if (index < frame_seek_info_.size()) {
+ return frame_seek_info_[index].timestamp;
}
return std::nullopt;
}
base::TimeDelta JXLImageDecoder::FrameDurationAtIndex(wtf_size_t index) const {
- // Durations are available in frame_info_ for all discovered frames.
- // Frame discovery happens in DecodeFrameCount() which is called by
- // FrameCount() whenever new data arrives.
- if (index < frame_info_.size()) {
- return frame_info_[index].duration;
+ if (index < frame_seek_info_.size()) {
+ return frame_seek_info_[index].duration;
}
return base::TimeDelta();
}
@@ -472,13 +732,8 @@ int JXLImageDecoder::RepetitionCount() const {
wtf_size_t JXLImageDecoder::ClearCacheExceptFrame(
wtf_size_t clear_except_frame) {
- if (basic_info_.have_animation) {
- // TODO(veluca): jxl-rs does not (yet) support seeking to specific frames.
- // For now, deal with this by disallowing clearing the cache.
-
- return 0;
- }
-
+ // With frame seeking support, we can clear cached frames and re-decode
+ // them on demand by seeking to the appropriate offset.
return ImageDecoder::ClearCacheExceptFrame(clear_except_frame);
}
diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
index 1a3f502fdba83..b09bfda992e2f 100644
--- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
+++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
@@ -45,10 +45,11 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
static bool MatchesJXLSignature(const FastSharedBufferReader& fast_reader);
private:
- // C++-managed Rust Box for JxlRsDecoder.
+ // C++-managed Rust Box types.
using JxlRsDecoderPtr = rust::Box<jxl_rs::JxlRsDecoder>;
+ using JxlRsScannerPtr = rust::Box<jxl_rs::JxlRsFrameScanner>;
- // Decoder state machine.
+ // Decoder state machine for the pixel decoder.
enum class DecoderState {
kInitial, // Waiting for basic info
kHaveBasicInfo, // Have basic info, waiting for frame header
@@ -56,10 +57,14 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
kDone // Decoding is done
};
- // Frame information tracked during decoding.
- struct FrameInfo {
+ // Seek info for a visible frame, cached from the scanner.
+ struct FrameSeekInfo {
base::TimeDelta duration;
base::TimeDelta timestamp;
+ bool is_keyframe = false;
+ size_t decode_start_file_offset = 0;
+ uint64_t remaining_in_box = 0;
+ size_t visible_frames_to_skip = 0;
};
// ImageDecoder:
@@ -72,29 +77,39 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
// Internal decode function that optionally stops after metadata.
void Decode(wtf_size_t index, bool only_size);
- // Eagerly decode all animation frames upfront.
- void DecodeAllFrames();
+ // Run the frame scanner to discover frame metadata without decoding pixels.
+ void ScanFrames();
+
+ // Seek the pixel decoder to the target frame and decode it.
+ void SeekAndDecodeFrame(wtf_size_t index);
// Converts JXL pixel format to Skia color type.
SkColorType GetSkColorType() const;
- // Decoder state.
+ // Lightweight frame scanner -- discovers frame count, durations, and seek
+ // offsets without decoding any pixels.
+ std::optional<JxlRsScannerPtr> scanner_;
+ size_t scanner_input_offset_ = 0;
+ bool scanner_done_ = false;
+
+ // Full pixel decoder with state machine.
std::optional<JxlRsDecoderPtr> decoder_;
DecoderState decoder_state_ = DecoderState::kInitial;
+ size_t decoder_input_offset_ = 0;
+ wtf_size_t num_decoded_frames_ = 0;
+
+ // Cached metadata.
jxl_rs::JxlRsBasicInfo basic_info_{};
bool have_basic_info_ = false;
- wtf_size_t num_decoded_frames_ = 0; // Frames whose pixels we've decoded.
- size_t input_offset_ = 0; // Current position in input stream.
- // Animation frame tracking.
- Vector<FrameInfo> frame_info_;
+ // Per-frame seek info populated by the scanner.
+ Vector<FrameSeekInfo> frame_seek_info_;
// Color management.
bool is_high_bit_depth_ = false;
bool decode_to_half_float_ = false;
- // Used to call UpdateBppHistogram<"Jxl">() at most once to record the
- // bits-per-pixel value of the image when the image is successfully decoded.
+ // Used to call UpdateBppHistogram<"Jxl">() at most once.
CrossThreadOnceFunction<void(gfx::Size, size_t)>
update_bpp_histogram_callback_;
};
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
index 2c3ce9855618b..ba55581559ced 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
@@ -937,7 +937,29 @@ impl JxlColorEncoding {
}
} else {
match self {
- JxlColorEncoding::XYB { .. } => todo!("implement A2B0 and B2A0 tags"),
+ JxlColorEncoding::XYB { .. } => {
+ // Create A2B0 tag for XYB color space
+ let a2b0_start = tags_data.len() as u32;
+ create_icc_lut_atob_tag_for_xyb(&mut tags_data)?;
+ pad_to_4_byte_boundary(&mut tags_data);
+ let a2b0_size = (tags_data.len() as u32) - a2b0_start;
+ collected_tags.push(TagInfo {
+ signature: *b"A2B0",
+ offset_in_tags_blob: a2b0_start,
+ size_unpadded: a2b0_size,
+ });
+
+ // Create B2A0 tag (no-op, required by Apple software)
+ let b2a0_start = tags_data.len() as u32;
+ create_icc_noop_btoa_tag(&mut tags_data)?;
+ pad_to_4_byte_boundary(&mut tags_data);
+ let b2a0_size = (tags_data.len() as u32) - b2a0_start;
+ collected_tags.push(TagInfo {
+ signature: *b"B2A0",
+ offset_in_tags_blob: b2a0_start,
+ size_unpadded: b2a0_size,
+ });
+ }
JxlColorEncoding::RgbColorSpace {
transfer_function, ..
}
@@ -2047,6 +2069,108 @@ fn tone_map_pixel(
])
}
+/// Create mAB A2B0 tag for XYB color space.
+fn create_icc_lut_atob_tag_for_xyb(tags: &mut Vec<u8>) -> Result<(), Error> {
+ use super::xyb_constants::*;
+ use byteorder::{BigEndian, WriteBytesExt};
+
+ // Tag signature: 'mAB '
+ tags.extend_from_slice(b"mAB ");
+ // 4 reserved bytes set to 0
+ tags.write_u32::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // Number of input channels
+ tags.push(3);
+ // Number of output channels
+ tags.push(3);
+ // 2 reserved bytes for padding
+ tags.write_u16::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // Offsets (calculated based on structure size)
+ // offset to first B curve: 32
+ tags.write_u32::<BigEndian>(32)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to matrix: 244
+ tags.write_u32::<BigEndian>(244)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to first M curve: 148
+ tags.write_u32::<BigEndian>(148)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to CLUT: 80
+ tags.write_u32::<BigEndian>(80)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to first A curve (reuse linear B curves): 32
+ tags.write_u32::<BigEndian>(32)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // offset = 32: B curves (3 identity/linear curves)
+  // Each curve is 16 bytes: 'para' (4) + reserved (4) + function type (2) +
+  // reserved (2) + one s15Fixed16 parameter. Type 0: Y = X^gamma, gamma = 1.0
+  // (identity), so 3 curves span offsets 32..80 (matching the CLUT offset).
+ for _ in 0..3 {
+ create_icc_curv_para_tag(tags, &[1.0], 0)?;
+ }
+
+ // offset = 80: CLUT
+ // 16 bytes for grid points (only first 3 used, rest 0)
+ for i in 0..16 {
+ tags.push(if i < 3 { 2 } else { 0 });
+ }
+ // precision = 2 (16-bit)
+ tags.push(2);
+ // 3 bytes padding
+ tags.push(0);
+ tags.write_u16::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // 2x2x2x3 entries of 2 bytes each = 48 bytes
+ let cube = unscaled_a2b_cube_full();
+ for row_x in &cube {
+ for row_y in row_x {
+ for out_f in row_y {
+ for &val_f in out_f {
+ let val = (65535.0 * val_f).round().clamp(0.0, 65535.0) as u16;
+ tags.write_u16::<BigEndian>(val)
+ .map_err(|_| Error::InvalidIccStream)?;
+ }
+ }
+ }
+ }
+
+ // offset = 148: M curves (3 parametric curves)
+ // Type 3 parametric curve: Y = (aX + b)^gamma + c for X >= d, else Y = cX
+ // Each curve: 12 + 5*4 = 32 bytes
+ let scale = xyb_scale();
+ for i in 0..3 {
+ let b = -XYB_OFFSET[i] - NEG_OPSIN_ABSORBANCE_BIAS_RGB[i].cbrt();
+ let params = [
+ 3.0, // gamma
+ 1.0 / scale[i], // a
+ b, // b
+ 0.0, // c (unused)
+ (-b * scale[i]).max(0.0), // d (make skcms happy)
+ ];
+ create_icc_curv_para_tag(tags, &params, 3)?;
+ }
+
+ // offset = 244: Matrix (12 values as s15Fixed16)
+ // 9 matrix values + 3 intercepts = 12 * 4 = 48 bytes
+ for v in XYB_ICC_MATRIX {
+ append_s15_fixed_16(tags, v as f32)?;
+ }
+
+ // Intercepts
+ for i in 0..3 {
+ let mut intercept: f64 = 0.0;
+ for j in 0..3 {
+ intercept += XYB_ICC_MATRIX[i * 3 + j] * (NEG_OPSIN_ABSORBANCE_BIAS_RGB[j] as f64);
+ }
+ append_s15_fixed_16(tags, intercept as f32)?;
+ }
+
+ Ok(())
+}
+
/// Create mft1 (8-bit LUT) A2B0 tag for HDR tone mapping.
fn create_icc_lut_atob_tag_for_hdr(
transfer_function: &JxlTransferFunction,
@@ -2642,4 +2766,17 @@ mod test {
assert!(!rgb.same_color_encoding(&gray));
assert!(!gray.same_color_encoding(&rgb));
}
+
+ /// Verify XYB color profiles generate valid ICC profiles with A2B0/B2A0 tags.
+ #[test]
+ fn test_xyb_icc_profile_generation() {
+ let xyb = JxlColorProfile::Simple(JxlColorEncoding::XYB {
+ rendering_intent: RenderingIntent::Perceptual,
+ });
+
+ let icc = xyb.try_as_icc().expect("XYB should generate ICC profile");
+ assert!(!icc.is_empty());
+ assert!(icc.windows(4).any(|w| w == b"mAB "));
+ assert!(icc.windows(4).any(|w| w == b"mBA "));
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
index 5debb0c1df1d6..966033c5a5d50 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
@@ -40,6 +40,13 @@ impl JxlColorType {
Self::Rgba | Self::Bgra => false,
}
}
+ pub fn add_alpha(&self) -> Self {
+ match self {
+ Self::Grayscale | Self::GrayscaleAlpha => Self::GrayscaleAlpha,
+ Self::Rgb | Self::Rgba => Self::Rgba,
+ Self::Bgr | Self::Bgra => Self::Bgra,
+ }
+ }
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
index d74b0ffa18e06..848adce67153f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
@@ -9,7 +9,7 @@ use super::{
};
#[cfg(test)]
use crate::frame::Frame;
-use crate::{api::JxlFrameHeader, error::Result};
+use crate::{api::JxlFrameHeader, container::frame_index::FrameIndexBox, error::Result};
use states::*;
use std::marker::PhantomData;
@@ -35,6 +35,42 @@ pub struct JxlDecoder<State: JxlState> {
#[cfg(test)]
pub type FrameCallback = dyn FnMut(&Frame, usize) -> Result<()>;
+/// Information about a single visible frame discovered while decoding.
+#[derive(Debug, Clone, PartialEq)]
+pub struct VisibleFrameInfo {
+ /// Zero-based index among visible frames.
+ pub index: usize,
+ /// Duration in milliseconds (0 for still images or the last frame).
+ pub duration_ms: f64,
+ /// Duration in raw ticks from the animation header.
+ pub duration_ticks: u32,
+ /// Byte offset of this frame's header in the input file.
+ pub(crate) file_offset: usize,
+ /// Whether this is the last frame in the codestream.
+ pub is_last: bool,
+ /// Whether this frame is a seek-keyframe for visible-frame playback.
+ ///
+ /// This is equivalent to `seek_target.visible_frames_to_skip == 0`.
+ pub is_keyframe: bool,
+ /// Precomputed seek inputs for this visible frame.
+ pub seek_target: VisibleFrameSeekTarget,
+ /// Frame name, if any.
+ pub name: String,
+}
+
+/// Computed seek inputs for a target visible frame.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct VisibleFrameSeekTarget {
+ /// File byte offset to start feeding input from.
+ pub decode_start_file_offset: usize,
+ /// Remaining codestream bytes in the current container box at the seek
+ /// point. Pass this to [`JxlDecoder::start_new_frame`].
+ pub remaining_in_box: u64,
+ /// Number of visible frames to skip after seek-start before decoding the
+ /// requested target frame.
+ pub visible_frames_to_skip: usize,
+}
+
impl<S: JxlState> JxlDecoder<S> {
fn wrap_inner(inner: Box<JxlDecoderInner>) -> Self {
Self {
@@ -54,6 +90,26 @@ impl<S: JxlState> JxlDecoder<S> {
self.inner.decoded_frames()
}
+ /// Returns the parsed frame index box, if the file contained one.
+ ///
+ /// The frame index box (`jxli`) is an optional part of the JXL container
+ /// format that provides a seek table for animated files, listing keyframe
+ /// byte offsets, timestamps, and frame counts.
+ ///
+ /// TODO(veluca): Provide a higher-level frame-index API aligned with
+ /// `scanned_frames()` / `VisibleFrameInfo` seek metadata.
+ pub fn frame_index(&self) -> Option<&FrameIndexBox> {
+ self.inner.frame_index()
+ }
+
+ /// Returns visible frame info entries collected so far.
+ ///
+ /// When `JxlDecoderOptions::scan_frames_only` is enabled this is the
+ /// primary output of decoding.
+ pub fn scanned_frames(&self) -> &[VisibleFrameInfo] {
+ self.inner.scanned_frames()
+ }
+
/// Rewinds a decoder to the start of the file, allowing past frames to be displayed again.
pub fn rewind(mut self) -> JxlDecoder<Initialized> {
self.inner.rewind();
@@ -93,8 +149,6 @@ impl JxlDecoder<Initialized> {
}
impl JxlDecoder<WithImageInfo> {
- // TODO(veluca): once frame skipping is implemented properly, expose that in the API.
-
/// Obtains the image's basic information.
pub fn basic_info(&self) -> &JxlBasicInfo {
self.inner.basic_info().unwrap()
@@ -116,10 +170,15 @@ impl JxlDecoder<WithImageInfo> {
self.inner.set_output_color_profile(profile)
}
+ /// Retrieves the current pixel format for output buffers.
pub fn current_pixel_format(&self) -> &JxlPixelFormat {
self.inner.current_pixel_format().unwrap()
}
+ /// Specifies pixel format for output buffers.
+ ///
+ /// Setting this may also change the output color profile in some cases, if the profile was not set
+ /// manually before.
pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) {
self.inner.set_pixel_format(pixel_format);
}
@@ -132,10 +191,54 @@ impl JxlDecoder<WithImageInfo> {
Ok(self.map_inner_processing_result(inner_result))
}
+ /// Draws all the pixels we have data for. This is useful for, e.g., previewing LF frames.
+ ///
+ /// Note: see `process` for alignment requirements for the buffer data.
+ pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer<'_>]) -> Result<()> {
+ self.inner.flush_pixels(buffers)
+ }
+
pub fn has_more_frames(&self) -> bool {
self.inner.has_more_frames()
}
+ /// Resets frame-level decoder state to prepare for decoding a new frame.
+ ///
+ /// This clears intermediate buffers (frame header, TOC, section data) while
+ /// preserving image-level state (file header, color profiles, pixel format,
+ /// reference frames). The box parser is restored to the correct
+ /// mid-codestream state using `remaining_in_box`, so the next `process()`
+ /// call correctly parses a new frame header from the input.
+ ///
+ /// # Arguments
+ ///
+ /// * `seek_target` -- from `VisibleFrameInfo::seek_target`.
+ /// Includes both the box-parser state (`remaining_in_box`) and the input
+ /// resume offset (`decode_start_file_offset`).
+ ///
+ /// After calling this, provide raw file input starting from
+ /// `seek_target.decode_start_file_offset`.
+ ///
+ /// # Example
+ ///
+ /// ```rust,ignore
+ /// // 1. Scan frame info using the regular decoder API.
+ /// let options = JxlDecoderOptions {
+ /// scan_frames_only: true,
+ /// ..Default::default()
+ /// };
+ /// let decoder = JxlDecoder::<states::Initialized>::new(options);
+ /// // ...advance decoder and call `scanned_frames()`...
+ ///
+ /// // 2. Seek to frame N (bare codestream).
+ /// let target = &frames[n];
+ /// decoder.start_new_frame(target.seek_target);
+ /// // 3. Provide input from target.seek_target.decode_start_file_offset and process().
+ /// ```
+ pub fn start_new_frame(&mut self, seek_target: VisibleFrameSeekTarget) {
+ self.inner.start_new_frame(seek_target.remaining_in_box);
+ }
+
#[cfg(test)]
pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) {
self.inner.set_use_simple_pipeline(u);
@@ -143,7 +246,17 @@ impl JxlDecoder<WithImageInfo> {
}
impl JxlDecoder<WithFrameInfo> {
- /// Skip the current frame.
+ /// Skip the current frame without decoding pixels.
+ ///
+ /// This reads section data from the input to advance past the frame, but
+ /// does not render pixels. Reference frames that may be needed by later
+ /// frames are still decoded internally.
+ ///
+ /// For efficient frame seeking in animations, enable
+ /// `JxlDecoderOptions::scan_frames_only` and use
+ /// [`scanned_frames`](JxlDecoder::scanned_frames), then
+ /// [`start_new_frame`](JxlDecoder::start_new_frame) to jump directly to a
+ /// target frame.
pub fn skip_frame(
mut self,
input: &mut impl JxlBitstreamInput,
@@ -191,7 +304,6 @@ pub(crate) mod tests {
use crate::api::{JxlDataFormat, JxlDecoderOptions};
use crate::error::Error;
use crate::image::{Image, Rect};
- use crate::util::test::assert_almost_abs_eq_coords;
use jxl_macros::for_each_test_file;
use std::path::Path;
@@ -202,6 +314,7 @@ pub(crate) mod tests {
&std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap(),
u.arbitrary::<u8>().unwrap() as usize + 1,
false,
+ false,
None,
)
.unwrap();
@@ -214,6 +327,7 @@ pub(crate) mod tests {
mut input: &[u8],
chunk_size: usize,
use_simple_pipeline: bool,
+ do_flush: bool,
callback: Option<Box<dyn FnMut(&Frame, usize) -> Result<(), Error>>>,
) -> Result<(usize, Vec<Vec<Image<f32>>>), Error> {
let options = JxlDecoderOptions::default();
@@ -226,7 +340,7 @@ pub(crate) mod tests {
let mut chunk_input = &input[0..0];
macro_rules! advance_decoder {
- ($decoder: ident $(, $extra_arg: expr)?) => {
+ ($decoder: ident $(, $extra_arg: expr)? $(; $flush_arg: expr)?) => {
loop {
chunk_input =
&input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
@@ -236,6 +350,12 @@ pub(crate) mod tests {
match process_result.unwrap() {
ProcessingResult::Complete { result } => break result,
ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ $(
+ let mut fallback = fallback;
+ if do_flush && !input.is_empty() {
+ fallback.flush_pixels($flush_arg)?;
+ }
+ )?
if input.is_empty() {
panic!("Unexpected end of input");
}
@@ -281,9 +401,6 @@ pub(crate) mod tests {
let mut frames = vec![];
loop {
- // Process until we have frame info
- let mut decoder_with_frame_info = advance_decoder!(decoder_with_image_info);
-
// First channel is interleaved.
let mut buffers = vec![Image::new_with_value(
(buffer_width * num_channels, buffer_height),
@@ -313,7 +430,11 @@ pub(crate) mod tests {
})
.collect();
- decoder_with_image_info = advance_decoder!(decoder_with_frame_info, &mut api_buffers);
+ // Process until we have frame info
+ let mut decoder_with_frame_info =
+ advance_decoder!(decoder_with_image_info; &mut api_buffers);
+ decoder_with_image_info =
+ advance_decoder!(decoder_with_frame_info, &mut api_buffers; &mut api_buffers);
// All pixels should have been overwritten, so they should no longer be NaNs.
for buf in buffers.iter() {
@@ -341,76 +462,108 @@ pub(crate) mod tests {
}
fn decode_test_file(path: &Path) -> Result<(), Error> {
- decode(&std::fs::read(path)?, usize::MAX, false, None)?;
+ decode(&std::fs::read(path)?, usize::MAX, false, false, None)?;
Ok(())
}
for_each_test_file!(decode_test_file);
fn decode_test_file_chunks(path: &Path) -> Result<(), Error> {
- decode(&std::fs::read(path)?, 1, false, None)?;
+ decode(&std::fs::read(path)?, 1, false, false, None)?;
Ok(())
}
for_each_test_file!(decode_test_file_chunks);
+ fn compare_frames(
+ path: &Path,
+ fc: usize,
+ f: &[Image<f32>],
+ sf: &[Image<f32>],
+ ) -> Result<(), Error> {
+ assert_eq!(
+ f.len(),
+ sf.len(),
+ "Frame {fc} has different channels counts",
+ );
+ for (c, (b, sb)) in f.iter().zip(sf.iter()).enumerate() {
+ assert_eq!(
+ b.size(),
+ sb.size(),
+ "Channel {c} in frame {fc} has different sizes",
+ );
+ let sz = b.size();
+ if false {
+ let f = std::fs::File::create(Path::new("/tmp/").join(format!(
+ "{}_diff_chan{c}.pbm",
+ path.as_os_str().to_string_lossy().replace("/", "_")
+ )))?;
+ use std::io::Write;
+ let mut f = std::io::BufWriter::new(f);
+ writeln!(f, "P1\n{} {}", sz.0, sz.1)?;
+ for y in 0..sz.1 {
+ for x in 0..sz.0 {
+ if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 {
+ write!(f, "1")?;
+ } else {
+ write!(f, "0")?;
+ }
+ }
+ }
+ drop(f);
+ }
+ for y in 0..sz.1 {
+ for x in 0..sz.0 {
+ assert_eq!(
+ b.row(y)[x],
+ sb.row(y)[x],
+ "Pixels differ at position ({x}, {y}), channel {c}"
+ );
+ }
+ }
+ }
+ Ok(())
+ }
+
fn compare_pipelines(path: &Path) -> Result<(), Error> {
let file = std::fs::read(path)?;
- let simple_frames = decode(&file, usize::MAX, true, None)?.1;
- let frames = decode(&file, usize::MAX, false, None)?.1;
+ let simple_frames = decode(&file, usize::MAX, true, false, None)?.1;
+ let frames = decode(&file, usize::MAX, false, false, None)?.1;
assert_eq!(frames.len(), simple_frames.len());
for (fc, (f, sf)) in frames
.into_iter()
.zip(simple_frames.into_iter())
.enumerate()
{
- assert_eq!(
- f.len(),
- sf.len(),
- "Frame {fc} has different channels counts",
- );
- for (c, (b, sb)) in f.into_iter().zip(sf.into_iter()).enumerate() {
- assert_eq!(
- b.size(),
- sb.size(),
- "Channel {c} in frame {fc} has different sizes",
- );
- // TODO(veluca): This check actually succeeds if we disable SIMD.
- // With SIMD, the exact output of computations in epf.rs appear to depend on the
- // lane that the computation was done in (???). We should investigate this.
- // b.as_rect().check_equal(sb.as_rect());
- let sz = b.size();
- if false {
- let f = std::fs::File::create(Path::new("/tmp/").join(format!(
- "{}_diff_chan{c}.pbm",
- path.as_os_str().to_string_lossy().replace("/", "_")
- )))?;
- use std::io::Write;
- let mut f = std::io::BufWriter::new(f);
- writeln!(f, "P1\n{} {}", sz.0, sz.1)?;
- for y in 0..sz.1 {
- for x in 0..sz.0 {
- if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 {
- write!(f, "1")?;
- } else {
- write!(f, "0")?;
- }
- }
- }
- drop(f);
- }
- for y in 0..sz.1 {
- for x in 0..sz.0 {
- assert_almost_abs_eq_coords(b.row(y)[x], sb.row(y)[x], 1e-5, (x, y), c);
- }
- }
- }
+ compare_frames(path, fc, &f, &sf)?;
}
Ok(())
}
for_each_test_file!(compare_pipelines);
+ fn compare_incremental(path: &Path) -> Result<(), Error> {
+ let file = std::fs::read(path).unwrap();
+ // One-shot decode
+ let (_, one_shot_frames) = decode(&file, usize::MAX, false, false, None)?;
+ // Incremental decode with arbitrary flushes.
+ let (_, frames) = decode(&file, 123, false, true, None)?;
+
+ // Compare one_shot_frames and frames
+ assert_eq!(one_shot_frames.len(), frames.len());
+ for (fc, (f, sf)) in frames
+ .into_iter()
+ .zip(one_shot_frames.into_iter())
+ .enumerate()
+ {
+ compare_frames(path, fc, &f, &sf)?;
+ }
+
+ Ok(())
+ }
+
+ for_each_test_file!(compare_incremental);
+
#[test]
fn test_preview_size_none_for_regular_files() {
let file = std::fs::read("resources/test/basic.jxl").unwrap();
@@ -539,6 +692,55 @@ pub(crate) mod tests {
assert!(result.is_err());
}
+ #[test]
+ fn test_default_output_tf_by_pixel_format() {
+ use crate::api::{JxlColorEncoding, JxlTransferFunction};
+
+ // Using test image with ICC profile to trigger default transfer function path
+ let file = std::fs::read("resources/test/lossy_with_icc.jxl").unwrap();
+ let options = JxlDecoderOptions::default();
+ let mut decoder = JxlDecoder::<states::Initialized>::new(options);
+ let mut input = file.as_slice();
+ let mut decoder = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => decoder = fallback,
+ }
+ };
+
+ // Output data format will default to F32, so output color profile will be linear sRGB
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::Linear,
+ );
+
+ // Integer data format will set output color profile to sRGB
+ decoder.set_pixel_format(JxlPixelFormat::rgba8(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::SRGB,
+ );
+
+ decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::Linear,
+ );
+
+ decoder.set_pixel_format(JxlPixelFormat::rgba16(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::SRGB,
+ );
+
+ // Once output color profile is set by user, it will remain as is regardless of what pixel
+ // format is set
+ let profile = JxlColorProfile::Simple(JxlColorEncoding::srgb(false));
+ decoder.set_output_color_profile(profile.clone()).unwrap();
+ decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0));
+ assert!(decoder.output_color_profile() == &profile);
+ }
+
#[test]
fn test_fill_opaque_alpha_both_pipelines() {
use crate::api::{JxlColorType, JxlDataFormat, JxlPixelFormat};
@@ -1230,7 +1432,7 @@ pub(crate) mod tests {
// The test passes if it doesn't panic with "attempt to add with overflow"
// It's OK if it returns an error or panics with "Unexpected end of input"
let result = panic::catch_unwind(|| {
- let _ = decode(data, 1024, false, None);
+ let _ = decode(data, 1024, false, false, None);
});
// If it panicked, make sure it wasn't an overflow panic
@@ -1247,4 +1449,538 @@ pub(crate) mod tests {
);
}
}
+
+ fn make_box(ty: &[u8; 4], content: &[u8]) -> Vec<u8> {
+ let len = (8 + content.len()) as u32;
+ let mut buf = Vec::new();
+ buf.extend(len.to_be_bytes());
+ buf.extend(ty);
+ buf.extend(content);
+ buf
+ }
+
+ fn add_container_header(container: &mut Vec<u8>) {
+ // JXL signature box
+ let sig = [
+ 0x00, 0x00, 0x00, 0x0c, 0x4a, 0x58, 0x4c, 0x20, 0x0d, 0x0a, 0x87, 0x0a,
+ ];
+ // ftyp box
+ let ftyp = make_box(b"ftyp", b"jxl \x00\x00\x00\x00jxl ");
+ container.extend(&sig);
+ container.extend(&ftyp);
+ }
+
+ /// Helper to wrap a bare codestream in a JXL container with a jxli frame index box.
+ fn wrap_with_frame_index(
+ codestream: &[u8],
+ tnum: u32,
+ tden: u32,
+ entries: &[(u64, u64, u64)], // (OFF_delta, T, F)
+ ) -> Vec<u8> {
+ use crate::util::test::build_frame_index_content;
+
+ let jxli_content = build_frame_index_content(tnum, tden, entries);
+
+ let jxli = make_box(b"jxli", &jxli_content);
+ let jxlc = make_box(b"jxlc", codestream);
+
+ let mut container = Vec::new();
+ add_container_header(&mut container);
+ container.extend(&jxli);
+ container.extend(&jxlc);
+ container
+ }
+
+ /// Helper to wrap a bare codestream in a container split across jxlp boxes.
+ ///
+ /// `chunk_starts` are codestream offsets where each new jxlp chunk begins.
+ fn wrap_with_jxlp_chunks(codestream: &[u8], chunk_starts: &[usize]) -> Vec<u8> {
+ let mut starts = chunk_starts.to_vec();
+ starts.sort_unstable();
+ starts.dedup();
+ if starts.first().copied() != Some(0) {
+ starts.insert(0, 0);
+ }
+ if starts.last().copied() != Some(codestream.len()) {
+ starts.push(codestream.len());
+ }
+ assert!(starts.len() >= 2);
+
+ let mut container = Vec::new();
+ add_container_header(&mut container);
+
+ let num_chunks = starts.len() - 1;
+ for i in 0..num_chunks {
+ let begin = starts[i];
+ let end = starts[i + 1];
+ assert!(begin <= end && end <= codestream.len());
+
+ let mut payload = Vec::with_capacity(4 + (end - begin));
+ let mut index = i as u32;
+ if i + 1 == num_chunks {
+ index |= 0x8000_0000;
+ }
+ payload.extend(index.to_be_bytes());
+ payload.extend(&codestream[begin..end]);
+ container.extend(make_box(b"jxlp", &payload));
+ }
+
+ container
+ }
+
+ #[test]
+ fn test_frame_index_parsed_from_container() {
+ // Read a bare animation codestream and wrap it in a container with a jxli box.
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ // Create synthetic frame index entries (delta offsets).
+ // These are synthetic -- we don't know real frame offsets, but we can verify parsing.
+ let entries = vec![
+ (0u64, 100u64, 1u64), // Frame 0 at offset 0
+ (500, 100, 1), // Frame 1 at offset 500
+ (600, 100, 1), // Frame 2 at offset 1100
+ ];
+
+ let container = wrap_with_frame_index(&codestream, 1, 1000, &entries);
+
+ // Decode with a large chunk size so the jxli box is fully consumed.
+ let options = JxlDecoderOptions::default();
+ let mut dec = JxlDecoder::<states::Initialized>::new(options);
+ let mut input: &[u8] = &container;
+ let dec = loop {
+ match dec.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ dec = fallback;
+ }
+ }
+ };
+
+ // Check that frame index was parsed.
+ let fi = dec.frame_index().expect("frame_index should be Some");
+ assert_eq!(fi.num_frames(), 3);
+ assert_eq!(fi.tnum, 1);
+ assert_eq!(fi.tden.get(), 1000);
+ // Verify absolute offsets (accumulated from deltas)
+ assert_eq!(fi.entries[0].codestream_offset, 0);
+ assert_eq!(fi.entries[1].codestream_offset, 500);
+ assert_eq!(fi.entries[2].codestream_offset, 1100);
+ assert_eq!(fi.entries[0].duration_ticks, 100);
+ assert_eq!(fi.entries[2].frame_count, 1);
+ }
+
+ #[test]
+ fn test_frame_index_none_for_bare_codestream() {
+ // A bare codestream has no container, so no frame index.
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let options = JxlDecoderOptions::default();
+ let mut dec = JxlDecoder::<states::Initialized>::new(options);
+ let mut input: &[u8] = &data;
+ let dec = loop {
+ match dec.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ dec = fallback;
+ }
+ }
+ };
+ assert!(dec.frame_index().is_none());
+ }
+
+ fn scan_frames_with_decoder(mut input: &[u8], chunk_size: usize) -> Vec<VisibleFrameInfo> {
+ let mut chunk_input = &input[0..0];
+ let options = JxlDecoderOptions {
+ scan_frames_only: true,
+ skip_preview: false,
+ ..Default::default()
+ };
+ let mut initialized_decoder = JxlDecoder::<states::Initialized>::new(options);
+
+ macro_rules! advance_process {
+ ($decoder: ident) => {
+ loop {
+ chunk_input =
+ &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
+ let available_before = chunk_input.len();
+ let process_result = $decoder.process(&mut chunk_input);
+ input = &input[(available_before - chunk_input.len())..];
+ match process_result.unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ $decoder = fallback;
+ }
+ }
+ }
+ };
+ }
+
+ macro_rules! advance_skip {
+ ($decoder: ident) => {
+ loop {
+ chunk_input =
+ &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
+ let available_before = chunk_input.len();
+ let process_result = $decoder.skip_frame(&mut chunk_input);
+ input = &input[(available_before - chunk_input.len())..];
+ match process_result.unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ $decoder = fallback;
+ }
+ }
+ }
+ };
+ }
+
+ let mut decoder_with_image_info = advance_process!(initialized_decoder);
+
+ if !decoder_with_image_info.has_more_frames() {
+ return decoder_with_image_info.scanned_frames().to_vec();
+ }
+
+ loop {
+ let mut decoder_with_frame_info = advance_process!(decoder_with_image_info);
+ decoder_with_image_info = advance_skip!(decoder_with_frame_info);
+ if !decoder_with_image_info.has_more_frames() {
+ break;
+ }
+ }
+
+ decoder_with_image_info.scanned_frames().to_vec()
+ }
+
+ fn assert_start_new_frame_matches_sequential(data: &[u8], expect_bare_codestream: bool) {
+ use crate::api::{JxlDataFormat, JxlPixelFormat};
+ use crate::image::{Image, Rect};
+
+ // 1. Scan frame info to get seek offsets.
+ let scanned_frames = scan_frames_with_decoder(data, usize::MAX);
+ assert!(scanned_frames.len() > 1, "need multiple frames");
+
+ // Compare against second visible frame from regular sequential decode.
+ let target_visible_index = 1;
+ let seek_target = scanned_frames[target_visible_index].seek_target;
+
+ if expect_bare_codestream {
+ assert_eq!(seek_target.remaining_in_box, u64::MAX);
+ } else {
+ assert_ne!(seek_target.remaining_in_box, u64::MAX);
+ }
+
+ // 2. Decode all frames sequentially and keep the reference frame.
+ let (_n, sequential_frames) = decode(data, usize::MAX, false, false, None).unwrap();
+ let expected = &sequential_frames[target_visible_index];
+
+ // 3. Create decoder and parse image info.
+ let options = JxlDecoderOptions::default();
+ let decoder = JxlDecoder::<states::Initialized>::new(options);
+ let mut input = data;
+
+ let ProcessingResult::Complete {
+ result: mut decoder,
+ } = decoder.process(&mut input).unwrap()
+ else {
+ panic!("expected Complete with full data");
+ };
+
+ let basic_info = decoder.basic_info().clone();
+ let (width, height) = basic_info.size;
+
+ // Match the same requested output format as the sequential helper.
+ let default_format = decoder.current_pixel_format().clone();
+ let requested_format = JxlPixelFormat {
+ color_type: default_format.color_type,
+ color_data_format: Some(JxlDataFormat::f32()),
+ extra_channel_format: default_format
+ .extra_channel_format
+ .iter()
+ .map(|_| Some(JxlDataFormat::f32()))
+ .collect(),
+ };
+ decoder.set_pixel_format(requested_format.clone());
+
+ let channels = requested_format.color_type.samples_per_pixel();
+ let num_ec = requested_format.extra_channel_format.len();
+
+ // 4. Seek to decode-start and advance to the target visible frame.
+ decoder.start_new_frame(seek_target);
+ let mut input = &data[seek_target.decode_start_file_offset..];
+
+ for _ in 0..seek_target.visible_frames_to_skip {
+ let mut decoder_frame = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder = fallback;
+ }
+ }
+ };
+
+ decoder = loop {
+ match decoder_frame.skip_frame(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder_frame = fallback;
+ }
+ }
+ };
+ }
+
+ let mut decoder_frame = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder = fallback;
+ }
+ }
+ };
+
+ let mut color_buffer = Image::<f32>::new((width * channels, height)).unwrap();
+ let mut ec_buffers: Vec<Image<f32>> = (0..num_ec)
+ .map(|_| Image::<f32>::new((width, height)).unwrap())
+ .collect();
+ let mut buffers: Vec<JxlOutputBuffer> = vec![JxlOutputBuffer::from_image_rect_mut(
+ color_buffer
+ .get_rect_mut(Rect {
+ origin: (0, 0),
+ size: (width * channels, height),
+ })
+ .into_raw(),
+ )];
+ for ec in ec_buffers.iter_mut() {
+ buffers.push(JxlOutputBuffer::from_image_rect_mut(
+ ec.get_rect_mut(Rect {
+ origin: (0, 0),
+ size: (width, height),
+ })
+ .into_raw(),
+ ));
+ }
+
+ let _decoder = loop {
+ match decoder_frame.process(&mut input, &mut buffers).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder_frame = fallback;
+ }
+ }
+ };
+
+ // 5. Compare seek-decoded frame against sequential decode reference.
+ let mut seek_decoded = Vec::with_capacity(1 + num_ec);
+ seek_decoded.push(color_buffer);
+ seek_decoded.extend(ec_buffers);
+ compare_frames(
+ Path::new("start_new_frame_seek"),
+ target_visible_index,
+ expected,
+ &seek_decoded,
+ )
+ .unwrap();
+ }
+
+ /// Test that `start_new_frame()` + scanner seek info decodes the same
+ /// frame as regular sequential decode for bare codestream input.
+ #[test]
+ fn test_start_new_frame_bare_codestream() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ assert_start_new_frame_matches_sequential(&data, true);
+ }
+
+ /// Test that `start_new_frame()` + scanner seek info also works for boxed input.
+ #[test]
+ fn test_start_new_frame_boxed_codestream() {
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let entries = vec![(0u64, 100u64, 1u64), (500, 100, 1), (600, 100, 1)];
+ let container = wrap_with_frame_index(&codestream, 1, 1000, &entries);
+ assert_start_new_frame_matches_sequential(&container, false);
+ }
+
+ /// Test seek/scanner behavior when codestream data is split across jxlp boxes,
+ /// with each visible frame starting in its own chunk.
+ #[test]
+ fn test_start_new_frame_boxed_jxlp_per_visible_frame() {
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let scanned_frames = scan_frames_with_decoder(&codestream, usize::MAX);
+ assert!(scanned_frames.len() > 1, "need multiple frames");
+
+ let (decoded_frames, _) = decode(&codestream, usize::MAX, false, false, None).unwrap();
+ assert_eq!(
+ decoded_frames,
+ scanned_frames.len(),
+ "test file should have one codestream frame per visible frame",
+ );
+
+ let mut chunk_starts: Vec<usize> = scanned_frames.iter().map(|f| f.file_offset).collect();
+ chunk_starts.sort_unstable();
+ chunk_starts.dedup();
+ assert_eq!(chunk_starts.len(), scanned_frames.len());
+
+ let container = wrap_with_jxlp_chunks(&codestream, &chunk_starts);
+ assert_start_new_frame_matches_sequential(&container, false);
+ }
+
+ #[test]
+ fn test_scan_still_image() {
+ let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert_eq!(frames.len(), 1);
+ assert!(frames[0].is_last);
+ assert!(frames[0].is_keyframe);
+ let total_duration_ms: f64 = frames.iter().map(|f| f.duration_ms).sum();
+ assert_eq!(total_duration_ms, 0.0);
+ }
+
+ #[test]
+ fn test_scan_bare_animation() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(frames.len() > 1, "expected multiple frames");
+
+ for (i, frame) in frames.iter().enumerate() {
+ assert_eq!(frame.index, i);
+ }
+
+ assert!(frames.last().unwrap().is_last);
+
+ assert!(frames[0].is_keyframe);
+ assert_eq!(
+ frames[0].seek_target.decode_start_file_offset,
+ frames[0].file_offset
+ );
+ }
+
+ #[test]
+ fn test_scan_animation_offsets_increase() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ for i in 1..frames.len() {
+ assert!(
+ frames[i].file_offset > frames[i - 1].file_offset,
+ "frame {} offset {} should be > frame {} offset {}",
+ i,
+ frames[i].file_offset,
+ i - 1,
+ frames[i - 1].file_offset,
+ );
+ }
+ }
+
+ #[test]
+ fn test_scan_incremental() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let frames = scan_frames_with_decoder(&data, 128);
+ assert!(frames.len() > 1);
+ assert!(frames.last().unwrap().is_last);
+ }
+
+ #[test]
+ fn test_scan_keyframe_detection_still() {
+ let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert_eq!(frames.len(), 1);
+ let f = &frames[0];
+ assert!(f.is_keyframe);
+ assert_eq!(f.seek_target.decode_start_file_offset, f.file_offset);
+ assert_eq!(f.seek_target.visible_frames_to_skip, 0);
+ }
+
+ #[test]
+ fn test_scan_decode_start_file_offset_consistency() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ for frame in &frames {
+ assert!(
+ frame.seek_target.decode_start_file_offset <= frame.file_offset,
+ "frame {}: decode_start_file_offset {} > file_offset {}",
+ frame.index,
+ frame.seek_target.decode_start_file_offset,
+ frame.file_offset,
+ );
+ assert_eq!(
+ frame.is_keyframe,
+ frame.seek_target.visible_frames_to_skip == 0,
+ "frame {}: keyframe flag should match visible_frames_to_skip",
+ frame.index,
+ );
+ }
+ }
+
+ #[test]
+ fn test_scan_with_preview() {
+ let data = std::fs::read("resources/test/with_preview.jxl");
+ if data.is_err() {
+ return;
+ }
+ let data = data.unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(frames.len() <= 1);
+ }
+
+ #[test]
+ fn test_scan_patches_not_keyframe() {
+ let data = std::fs::read("resources/test/grayscale_patches_var_dct.jxl");
+ if data.is_err() {
+ return;
+ }
+ let data = data.unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(!frames.is_empty());
+ }
+
+ /// Regression test for Chromium ClusterFuzz issue 474401148.
+ #[test]
+ fn test_fuzzer_xyb_icc_no_panic() {
+ use crate::api::ProcessingResult;
+
+ #[rustfmt::skip]
+ let data: &[u8] = &[
+ 0xff, 0x0a, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x25, 0x00,
+ ];
+
+ let opts = JxlDecoderOptions {
+ pixel_limit: Some(1024 * 1024 * 1024),
+ ..Default::default()
+ };
+ let mut decoder = JxlDecoderInner::new(opts);
+ let mut input = data;
+
+ if let Ok(ProcessingResult::Complete { .. }) = decoder.process(&mut input, None)
+ && let Some(profile) = decoder.output_color_profile()
+ {
+ let _ = profile.try_as_icc();
+ }
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
index eb66cb3b1f4cf..e2b452cbc81c9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
@@ -3,6 +3,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::io::IoSliceMut;
+
+use crate::container::frame_index::FrameIndexBox;
use crate::error::{Error, Result};
use crate::api::{
@@ -15,6 +18,8 @@ enum ParseState {
BoxNeeded,
CodestreamBox(u64),
SkippableBox(u64),
+ /// Buffering a jxli box: (remaining bytes, accumulated content).
+ BufferingFrameIndex(u64, Vec<u8>),
}
enum CodestreamBoxType {
@@ -28,6 +33,10 @@ pub(super) struct BoxParser {
pub(super) box_buffer: SmallBuffer,
state: ParseState,
box_type: CodestreamBoxType,
+ /// Parsed frame index box, if present in the file.
+ pub(super) frame_index: Option<FrameIndexBox>,
+ /// Total file bytes consumed from the underlying input.
+ pub(super) total_file_consumed: u64,
}
impl BoxParser {
@@ -36,6 +45,8 @@ impl BoxParser {
box_buffer: SmallBuffer::new(128),
state: ParseState::SignatureNeeded,
box_type: CodestreamBoxType::None,
+ frame_index: None,
+ total_file_consumed: 0,
}
}
@@ -49,7 +60,8 @@ impl BoxParser {
loop {
match self.state.clone() {
ParseState::SignatureNeeded => {
- self.box_buffer.refill(|b| input.read(b), None)?;
+ let read = self.box_buffer.refill(|b| input.read(b), None)?;
+ self.total_file_consumed += read as u64;
match check_signature_internal(&self.box_buffer)? {
None => return Err(Error::InvalidSignature),
Some(JxlSignatureType::Codestream) => {
@@ -71,7 +83,9 @@ impl BoxParser {
let skipped = if !self.box_buffer.is_empty() {
self.box_buffer.consume(num)
} else {
- input.skip(num)?
+ let skipped = input.skip(num)?;
+ self.total_file_consumed += skipped as u64;
+ skipped
};
if skipped == 0 {
return Err(Error::OutOfBounds(num));
@@ -83,8 +97,35 @@ impl BoxParser {
self.state = ParseState::SkippableBox(s);
}
}
+ ParseState::BufferingFrameIndex(mut remaining, mut buf) => {
+ let num = remaining.min(usize::MAX as u64) as usize;
+ if !self.box_buffer.is_empty() {
+ let take = num.min(self.box_buffer.len());
+ buf.extend_from_slice(&self.box_buffer[..take]);
+ self.box_buffer.consume(take);
+ remaining -= take as u64;
+ } else {
+ let old_len = buf.len();
+ buf.resize(old_len + num, 0);
+ let read = input.read(&mut [IoSliceMut::new(&mut buf[old_len..])])?;
+ self.total_file_consumed += read as u64;
+ if read == 0 {
+ return Err(Error::OutOfBounds(num));
+ }
+ buf.truncate(old_len + read);
+ remaining -= read as u64;
+ }
+ if remaining == 0 {
+ // Parse the buffered frame index box.
+ self.frame_index = Some(FrameIndexBox::parse(&buf)?);
+ self.state = ParseState::BoxNeeded;
+ } else {
+ self.state = ParseState::BufferingFrameIndex(remaining, buf);
+ }
+ }
ParseState::BoxNeeded => {
- self.box_buffer.refill(|b| input.read(b), None)?;
+ let read = self.box_buffer.refill(|b| input.read(b), None)?;
+ self.total_file_consumed += read as u64;
let min_len = match &self.box_buffer[..] {
[0, 0, 0, 1, ..] => 16,
_ => 8,
@@ -148,6 +189,20 @@ impl BoxParser {
};
self.state = ParseState::CodestreamBox(content_len);
}
+ b"jxli" => {
+ if content_len == u64::MAX {
+ return Err(Error::InvalidBox);
+ }
+                            // Size limit for a frame index box (16 MB); larger jxli boxes are skipped rather than buffered.
+ if content_len > 16 * 1024 * 1024 {
+ self.state = ParseState::SkippableBox(content_len);
+ } else {
+ self.state = ParseState::BufferingFrameIndex(
+ content_len,
+ Vec::with_capacity(content_len as usize),
+ );
+ }
+ }
_ => {
self.state = ParseState::SkippableBox(content_len);
}
@@ -158,6 +213,26 @@ impl BoxParser {
}
}
+    /// Accounts for file bytes consumed directly by codestream parser reads/skips.
+ pub(super) fn mark_file_consumed(&mut self, amount: usize) {
+ self.total_file_consumed += amount as u64;
+ }
+
+ /// Resets the box parser for seeking to a specific codestream position.
+ ///
+ /// Sets the parser to `CodestreamBox(remaining)` state with cleared
+ /// buffers. The caller must provide raw input starting from the file
+ /// position that corresponds to the target codestream offset.
+ ///
+ /// `remaining` is the number of codestream bytes left in the current
+ /// box from the target file position. For bare-codestream files this
+ /// is `u64::MAX`.
+ pub(super) fn reset_for_codestream_seek(&mut self, remaining: u64) {
+ self.box_buffer = SmallBuffer::new(128);
+ self.state = ParseState::CodestreamBox(remaining);
+ // Keep frame_index unchanged.
+ }
+
pub(super) fn consume_codestream(&mut self, amount: u64) {
if let ParseState::CodestreamBox(cb) = &mut self.state {
*cb = cb.checked_sub(amount).unwrap();
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
index a5b650eacd226..ab645281dfffd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
@@ -14,8 +14,9 @@ use sections::SectionState;
use crate::api::FrameCallback;
use crate::{
api::{
- JxlBasicInfo, JxlBitstreamInput, JxlColorProfile, JxlDecoderOptions, JxlOutputBuffer,
- JxlPixelFormat,
+ JxlBasicInfo, JxlBitstreamInput, JxlColorEncoding, JxlColorProfile, JxlDataFormat,
+ JxlDecoderOptions, JxlOutputBuffer, JxlPixelFormat, VisibleFrameInfo,
+ VisibleFrameSeekTarget,
inner::{box_parser::BoxParser, process::SmallBuffer},
},
error::{Error, Result},
@@ -33,6 +34,13 @@ struct SectionBuffer {
section: Section,
}
+#[derive(Clone, Copy)]
+struct FrameStartInfo {
+ file_offset: usize,
+ remaining_in_box: u64,
+ visible_count_before: usize,
+}
+
pub(super) struct CodestreamParser {
// TODO(veluca): this would probably be cleaner with some kind of state enum.
pub(super) file_header: Option<FileHeader>,
@@ -44,6 +52,9 @@ pub(super) struct CodestreamParser {
pub(super) embedded_color_profile: Option<JxlColorProfile>,
pub(super) output_color_profile: Option<JxlColorProfile>,
pub(super) pixel_format: Option<JxlPixelFormat>,
+ xyb_encoded: bool,
+ is_gray: bool,
+ pub(super) output_color_profile_set_by_user: bool,
// These fields are populated when starting to decode a frame, and cleared once
// the frame is done.
@@ -79,6 +90,27 @@ pub(super) struct CodestreamParser {
header_needed_bytes: Option<u64>,
+ // --- Frame info tracking (for frame scanning) ---
+ /// Collected visible frame info entries.
+ pub(super) scanned_frames: Vec<VisibleFrameInfo>,
+ /// Zero-based visible frame index counter.
+ visible_frame_index: usize,
+ /// File offsets and visibility info for every non-preview frame (visible
+ /// and non-visible), in parse order.
+ frame_starts: Vec<FrameStartInfo>,
+ /// For each reference slot, earliest frame index required to reconstruct
+ /// the current contents of that slot.
+ reference_slot_decode_start: [Option<usize>; DecoderState::MAX_STORED_FRAMES],
+ /// For each LF slot, earliest frame index required to reconstruct the
+ /// current contents of that slot.
+ lf_slot_decode_start: [Option<usize>; DecoderState::NUM_LF_FRAMES],
+ /// File byte offset where the current frame header parse started.
+ /// Set when we begin parsing a frame header.
+ current_frame_file_offset: usize,
+ /// Remaining codestream bytes in the current box at frame start.
+ /// Captured alongside `current_frame_file_offset`.
+ current_frame_remaining_in_box: u64,
+
#[cfg(test)]
pub frame_callback: Option<Box<FrameCallback>>,
#[cfg(test)]
@@ -96,6 +128,9 @@ impl CodestreamParser {
embedded_color_profile: None,
output_color_profile: None,
pixel_format: None,
+ xyb_encoded: false,
+ is_gray: false,
+ output_color_profile_set_by_user: false,
frame_header: None,
toc_parser: None,
frame: None,
@@ -115,6 +150,13 @@ impl CodestreamParser {
candidate_hf_sections: HashSet::new(),
has_more_frames: true,
header_needed_bytes: None,
+ scanned_frames: Vec::new(),
+ visible_frame_index: 0,
+ frame_starts: Vec::new(),
+ reference_slot_decode_start: [None; DecoderState::MAX_STORED_FRAMES],
+ lf_slot_decode_start: [None; DecoderState::NUM_LF_FRAMES],
+ current_frame_file_offset: 0,
+ current_frame_remaining_in_box: u64::MAX,
#[cfg(test)]
frame_callback: None,
#[cfg(test)]
@@ -130,6 +172,125 @@ impl CodestreamParser {
}
}
+ /// Record frame info for the just-parsed frame.
+ /// Called after process_non_section() creates a Frame, for frame scanning.
+ fn record_frame_info(&mut self) {
+ let frame = match self.frame.as_ref() {
+ Some(f) => f,
+ None => return,
+ };
+ let header = frame.header();
+
+ let current_frame_index = self.frame_starts.len();
+ let is_visible = header.is_visible();
+ self.frame_starts.push(FrameStartInfo {
+ file_offset: self.current_frame_file_offset,
+ remaining_in_box: self.current_frame_remaining_in_box,
+ visible_count_before: self.visible_frame_index,
+ });
+
+ let mut decode_start_frame_index = current_frame_index;
+
+ // Track frame dependencies through reference slots. For blending we know
+ // exactly which slots are used. For patches we conservatively assume any
+ // reference slot may be used.
+ let mut used_reference_slots = [false; DecoderState::MAX_STORED_FRAMES];
+ if header.needs_blending() {
+ for blending_info in header
+ .ec_blending_info
+ .iter()
+ .chain(std::iter::once(&header.blending_info))
+ {
+ let source = blending_info.source as usize;
+ assert!(
+ source < DecoderState::MAX_STORED_FRAMES,
+ "invalid blending source slot {source}, max {}",
+ DecoderState::MAX_STORED_FRAMES - 1
+ );
+ used_reference_slots[source] = true;
+ }
+ }
+ if header.has_patches() {
+ used_reference_slots.fill(true);
+ }
+
+ for (slot, used) in used_reference_slots.iter().enumerate() {
+ if *used && let Some(dep_start) = self.reference_slot_decode_start[slot] {
+ decode_start_frame_index = decode_start_frame_index.min(dep_start);
+ }
+ }
+
+ if header.has_lf_frame() {
+ let lf_slot = header.lf_level as usize;
+ assert!(
+ lf_slot < DecoderState::NUM_LF_FRAMES,
+ "invalid lf slot {lf_slot}, max {}",
+ DecoderState::NUM_LF_FRAMES - 1
+ );
+ if let Some(dep_start) = self.lf_slot_decode_start[lf_slot] {
+ decode_start_frame_index = decode_start_frame_index.min(dep_start);
+ }
+ }
+
+ if is_visible {
+ let duration_ticks = header.duration;
+ let duration_ms = if let Some(ref anim) = self.animation {
+ if anim.tps_numerator > 0 {
+ (duration_ticks as f64) * 1000.0 * (anim.tps_denominator as f64)
+ / (anim.tps_numerator as f64)
+ } else {
+ 0.0
+ }
+ } else {
+ 0.0
+ };
+
+ let decode_start = self.frame_starts[decode_start_frame_index];
+ let seek_target = VisibleFrameSeekTarget {
+ decode_start_file_offset: decode_start.file_offset,
+ remaining_in_box: decode_start.remaining_in_box,
+ visible_frames_to_skip: self
+ .visible_frame_index
+ .saturating_sub(decode_start.visible_count_before),
+ };
+ let is_keyframe = seek_target.visible_frames_to_skip == 0;
+
+ self.scanned_frames.push(VisibleFrameInfo {
+ index: self.visible_frame_index,
+ duration_ms,
+ duration_ticks,
+ file_offset: self.current_frame_file_offset,
+ is_last: header.is_last,
+ is_keyframe,
+ seek_target,
+ name: header.name.clone(),
+ });
+
+ self.visible_frame_index += 1;
+ }
+
+ // Update slot dependency origins after processing this frame.
+ if header.can_be_referenced {
+ let slot = header.save_as_reference as usize;
+ assert!(
+ slot < DecoderState::MAX_STORED_FRAMES,
+ "invalid save_as_reference slot {slot}, max {}",
+ DecoderState::MAX_STORED_FRAMES - 1
+ );
+ self.reference_slot_decode_start[slot] = Some(decode_start_frame_index);
+ }
+
+ if header.lf_level != 0 {
+ let slot = (header.lf_level - 1) as usize;
+ assert!(
+ slot < DecoderState::NUM_LF_FRAMES,
+ "invalid lf save slot {slot}, max {}",
+ DecoderState::NUM_LF_FRAMES - 1
+ );
+ self.lf_slot_decode_start[slot] = Some(decode_start_frame_index);
+ }
+ }
+
/// Returns the number of passes that are fully completed across all groups.
pub(super) fn num_completed_passes(&self) -> usize {
self.section_state.num_completed_passes()
@@ -151,12 +312,41 @@ impl CodestreamParser {
pixel_format
}
+ /// Resets frame-level state for seeking to a new frame.
+ ///
+ /// Preserves: file_header, decoder_state (including reference frames),
+ /// basic_info, animation, color profiles, pixel_format, xyb_encoded,
+ /// is_gray, output_color_profile_set_by_user, preview_done.
+ ///
+ /// Clears: frame_header, toc_parser, frame, all section buffers,
+ /// non_section_buf, and processing flags.
+ pub(super) fn start_new_frame(&mut self) {
+ self.frame_header = None;
+ self.toc_parser = None;
+ self.frame = None;
+ self.non_section_buf = SmallBuffer::new(4096);
+ self.non_section_bit_offset = 0;
+ self.sections.clear();
+ self.ready_section_data = 0;
+ self.skip_sections = false;
+ self.process_without_output = false;
+ self.section_state = SectionState::new(0, 0);
+ self.lf_global_section = None;
+ self.lf_sections.clear();
+ self.hf_global_section = None;
+ self.hf_sections.clear();
+ self.candidate_hf_sections.clear();
+ self.has_more_frames = true;
+ self.header_needed_bytes = None;
+ }
+
pub(super) fn process(
&mut self,
box_parser: &mut BoxParser,
input: &mut dyn JxlBitstreamInput,
decode_options: &JxlDecoderOptions,
mut output_buffers: Option<&mut [JxlOutputBuffer]>,
+ do_flush: bool,
) -> Result<()> {
if let Some(output_buffers) = &output_buffers {
let px = self.pixel_format.as_ref().unwrap();
@@ -179,7 +369,11 @@ impl CodestreamParser {
.frame
.as_ref()
.is_some_and(|f| f.header().can_be_referenced);
- if !self.process_without_output && output_buffers.is_none() && !can_be_referenced {
+ if decode_options.scan_frames_only
+ || (!self.process_without_output
+ && output_buffers.is_none()
+ && !can_be_referenced)
+ {
self.skip_sections = true;
}
@@ -229,7 +423,9 @@ impl CodestreamParser {
let num = if !box_parser.box_buffer.is_empty() {
box_parser.box_buffer.take(buffers)
} else {
- input.read(buffers)?
+ let num = input.read(buffers)?;
+ box_parser.mark_file_consumed(num);
+ num
};
self.ready_section_data += num;
box_parser.consume_codestream(num as u64);
@@ -238,7 +434,7 @@ impl CodestreamParser {
break;
}
}
- match self.process_sections(decode_options, &mut output_buffers) {
+ match self.process_sections(decode_options, &mut output_buffers, do_flush) {
Ok(None) => Ok(()),
Ok(Some(missing)) => Err(Error::OutOfBounds(missing)),
Err(Error::OutOfBounds(_)) => Err(Error::SectionTooShort),
@@ -256,7 +452,9 @@ impl CodestreamParser {
let skipped = if !box_parser.box_buffer.is_empty() {
box_parser.box_buffer.consume(to_skip)
} else {
- input.skip(to_skip)?
+ let skipped = input.skip(to_skip)?;
+ box_parser.mark_file_consumed(skipped);
+ skipped
};
box_parser.consume_codestream(skipped as u64);
self.ready_section_data += skipped;
@@ -295,25 +493,59 @@ impl CodestreamParser {
} else {
// Trying to read a frame or a file header.
assert!(self.frame.is_none());
- assert!(self.has_more_frames);
+ if !self.has_more_frames {
+ // If this is a flush request and the file is complete, we are done.
+ // Otherwise, this is an API usage error.
+ assert!(do_flush);
+ return Ok(());
+ }
+
+ // Capture frame-start metadata once before parsing the next
+ // frame header. We do this after `get_more_codestream()` so we
+ // are robust to the previous frame ending exactly at a box
+ // boundary (BoxNeeded -> CodestreamBox transition).
+ let mut capture_frame_start =
+ self.decoder_state.is_some() && self.frame_header.is_none();
// Loop to handle incremental parsing (e.g. large ICC profiles) that may need
// multiple buffer refills to complete.
loop {
let available_codestream = match box_parser.get_more_codestream(input) {
Err(Error::OutOfBounds(_)) => 0,
- Ok(c) => c as usize,
+ Ok(c) => c,
Err(e) => return Err(e),
};
+
+ if capture_frame_start {
+ // total_file_consumed counts bytes read/skipped from
+ // raw input. non_section_buf and box_buffer contain
+ // unread bytes already accounted there.
+ self.current_frame_file_offset = (box_parser.total_file_consumed as usize)
+ .saturating_sub(self.non_section_buf.len())
+ .saturating_sub(box_parser.box_buffer.len());
+
+ // `available_codestream` includes bytes still in
+ // box_buffer and not yet in non_section_buf.
+ self.current_frame_remaining_in_box = if available_codestream > u64::MAX / 2
+ {
+ u64::MAX
+ } else {
+ available_codestream.saturating_add(self.non_section_buf.len() as u64)
+ };
+ capture_frame_start = false;
+ }
+
let c = self.non_section_buf.refill(
|buf| {
if !box_parser.box_buffer.is_empty() {
Ok(box_parser.box_buffer.take(buf))
} else {
- input.read(buf)
+ let read = input.read(buf)?;
+ box_parser.mark_file_consumed(read);
+ Ok(read)
}
},
- Some(available_codestream),
+ Some(available_codestream as usize),
)? as u64;
box_parser.consume_codestream(c);
@@ -379,6 +611,11 @@ impl CodestreamParser {
}
}
+ // Record frame info for scanning (after preview check).
+ if !is_preview_frame {
+ self.record_frame_info();
+ }
+
if self.has_visible_frame() {
// Return to caller if we found visible frame info.
return Ok(());
@@ -390,4 +627,49 @@ impl CodestreamParser {
}
}
}
+
+ pub(super) fn update_default_output_color_profile(&mut self) {
+ // Only set default output_color_profile if not already configured by user
+ if self.output_color_profile_set_by_user {
+ return;
+ }
+
+ let embedded_color_profile = self.embedded_color_profile.as_ref().unwrap();
+ let pixel_format = self.pixel_format.as_ref().unwrap();
+
+ // Determine default output color profile following libjxl logic:
+ // - For XYB: use embedded if can_output_to(), else:
+ // - if float samples are requested: linear sRGB,
+ // - else: sRGB
+ // - For non-XYB: use embedded color profile
+ let output_color_profile = if self.xyb_encoded {
+ // Use embedded if we can output to it, otherwise fall back to sRGB
+ let base_encoding = if embedded_color_profile.can_output_to() {
+ match &embedded_color_profile {
+ JxlColorProfile::Simple(enc) => enc.clone(),
+ JxlColorProfile::Icc(_) => {
+ unreachable!("can_output_to returns false for ICC")
+ }
+ }
+ } else {
+ let data_format = pixel_format
+ .color_data_format
+ .unwrap_or(JxlDataFormat::U8 { bit_depth: 8 });
+ let is_float = matches!(
+ data_format,
+ JxlDataFormat::F32 { .. } | JxlDataFormat::F16 { .. }
+ );
+ if is_float {
+ JxlColorEncoding::linear_srgb(self.is_gray)
+ } else {
+ JxlColorEncoding::srgb(self.is_gray)
+ }
+ };
+
+ JxlColorProfile::Simple(base_encoding)
+ } else {
+ embedded_color_profile.clone()
+ };
+ self.output_color_profile = Some(output_color_profile);
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
index 342fd6729f7a5..48ec9e6b9480f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
@@ -117,6 +117,7 @@ impl CodestreamParser {
if self.decoder_state.is_none() && self.embedded_color_profile.is_none() {
let file_header = self.file_header.as_ref().unwrap();
+
// Parse (or extract from file header) the ICC profile.
let mut br = BitReader::new(&self.non_section_buf);
br.skip_bits(self.non_section_bit_offset as usize)?;
@@ -147,50 +148,17 @@ impl CodestreamParser {
&file_header.image_metadata.color_encoding,
)?)
};
- // Determine default output color profile following libjxl logic:
- // - For XYB: use embedded if can_output_to(), else linear sRGB fallback
- // - For non-XYB: use embedded color profile
- let output_color_profile = if file_header.image_metadata.xyb_encoded {
- let is_gray =
- file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray;
-
- // Use embedded if we can output to it, otherwise fall back to linear sRGB
- let base_encoding = if embedded_color_profile.can_output_to() {
- match &embedded_color_profile {
- JxlColorProfile::Simple(enc) => enc.clone(),
- JxlColorProfile::Icc(_) => {
- unreachable!("can_output_to returns false for ICC")
- }
- }
- } else {
- JxlColorEncoding::linear_srgb(is_gray)
- };
-
- JxlColorProfile::Simple(base_encoding)
- } else {
- embedded_color_profile.clone()
- };
self.embedded_color_profile = Some(embedded_color_profile.clone());
- // Only set default output_color_profile if not already configured by user
- if self.output_color_profile.is_none() {
- self.output_color_profile = Some(output_color_profile);
- } else {
- // Validate user's output color profile choice (libjxl compatibility)
- // For non-XYB without CMS: only same encoding as embedded is allowed
- let user_profile = self.output_color_profile.as_ref().unwrap();
- if !file_header.image_metadata.xyb_encoded
- && decode_options.cms.is_none()
- && *user_profile != embedded_color_profile
- {
- return Err(Error::NonXybOutputNoCMS);
- }
- }
+
+ let xyb_encoded = file_header.image_metadata.xyb_encoded;
+ let is_gray = file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray;
+ self.xyb_encoded = xyb_encoded;
+ self.is_gray = is_gray;
+
// Only set default pixel_format if not already configured (e.g. via rewind)
if self.pixel_format.is_none() {
self.pixel_format = Some(JxlPixelFormat {
- color_type: if file_header.image_metadata.color_encoding.color_space
- == ColorSpace::Gray
- {
+ color_type: if is_gray {
JxlColorType::Grayscale
} else {
JxlColorType::Rgb
@@ -207,6 +175,19 @@ impl CodestreamParser {
});
}
+ if let Some(user_profile) = &self.output_color_profile {
+ // Validate user's output color profile choice (libjxl compatibility)
+ // For non-XYB without CMS: only same encoding as embedded is allowed
+ if !xyb_encoded
+ && decode_options.cms.is_none()
+ && *user_profile != embedded_color_profile
+ {
+ return Err(Error::NonXybOutputNoCMS);
+ }
+ } else {
+ self.update_default_output_color_profile();
+ }
+
let mut br = BitReader::new(&self.non_section_buf);
br.skip_bits(self.non_section_bit_offset as usize)?;
br.jump_to_byte_boundary()?;
@@ -298,7 +279,7 @@ impl CodestreamParser {
// Save file_header before creating frame (for preview frame recovery)
self.saved_file_header = self.decoder_state.as_ref().map(|ds| ds.file_header.clone());
- let frame = Frame::from_header_and_toc(
+ let mut frame = Frame::from_header_and_toc(
self.frame_header.take().unwrap(),
toc,
self.decoder_state.take().unwrap(),
@@ -360,6 +341,17 @@ impl CodestreamParser {
self.section_state =
SectionState::new(frame.header().num_lf_groups(), frame.header().num_groups());
+ frame.prepare_render_pipeline(
+ self.pixel_format.as_ref().unwrap(),
+ decode_options.cms.as_deref(),
+ self.embedded_color_profile
+ .as_ref()
+ .expect("embedded_color_profile should be set before pipeline preparation"),
+ self.output_color_profile
+ .as_ref()
+ .expect("output_color_profile should be set before pipeline preparation"),
+ )?;
+
self.frame = Some(frame);
Ok(())
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
index ffdb3588b7334..4fc5278c5fc15 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
@@ -8,15 +8,18 @@ use crate::{
bit_reader::BitReader,
error::Result,
frame::Section,
+ headers::frame_header::{Encoding, FrameType},
};
use super::CodestreamParser;
+#[derive(Debug)]
pub(super) struct SectionState {
lf_global_done: bool,
remaining_lf: usize,
hf_global_done: bool,
completed_passes: Vec<u8>,
+ lf_global_flush_len: usize,
}
impl SectionState {
@@ -26,6 +29,7 @@ impl SectionState {
remaining_lf: num_lf_groups,
hf_global_done: false,
completed_passes: vec![0; num_groups],
+ lf_global_flush_len: 0,
}
}
@@ -41,8 +45,15 @@ impl CodestreamParser {
&mut self,
decode_options: &JxlDecoderOptions,
output_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>,
+ do_flush: bool,
) -> Result<Option<usize>> {
let frame = self.frame.as_mut().unwrap();
+
+ let output_profile = self
+ .output_color_profile
+ .as_ref()
+ .expect("output_color_profile should be set before pipeline preparation");
+
let frame_header = frame.header();
// Dequeue ready sections.
@@ -72,40 +83,81 @@ impl CodestreamParser {
}
let mut processed_section = false;
+ let mut called_render_hf = false;
let pixel_format = self.pixel_format.as_ref().unwrap();
+
+ let complete_lf_global;
+ let (lf_global, lf_global_is_complete) = if let Some(d) = self.lf_global_section.take() {
+ complete_lf_global = d;
+ (
+ Some(&complete_lf_global.data[..complete_lf_global.len]),
+ true,
+ )
+ } else if do_flush
+ && self
+ .sections
+ .front()
+ .is_some_and(|s| s.section == Section::LfGlobal)
+ && 2 * self.ready_section_data > 3 * self.section_state.lf_global_flush_len
+ && frame_header.encoding == Encoding::Modular
+ && matches!(
+ frame_header.frame_type,
+ FrameType::RegularFrame | FrameType::LFFrame
+ )
+ {
+ self.section_state.lf_global_flush_len = self.ready_section_data;
+ (
+ Some(&self.sections[0].data[..self.ready_section_data]),
+ false,
+ )
+ } else {
+ (None, false)
+ };
+
'process: {
if frame_header.num_groups() == 1 && frame_header.passes.num_passes == 1 {
// Single-group special case.
- let Some(sec) = self.lf_global_section.take() else {
+ let Some(buf) = lf_global else {
break 'process;
};
- assert!(self.sections.is_empty());
- let mut br = BitReader::new(&sec.data);
- frame.decode_lf_global(&mut br)?;
- frame.decode_lf_group(0, &mut br)?;
- frame.decode_hf_global(&mut br)?;
- frame.prepare_render_pipeline(
- self.pixel_format.as_ref().unwrap(),
- decode_options.cms.as_deref(),
- self.embedded_color_profile
- .as_ref()
- .expect("embedded_color_profile should be set before pipeline preparation"),
- self.output_color_profile
- .as_ref()
- .expect("output_color_profile should be set before pipeline preparation"),
- )?;
- frame.finalize_lf()?;
- frame.decode_and_render_hf_groups(
- output_buffers,
- pixel_format,
- vec![(0, vec![(0, br)])],
- )?;
- processed_section = true;
+ assert!(self.sections.is_empty() || !lf_global_is_complete);
+ let mut br = BitReader::new(buf);
+ let res = (|| -> Result<()> {
+ frame.decode_lf_global(&mut br, !lf_global_is_complete)?;
+ frame.decode_lf_group(0, &mut br)?;
+ frame.decode_hf_global(&mut br)?;
+ frame.finalize_lf()?;
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ vec![(0, vec![(0, br)])],
+ do_flush,
+ output_profile,
+ )?;
+ called_render_hf = true;
+ Ok(())
+ })();
+ match res {
+ Ok(_) => {
+ processed_section = true;
+ }
+ Err(_) if !lf_global_is_complete => {
+ // Ignore errors if we are doing partial parsing.
+ }
+ Err(e) => return Err(e),
+ }
} else {
- if let Some(lf_global) = self.lf_global_section.take() {
- frame.decode_lf_global(&mut BitReader::new(&lf_global.data))?;
- self.section_state.lf_global_done = true;
- processed_section = true;
+ if let Some(buf) = lf_global {
+ match frame.decode_lf_global(&mut BitReader::new(buf), !lf_global_is_complete) {
+ Ok(_) => {
+ self.section_state.lf_global_done = true;
+ processed_section = true;
+ }
+ Err(_) if !lf_global_is_complete => {
+ // Ignore errors if we are doing partial parsing.
+ }
+ Err(e) => return Err(e),
+ }
}
if !self.section_state.lf_global_done {
@@ -127,16 +179,6 @@ impl CodestreamParser {
if let Some(hf_global) = self.hf_global_section.take() {
frame.decode_hf_global(&mut BitReader::new(&hf_global.data))?;
- frame.prepare_render_pipeline(
- self.pixel_format.as_ref().unwrap(),
- decode_options.cms.as_deref(),
- self.embedded_color_profile.as_ref().expect(
- "embedded_color_profile should be set before pipeline preparation",
- ),
- self.output_color_profile.as_ref().expect(
- "output_color_profile should be set before pipeline preparation",
- ),
- )?;
frame.finalize_lf()?;
self.section_state.hf_global_done = true;
processed_section = true;
@@ -184,7 +226,14 @@ impl CodestreamParser {
self.candidate_hf_sections.clear();
}
- frame.decode_and_render_hf_groups(output_buffers, pixel_format, group_readers)?;
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ group_readers,
+ do_flush,
+ output_profile,
+ )?;
+ called_render_hf = true;
for g in processed_groups.into_iter() {
for i in 0..self.section_state.completed_passes[g] {
@@ -195,6 +244,16 @@ impl CodestreamParser {
}
}
+ if do_flush && !called_render_hf && frame.can_do_early_rendering() {
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ vec![],
+ do_flush,
+ output_profile,
+ )?;
+ }
+
if !processed_section {
let data_for_next_section =
self.sections.front().unwrap().len - self.ready_section_data;
@@ -230,7 +289,6 @@ impl CodestreamParser {
if let Some(fh) = self.saved_file_header.take() {
let mut new_state = crate::frame::DecoderState::new(fh);
new_state.render_spotcolors = decode_options.render_spot_colors;
- new_state.enable_output = decode_options.enable_output;
self.decoder_state = Some(new_state);
}
} else {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
index 44aa57ce701d8..ac8dd81cb6600 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
@@ -6,11 +6,12 @@
#[cfg(test)]
use crate::api::FrameCallback;
use crate::{
- api::JxlFrameHeader,
+ api::{JxlFrameHeader, VisibleFrameInfo},
error::{Error, Result},
};
use super::{JxlBasicInfo, JxlColorProfile, JxlDecoderOptions, JxlPixelFormat};
+use crate::container::frame_index::FrameIndexBox;
use box_parser::BoxParser;
use codestream_parser::CodestreamParser;
@@ -67,6 +68,7 @@ impl JxlDecoderInner {
return Err(Error::ICCOutputNoCMS);
}
self.codestream_parser.output_color_profile = Some(profile);
+ self.codestream_parser.output_color_profile_set_by_user = true;
Ok(())
}
@@ -75,7 +77,10 @@ impl JxlDecoderInner {
}
pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) {
+ // TODO(veluca): return an error if we are asking for both planar and
+ // interleaved-in-color alpha.
self.codestream_parser.pixel_format = Some(pixel_format);
+ self.codestream_parser.update_default_output_color_profile();
}
pub fn frame_header(&self) -> Option<JxlFrameHeader> {
@@ -131,6 +136,35 @@ impl JxlDecoderInner {
self.codestream_parser.has_more_frames
}
+ /// Returns the parsed frame index box, if the file contained one.
+ pub fn frame_index(&self) -> Option<&FrameIndexBox> {
+ self.box_parser.frame_index.as_ref()
+ }
+
+ /// Returns visible frame info entries collected during parsing.
+ pub fn scanned_frames(&self) -> &[VisibleFrameInfo] {
+ &self.codestream_parser.scanned_frames
+ }
+
+ /// Resets frame-level state to prepare for decoding a new frame.
+ ///
+ /// Preserves image-level state (file header, decoder state including
+ /// reference frames, color profiles, pixel format). Clears frame header,
+ /// TOC, section buffers, and restores the box parser to the correct
+ /// state so the next `process()` call parses a new frame header.
+ ///
+ /// `remaining_in_box` comes from
+ /// `VisibleFrameInfo::seek_target.remaining_in_box` and tells the box
+ /// parser how many codestream bytes remain in the current container box at
+ /// the target position. For bare-codestream files this is `u64::MAX`.
+ ///
+ /// The caller must provide raw file input starting from the target
+ /// frame's `seek_target.decode_start_file_offset`.
+ pub fn start_new_frame(&mut self, remaining_in_box: u64) {
+ self.box_parser.reset_for_codestream_seek(remaining_in_box);
+ self.codestream_parser.start_new_frame();
+ }
+
#[cfg(test)]
pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) {
self.codestream_parser.set_use_simple_pipeline(u);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
index 50e6fe338613c..ecdb966d757f6 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
@@ -127,11 +127,23 @@ impl JxlDecoderInner {
input,
&self.options,
buffers,
+ false,
))
}
/// Draws all the pixels we have data for.
- pub fn flush_pixels(&mut self, _buffers: &mut [JxlOutputBuffer]) -> Result<()> {
- todo!()
+ pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer]) -> Result<()> {
+ let mut input: &[u8] = &[];
+ match self.codestream_parser.process(
+ &mut self.box_parser,
+ &mut input,
+ &self.options,
+ Some(buffers),
+ true,
+ ) {
+ Ok(()) => Ok(()),
+ Err(crate::error::Error::OutOfBounds(_)) => Ok(()),
+ Err(e) => Err(e),
+ }
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
index 5be3ef129622e..18c4b430f8dab 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
@@ -12,6 +12,7 @@ mod inner;
mod input;
mod options;
mod signature;
+mod xyb_constants;
pub use crate::image::JxlOutputBuffer;
pub use color::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
index 2bff60cda75ba..327a4456dbe70 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
@@ -22,7 +22,6 @@ pub struct JxlDecoderOptions {
pub desired_intensity_target: Option<f32>,
pub skip_preview: bool,
pub progressive_mode: JxlProgressiveMode,
- pub enable_output: bool,
pub cms: Option<Box<dyn JxlCms>>,
/// Fail decoding images with more than this number of pixels, or with frames with
/// more than this number of pixels. The limit counts the product of pixels and
@@ -40,6 +39,11 @@ pub struct JxlDecoderOptions {
/// This produces premultiplied alpha output, which is useful for compositing.
/// Default: false (output straight alpha)
pub premultiply_output: bool,
+ /// If true, only parse frame headers/TOC and skip section decoding.
+ ///
+ /// This is useful for collecting [`VisibleFrameInfo`](crate::api::VisibleFrameInfo)
+ /// via the regular decoder API without producing pixels.
+ pub scan_frames_only: bool,
}
impl Default for JxlDecoderOptions {
@@ -51,11 +55,11 @@ impl Default for JxlDecoderOptions {
skip_preview: true,
desired_intensity_target: None,
progressive_mode: JxlProgressiveMode::Pass,
- enable_output: true,
cms: None,
pixel_limit: None,
high_precision: false,
premultiply_output: false,
+ scan_frames_only: false,
}
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs
new file mode 100644
index 0000000000000..eb9356b228a56
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//! XYB color space constants (matching libjxl)
+//!
+//! Allow excessive precision as these constants are copied verbatim from libjxl for compatibility
+
+#![allow(clippy::excessive_precision)]
+
+pub const OPSIN_ABSORBANCE_BIAS: f32 = 0.0037930732552754493;
+
+#[allow(dead_code)]
+pub const NEG_OPSIN_ABSORBANCE_BIAS_RGB: [f32; 3] = [
+ -OPSIN_ABSORBANCE_BIAS,
+ -OPSIN_ABSORBANCE_BIAS,
+ -OPSIN_ABSORBANCE_BIAS,
+];
+
+const SCALED_XYB_OFFSET: [f32; 3] = [0.015386134, 0.0, 0.27770459];
+const SCALED_XYB_SCALE: [f32; 3] = [22.995788804, 1.183000077, 1.502141333];
+
+const fn reciprocal_sum(r1: f32, r2: f32) -> f32 {
+ (r1 * r2) / (r1 + r2)
+}
+
+pub const XYB_OFFSET: [f32; 3] = [
+ SCALED_XYB_OFFSET[0] + SCALED_XYB_OFFSET[1],
+ SCALED_XYB_OFFSET[1] - SCALED_XYB_OFFSET[0] + (1.0 / SCALED_XYB_SCALE[0]),
+ SCALED_XYB_OFFSET[1] + SCALED_XYB_OFFSET[2],
+];
+
+pub const fn xyb_scale() -> [f32; 3] {
+ [
+ reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]),
+ reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]),
+ reciprocal_sum(SCALED_XYB_SCALE[1], SCALED_XYB_SCALE[2]),
+ ]
+}
+
+const fn xyb_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 {
+ let val = match idx {
+ 0 => x,
+ 1 => y,
+ _ => b,
+ };
+ (val as f32 / SCALED_XYB_SCALE[idx]) - SCALED_XYB_OFFSET[idx]
+}
+
+const fn scaled_a2b_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 {
+ match idx {
+ 0 => xyb_corner(x, y, b, 1) + xyb_corner(x, y, b, 0),
+ 1 => xyb_corner(x, y, b, 1) - xyb_corner(x, y, b, 0),
+ _ => xyb_corner(x, y, b, 2) + xyb_corner(x, y, b, 1),
+ }
+}
+
+const fn unscaled_a2b_corner(x: usize, y: usize, b: usize) -> [f32; 3] {
+ let scale = xyb_scale();
+ [
+ (scaled_a2b_corner(x, y, b, 0) + XYB_OFFSET[0]) * scale[0],
+ (scaled_a2b_corner(x, y, b, 1) + XYB_OFFSET[1]) * scale[1],
+ (scaled_a2b_corner(x, y, b, 2) + XYB_OFFSET[2]) * scale[2],
+ ]
+}
+
+/// Compute the 2x2x2 CLUT cube for XYB to linear RGB conversion.
+pub const fn unscaled_a2b_cube_full() -> [[[[f32; 3]; 2]; 2]; 2] {
+ [
+ [
+ [unscaled_a2b_corner(0, 0, 0), unscaled_a2b_corner(0, 0, 1)],
+ [unscaled_a2b_corner(0, 1, 0), unscaled_a2b_corner(0, 1, 1)],
+ ],
+ [
+ [unscaled_a2b_corner(1, 0, 0), unscaled_a2b_corner(1, 0, 1)],
+ [unscaled_a2b_corner(1, 1, 0), unscaled_a2b_corner(1, 1, 1)],
+ ],
+ ]
+}
+
+/// Matrix for XYB ICC profile (from libjxl).
+pub const XYB_ICC_MATRIX: [f64; 9] = [
+ 1.5170095, -1.1065225, 0.071623, -0.050022, 0.5683655, -0.018344, -1.387676, 1.1145555,
+ 0.6857255,
+];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs
new file mode 100644
index 0000000000000..8a81b02864676
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs
@@ -0,0 +1,236 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//! Parser for the JPEG XL Frame Index box (`jxli`), as specified in
+//! the JPEG XL container specification.
+//!
+//! The frame index box provides a seek table for animated JXL files,
+//! listing keyframe byte offsets in the codestream, timestamps, and
+//! frame counts.
+
+use std::num::NonZero;
+
+use byteorder::{BigEndian, ReadBytesExt};
+
+use crate::error::{Error, Result};
+use crate::icc::read_varint_from_reader;
+use crate::util::NewWithCapacity;
+
+/// A single entry in the frame index.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FrameIndexEntry {
+ /// Absolute byte offset of this keyframe in the codestream.
+ /// (Accumulated from the delta-coded OFFi values.)
+ pub codestream_offset: u64,
+ /// Duration in ticks from this indexed frame to the next indexed frame
+ /// (or end of stream for the last entry). A tick lasts TNUM/TDEN seconds.
+ pub duration_ticks: u64,
+ /// Number of displayed frames from this indexed frame to the next indexed
+ /// frame (or end of stream for the last entry).
+ pub frame_count: u64,
+}
+
+/// Parsed contents of a Frame Index box (`jxli`).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FrameIndexBox {
+ /// Tick numerator. A tick lasts `tnum / tden` seconds.
+ pub tnum: u32,
+ /// Tick denominator (non-zero per spec).
+ pub tden: NonZero<u32>,
+ /// Indexed frame entries.
+ pub entries: Vec<FrameIndexEntry>,
+}
+
+impl FrameIndexBox {
+ /// Returns the number of indexed frames.
+ pub fn num_frames(&self) -> usize {
+ self.entries.len()
+ }
+
+ /// Returns the duration of one tick in seconds.
+ pub fn tick_duration_secs(&self) -> f64 {
+ self.tnum as f64 / self.tden.get() as f64
+ }
+
+ /// Finds the index entry for the keyframe at or before the given
+ /// codestream byte offset.
+ pub fn entry_for_offset(&self, offset: u64) -> Option<&FrameIndexEntry> {
+ // Entries are sorted by codestream_offset (monotonically increasing).
+ match self
+ .entries
+ .binary_search_by_key(&offset, |e| e.codestream_offset)
+ {
+ Ok(i) => Some(&self.entries[i]),
+ Err(0) => None,
+ Err(i) => Some(&self.entries[i - 1]),
+ }
+ }
+
+ /// Parse a frame index box from its raw content bytes (after the box header).
+ pub fn parse(data: &[u8]) -> Result<Self> {
+ let mut reader = data;
+
+ let nf = read_varint_from_reader(&mut reader)?;
+ if nf > u32::MAX as u64 {
+ return Err(Error::InvalidBox);
+ }
+ let nf = nf as usize;
+
+ let tnum = reader
+ .read_u32::<BigEndian>()
+ .map_err(|_| Error::InvalidBox)?;
+ let tden = NonZero::new(
+ reader
+ .read_u32::<BigEndian>()
+ .map_err(|_| Error::InvalidBox)?,
+ )
+ .ok_or(Error::InvalidBox)?;
+
+ // Each entry requires at least 3 bytes (three varints, min 1 byte each).
+ // Cap the pre-allocation to avoid OOM from a crafted NF value.
+ // Use new_with_capacity to return Err on allocation failure instead of aborting.
+ let mut entries = Vec::new_with_capacity(nf.min(reader.len() / 3))?;
+ let mut absolute_offset: u64 = 0;
+
+ for _ in 0..nf {
+ let off_delta = read_varint_from_reader(&mut reader)?;
+ let duration_ticks = read_varint_from_reader(&mut reader)?;
+ let frame_count = read_varint_from_reader(&mut reader)?;
+
+ absolute_offset = absolute_offset
+ .checked_add(off_delta)
+ .ok_or(Error::InvalidBox)?;
+
+ entries.push(FrameIndexEntry {
+ codestream_offset: absolute_offset,
+ duration_ticks,
+ frame_count,
+ });
+ }
+
+ Ok(FrameIndexBox {
+ tnum,
+ tden,
+ entries,
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::util::test::{build_frame_index_content, encode_varint};
+
+ fn build_frame_index(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> {
+ build_frame_index_content(tnum, tden, entries)
+ }
+
+ #[test]
+ fn test_parse_empty_index() {
+ let data = build_frame_index(1, 1000, &[]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 0);
+ assert_eq!(index.tnum, 1);
+ assert_eq!(index.tden.get(), 1000);
+ }
+
+ #[test]
+ fn test_parse_single_entry() {
+ // One frame at offset 0, duration 100 ticks, 1 frame
+ let data = build_frame_index(1, 1000, &[(0, 100, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 1);
+ assert_eq!(
+ index.entries[0],
+ FrameIndexEntry {
+ codestream_offset: 0,
+ duration_ticks: 100,
+ frame_count: 1,
+ }
+ );
+ }
+
+ #[test]
+ fn test_parse_multiple_entries_delta_coding() {
+ // Three frames with delta-coded offsets:
+ // OFF0=100 (absolute: 100), T0=50, F0=2
+ // OFF1=200 (absolute: 300), T1=50, F1=2
+ // OFF2=150 (absolute: 450), T2=30, F2=1
+ let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 3);
+ assert_eq!(index.entries[0].codestream_offset, 100);
+ assert_eq!(index.entries[1].codestream_offset, 300);
+ assert_eq!(index.entries[2].codestream_offset, 450);
+ assert_eq!(index.entries[0].duration_ticks, 50);
+ assert_eq!(index.entries[1].duration_ticks, 50);
+ assert_eq!(index.entries[2].duration_ticks, 30);
+ }
+
+ #[test]
+ fn test_parse_large_varint() {
+ // Test with a value that requires multiple varint bytes
+ let mut data = Vec::new();
+ data.extend(encode_varint(1)); // NF = 1
+ data.extend(1u32.to_be_bytes()); // TNUM
+ data.extend(1000u32.to_be_bytes()); // TDEN
+ data.extend(encode_varint(0x1234_5678_9ABC)); // large offset
+ data.extend(encode_varint(42));
+ data.extend(encode_varint(1));
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.entries[0].codestream_offset, 0x1234_5678_9ABC);
+ }
+
+ #[test]
+ fn test_entry_for_offset() {
+ let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ // Absolute offsets: 100, 300, 450
+
+ // Before first entry
+ assert!(index.entry_for_offset(50).is_none());
+ // Exact match
+ assert_eq!(index.entry_for_offset(100).unwrap().codestream_offset, 100);
+ // Between entries
+ assert_eq!(index.entry_for_offset(200).unwrap().codestream_offset, 100);
+ assert_eq!(index.entry_for_offset(350).unwrap().codestream_offset, 300);
+ // Exact match on last
+ assert_eq!(index.entry_for_offset(450).unwrap().codestream_offset, 450);
+ // Past last
+ assert_eq!(index.entry_for_offset(999).unwrap().codestream_offset, 450);
+ }
+
+ #[test]
+ fn test_zero_tden_rejected() {
+ let data = build_frame_index(1, 0, &[]);
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_truncated_data() {
+ // Just NF=1, no TNUM/TDEN
+ let data = encode_varint(1);
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_huge_nf_no_oom() {
+ // Crafted input: NF claims billions of entries but the data is tiny.
+ // This must not OOM -- Vec::with_capacity should be bounded by data length.
+ let mut data = Vec::new();
+ data.extend(encode_varint(u32::MAX as u64)); // NF = 4 billion
+ data.extend(1u32.to_be_bytes()); // TNUM
+ data.extend(1000u32.to_be_bytes()); // TDEN
+ // No actual entry data -- parse should fail gracefully, not OOM.
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_tick_duration() {
+ let data = build_frame_index(1, 1000, &[]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert!((index.tick_duration_secs() - 0.001).abs() < 1e-9);
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
index c6e9e505076b6..65f91b5e57ae0 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
@@ -6,6 +6,7 @@
// Originally written for jxl-oxide.
pub mod box_header;
+pub mod frame_index;
pub mod parse;
use box_header::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
index c6b95d1a8ef1f..9da5cbee0388f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
@@ -15,6 +15,11 @@ const RLE_MARKER_SYM: u16 = LOG_SUM_PROBS as u16 + 1;
#[derive(Debug)]
struct AnsHistogram {
+ // Safety invariant:
+ // - log_bucket_size <= LOG_SUM_PROBS
+ // - buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size)
+ // This relationship ensures that for any ANS state (12 bits), the bucket index
+ // computed as (state & 0xfff) >> log_bucket_size is always < buckets.len()
buckets: Vec<Bucket>,
log_bucket_size: usize,
bucket_mask: u32,
@@ -265,7 +270,7 @@ impl AnsHistogram {
debug_assert!((5..=8).contains(&log_alpha_size));
let table_size = (1u16 << log_alpha_size) as usize;
// 4 <= log_bucket_size <= 7
- let log_bucket_size = LOG_SUM_PROBS - log_alpha_size;
+ let log_bucket_size = LOG_SUM_PROBS.checked_sub(log_alpha_size).unwrap();
let bucket_size = 1u16 << log_bucket_size;
let bucket_mask = bucket_size as u32 - 1;
@@ -281,10 +286,9 @@ impl AnsHistogram {
} else {
Self::decode_dist_complex(br, &mut dist)?
};
-
- if let Some(single_sym_idx) = dist.iter().position(|&d| d == SUM_PROBS) {
- let buckets = dist
- .into_iter()
+ let single_symbol = dist.iter().position(|&d| d == SUM_PROBS).map(|x| x as u32);
+ let buckets = if let Some(single_sym_idx) = single_symbol {
+ dist.into_iter()
.enumerate()
.map(|(i, dist)| Bucket {
dist,
@@ -293,20 +297,19 @@ impl AnsHistogram {
alias_cutoff: 0,
alias_dist_xor: dist ^ SUM_PROBS,
})
- .collect();
- return Ok(Self {
- buckets,
- log_bucket_size,
- bucket_mask,
- single_symbol: Some(single_sym_idx as u32),
- });
- }
+ .collect()
+ } else {
+ Self::build_alias_map(alphabet_size, log_bucket_size, &dist)
+ };
+ assert_eq!(buckets.len(), 1 << (LOG_SUM_PROBS - log_bucket_size));
+ // Safety note: log_bucket_size <= LOG_SUM_PROBS by construction, and we
+ // just checked that buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size)
Ok(Self {
- buckets: Self::build_alias_map(alphabet_size, log_bucket_size, &dist),
+ buckets,
log_bucket_size,
bucket_mask,
- single_symbol: None,
+ single_symbol,
})
}
@@ -356,7 +359,19 @@ impl AnsHistogram {
let pos = idx & self.bucket_mask;
debug_assert!(self.buckets.len().is_power_of_two());
- let bucket = self.buckets[i & (self.buckets.len() - 1)];
+ debug_assert!(
+ i < self.buckets.len(),
+ "bucket index {} out of bounds (len = {})",
+ i,
+ self.buckets.len()
+ );
+ // SAFETY: The struct-level safety invariant (see AnsHistogram::buckets) ensures that
+ // buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size). Since idx = state & 0xfff
+ // (12 bits) and i = idx >> log_bucket_size, we have i < buckets.len() always.
+ #[allow(unsafe_code)]
+ let bucket = unsafe { *self.buckets.get_unchecked(i) };
+ // Safe version: (~3% slower for e2 lossless decoding)
+ // let bucket = self.buckets[i & (self.buckets.len() - 1)];
let alias_symbol = bucket.alias_symbol as u32;
let alias_cutoff = bucket.alias_cutoff as u32;
let dist = bucket.dist as u32;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
index dd23c6247e226..e57a3211aafea 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
@@ -12,6 +12,7 @@ use crate::entropy_coding::huffman::*;
use crate::entropy_coding::hybrid_uint::*;
use crate::error::{Error, Result};
use crate::headers::encodings::*;
+use crate::util::NewWithCapacity;
use crate::util::tracing_wrappers::*;
pub fn decode_varint16(br: &mut BitReader) -> Result<u16> {
@@ -259,7 +260,7 @@ impl SymbolReader {
min_symbol,
min_length,
dist_multiplier,
- window: Vec::new(),
+ window: Vec::new_with_capacity(1 << Lz77State::LOG_WINDOW_SIZE)?,
num_to_copy: 0,
copy_pos: 0,
num_decoded: 0,
@@ -278,30 +279,50 @@ impl SymbolReader {
}
impl SymbolReader {
- #[inline]
- pub fn read_unsigned(
+ #[inline(always)]
+ pub fn read_unsigned_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
context: usize,
) -> u32 {
let cluster = histograms.map_context_to_cluster(context);
- self.read_unsigned_clustered(histograms, br, cluster)
+ self.read_unsigned_clustered_inline(histograms, br, cluster)
+ }
+
+ #[inline(never)]
+ pub fn read_unsigned(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ context: usize,
+ ) -> u32 {
+ self.read_unsigned_inline(histograms, br, context)
}
#[inline(always)]
- pub fn read_signed(
+ pub fn read_signed_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
context: usize,
) -> i32 {
- let unsigned = self.read_unsigned(histograms, br, context);
+ let unsigned = self.read_unsigned_inline(histograms, br, context);
unpack_signed(unsigned)
}
- #[inline]
- pub fn read_unsigned_clustered(
+ #[inline(never)]
+ pub fn read_signed(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ context: usize,
+ ) -> i32 {
+ self.read_signed_inline(histograms, br, context)
+ }
+
+ #[inline(always)]
+ pub fn read_unsigned_clustered_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
@@ -382,14 +403,69 @@ impl SymbolReader {
}
}
+ #[inline(never)]
+ pub fn read_unsigned_clustered(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> u32 {
+ self.read_unsigned_clustered_inline(histograms, br, cluster)
+ }
+
#[inline(always)]
+ pub fn read_signed_clustered_inline(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> i32 {
+ let unsigned = self.read_unsigned_clustered_inline(histograms, br, cluster);
+ unpack_signed(unsigned)
+ }
+
+ #[inline(never)]
pub fn read_signed_clustered(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
cluster: usize,
) -> i32 {
- let unsigned = self.read_unsigned_clustered(histograms, br, cluster);
+ self.read_signed_clustered_inline(histograms, br, cluster)
+ }
+
+ /// Specialized fast path for when all HybridUint configs are 420.
+ ///
+ /// # Preconditions
+ /// - `histograms.can_use_config_420_fast_path()` must be true (no LZ77, all configs are 420)
+ /// - This assumes `SymbolReaderState::None` (verified by debug_assert)
+ #[inline(always)]
+ pub fn read_unsigned_clustered_config_420(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> u32 {
+ debug_assert!(matches!(self.state, SymbolReaderState::None));
+ debug_assert!(histograms.can_use_config_420_fast_path());
+
+ let token = match &histograms.codes {
+ Codes::Huffman(hc) => hc.read(br, cluster),
+ Codes::Ans(ans) => self.ans_reader.read(ans, br, cluster),
+ };
+ HybridUint::read_config_420(token, br)
+ }
+
+ /// Specialized fast path for signed reads when all configs are 420.
+ /// See [`read_unsigned_clustered_config_420`] for preconditions.
+ #[inline(always)]
+ pub fn read_signed_clustered_config_420(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> i32 {
+ let unsigned = self.read_unsigned_clustered_config_420(histograms, br, cluster);
unpack_signed(unsigned)
}
@@ -553,6 +629,17 @@ impl Histograms {
pub fn num_histograms(&self) -> usize {
*self.context_map.iter().max().unwrap() as usize + 1
}
+
+ pub fn resize(&mut self, num_contexts: usize) {
+ self.context_map.resize(num_contexts, 0);
+ }
+
+ /// Returns true if the config 420 fast path can be safely used.
+ /// Config 420: split_exponent=4, msb_in_token=2, lsb_in_token=0 (common pattern)
+ /// Requires: all configs are 420 AND LZ77 is disabled
+ pub fn can_use_config_420_fast_path(&self) -> bool {
+ !self.lz77_params.enabled && self.uint_configs.iter().all(|cfg| cfg.is_config_420())
+ }
}
#[cfg(test)]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
index fc6e7f6db4fba..447bca94abefa 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
@@ -53,6 +53,33 @@ impl HybridUint {
})
}
+ /// Returns true if this config matches the 420 pattern (common in e3 images):
+ /// split_exponent=4, msb_in_token=2, lsb_in_token=0
+ #[inline(always)]
+ pub fn is_config_420(&self) -> bool {
+ self.split_exponent == 4
+ && self.split_token == 16
+ && self.msb_in_token == 2
+ && self.lsb_in_token == 0
+ }
+
+ /// Specialized fast path for 420 config:
+ /// split_exponent=4, msb_in_token=2, lsb_in_token=0
+ #[inline(always)]
+ pub fn read_config_420(symbol: u32, br: &mut BitReader) -> u32 {
+ if symbol < 16 {
+ return symbol;
+ }
+
+ // Equivalent to: 2 + ((symbol - 16) >> 2)
+ let nbits = (symbol >> 2) - 2;
+ let nbits = nbits & 31;
+ let bits = br.read_optimistic(nbits as usize) as u32;
+ let hi = (symbol & 3) | 4;
+
+ (hi << nbits) | bits
+ }
+
#[inline]
pub fn read(&self, symbol: u32, br: &mut BitReader) -> u32 {
if symbol < self.split_token {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
index c21679bf8c844..a8e299c70f73e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
@@ -133,16 +133,12 @@ pub enum Error {
// Generic arithmetic overflow. Prefer using other errors if possible.
#[error("Arithmetic overflow")]
ArithmeticOverflow,
- #[error("Empty frame sequence")]
- NoFrames,
#[error(
"Pipeline channel type mismatch: stage {0} channel {1}, expected {2:?} but found {3:?}"
)]
PipelineChannelTypeMismatch(String, usize, DataTypeTag, DataTypeTag),
#[error("Invalid stage {0} after extend stage")]
PipelineInvalidStageAfterExtend(String),
- #[error("Channel {0} was not used in the render pipeline")]
- PipelineChannelUnused(usize),
#[error("Trying to copy rects of different size, src: {0}x{1} dst {2}x{3}")]
CopyOfDifferentSize(usize, usize, usize, usize),
#[error("LF quantization factor is too small: {0}")]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
index c28bf701bd2b2..55dcedc8153ad 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
@@ -25,6 +25,15 @@ pub enum SigmaSource {
Constant(f32),
}
+#[allow(clippy::excessive_precision)]
+const INV_SIGMA_NUM: f32 = -1.1715728752538099024;
+
+impl Default for SigmaSource {
+ fn default() -> Self {
+ Self::Constant(INV_SIGMA_NUM / 2.0)
+ }
+}
+
impl SigmaSource {
pub fn new(
frame_header: &FrameHeader,
@@ -32,9 +41,6 @@ impl SigmaSource {
hf_meta: &Option<HfMetadata>,
) -> Result<Self> {
let rf = &frame_header.restoration_filter;
- #[allow(clippy::excessive_precision)]
- const INV_SIGMA_NUM: f32 = -1.1715728752538099024;
-
if frame_header.encoding == Encoding::VarDCT {
let size_blocks = frame_header.size_blocks();
let sigma_xsize = size_blocks.0;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
index b73a2d7306ec2..d981ef3d96bbd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
@@ -172,6 +172,13 @@ pub struct PatchesDictionary {
}
impl PatchesDictionary {
+ pub fn new(num_extra_channels: usize) -> Self {
+ Self {
+ blendings_stride: num_extra_channels + 1,
+ ..Default::default()
+ }
+ }
+
#[cfg(test)]
pub fn random<R: rand::Rng>(
size: (usize, usize),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
index b4a19b95e49e4..3e947d6cf23a3 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
@@ -14,8 +14,9 @@ use crate::{
entropy_coding::decode::{Histograms, SymbolReader, unpack_signed},
error::{Error, Result},
frame::color_correlation_map::ColorCorrelationParams,
- util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff, tracing_wrappers::*},
+ util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff_simd, tracing_wrappers::*},
};
+use jxl_simd::{F32SimdVec, ScalarDescriptor, SimdDescriptor, simd_function};
const MAX_NUM_CONTROL_POINTS: u32 = 1 << 20;
const MAX_NUM_CONTROL_POINTS_PER_PIXEL_RATIO: u32 = 2;
const DELTA_LIMIT: i64 = 1 << 30;
@@ -520,7 +521,111 @@ impl Dct32 {
}
}
+#[inline(always)]
+fn draw_segment_inner<D: SimdDescriptor>(
+ d: D,
+ row: &mut [&mut [f32]],
+ row_pos: (usize, usize),
+ x_range: (usize, usize),
+ segment: &SplineSegment,
+) -> usize {
+ let (x_start, x_end) = x_range;
+ let (row_x0, y) = row_pos;
+ let len = D::F32Vec::LEN;
+ if x_start + len > x_end {
+ return x_start;
+ }
+
+ let inv_sigma = D::F32Vec::splat(d, segment.inv_sigma);
+ let half = D::F32Vec::splat(d, 0.5);
+ let one_over_2s2 = D::F32Vec::splat(d, 0.353_553_38);
+ let sigma_over_4_times_intensity = D::F32Vec::splat(d, segment.sigma_over_4_times_intensity);
+ let center_x = D::F32Vec::splat(d, segment.center_x);
+ let center_y = D::F32Vec::splat(d, segment.center_y);
+ let dy = D::F32Vec::splat(d, y as f32) - center_y;
+ let dy2 = dy * dy;
+
+ let mut x_base_arr = [0.0f32; 16];
+ for (i, val) in x_base_arr.iter_mut().enumerate() {
+ *val = i as f32;
+ }
+ let vx_base = D::F32Vec::load(d, &x_base_arr);
+
+ let start_offset = x_start - row_x0;
+ let end_offset = x_end - row_x0;
+
+ let [r0, r1, r2] = row else { unreachable!() };
+
+ let mut it0 = r0[start_offset..end_offset].chunks_exact_mut(len);
+ let mut it1 = r1[start_offset..end_offset].chunks_exact_mut(len);
+ let mut it2 = r2[start_offset..end_offset].chunks_exact_mut(len);
+
+ let cm0 = D::F32Vec::splat(d, segment.color[0]);
+ let cm1 = D::F32Vec::splat(d, segment.color[1]);
+ let cm2 = D::F32Vec::splat(d, segment.color[2]);
+
+ let num_chunks = (end_offset - start_offset) / len;
+ let mut x = x_start;
+ for _ in 0..num_chunks {
+ let vx = D::F32Vec::splat(d, x as f32) + vx_base;
+ let dx = vx - center_x;
+ let sqd = dx.mul_add(dx, dy2);
+ let distance = sqd.sqrt();
+
+ let arg1 = distance.mul_add(half, one_over_2s2) * inv_sigma;
+ let arg2 = distance.mul_add(half, D::F32Vec::splat(d, -0.353_553_38)) * inv_sigma;
+ let one_dimensional_factor = fast_erff_simd(d, arg1) - fast_erff_simd(d, arg2);
+ let local_intensity =
+ sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor;
+
+ let c0 = it0.next().unwrap();
+ cm0.mul_add(local_intensity, D::F32Vec::load(d, c0))
+ .store(c0);
+ let c1 = it1.next().unwrap();
+ cm1.mul_add(local_intensity, D::F32Vec::load(d, c1))
+ .store(c1);
+ let c2 = it2.next().unwrap();
+ cm2.mul_add(local_intensity, D::F32Vec::load(d, c2))
+ .store(c2);
+
+ x += len;
+ }
+ x
+}
+
+simd_function!(
+ draw_segment_dispatch,
+ d: D,
+ fn draw_segment_simd(
+ row: &mut [&mut [f32]],
+ row_pos: (usize, usize),
+ xsize: usize,
+ segment: &SplineSegment,
+ ) {
+ let (x0, y) = row_pos;
+ let x1 = x0 + xsize;
+ let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize);
+ let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1);
+
+ if clamped_x1 <= clamped_x0 {
+ return;
+ }
+
+ let x = clamped_x0;
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ let d = d.maybe_downgrade_256bit();
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ let d = d.maybe_downgrade_128bit();
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ draw_segment_inner(ScalarDescriptor, row, (x0, y), (x, clamped_x1), segment);
+ }
+);
+
impl Splines {
+ pub fn is_initialized(&self) -> bool {
+ !self.segment_y_start.is_empty()
+ }
+
#[cfg(test)]
pub fn create(
quantization_adjustment: i32,
@@ -540,7 +645,7 @@ impl Splines {
let first_segment_index_pos = self.segment_y_start[row_pos.1];
let last_segment_index_pos = self.segment_y_start[row_pos.1 + 1];
for segment_index_pos in first_segment_index_pos..last_segment_index_pos {
- self.draw_segment(
+ draw_segment_dispatch(
row,
row_pos,
xsize,
@@ -548,48 +653,6 @@ impl Splines {
);
}
}
- fn draw_segment(
- &self,
- row: &mut [&mut [f32]],
- row_pos: (usize, usize),
- xsize: usize,
- segment: &SplineSegment,
- ) {
- let (x0, y) = row_pos;
- let x1 = x0 + xsize;
- let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize);
- // one-past-the-end
- let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1);
- for x in clamped_x0..clamped_x1 {
- self.draw_segment_at(row, (x, y), x0, segment);
- }
- }
- fn draw_segment_at(
- &self,
- row: &mut [&mut [f32]],
- pixel_pos: (usize, usize),
- row_x0: usize,
- segment: &SplineSegment,
- ) {
- let (x, y) = pixel_pos;
- let inv_sigma = segment.inv_sigma;
- let half = 0.5f32;
- let one_over_2s2 = 0.353_553_38_f32;
- let sigma_over_4_times_intensity = segment.sigma_over_4_times_intensity;
- let dx = x as f32 - segment.center_x;
- let dy = y as f32 - segment.center_y;
- let sqd = dx * dx + dy * dy;
- let distance = sqd.sqrt();
- let one_dimensional_factor = fast_erff((distance * half + one_over_2s2) * inv_sigma)
- - fast_erff((distance * half - one_over_2s2) * inv_sigma);
- let local_intensity =
- sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor;
- for (channel_index, row) in row.iter_mut().enumerate() {
- let cm = segment.color[channel_index];
- let inp = row[x - row_x0];
- row[x - row_x0] = cm * local_intensity + inp;
- }
- }
fn add_segment(
&mut self,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
index c48e1e22ddc70..9051f59650354 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
@@ -12,7 +12,11 @@ use crate::{
};
pub const NON_ZERO_BUCKETS: usize = 37;
+
+// Supremum of zero_density_context(x, y) + 1, when x + y <= 64.
pub const ZERO_DENSITY_CONTEXT_COUNT: usize = 458;
+// Supremum of zero_density_context(x, y) + 1.
+pub const ZERO_DENSITY_CONTEXT_LIMIT: usize = 474;
pub const COEFF_FREQ_CONTEXT: [usize; 64] = [
0xBAD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19,
@@ -27,7 +31,7 @@ pub const COEFF_NUM_NONZERO_CONTEXT: [usize; 64] = [
206, 206, 206, 206, 206, 206,
];
-#[inline]
+#[inline(always)]
pub fn zero_density_context(
nonzeros_left: usize,
k: usize,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
index f2435eb5ddf48..f58b1044de7cd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
@@ -3,6 +3,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::collections::BTreeSet;
use std::sync::Arc;
use super::render::pipeline;
@@ -16,9 +17,14 @@ use super::{
quantizer::{LfQuantFactors, QuantizerParams},
};
use crate::error::Error;
+use crate::features::epf::SigmaSource;
+use crate::frame::block_context_map::{ZERO_DENSITY_CONTEXT_COUNT, ZERO_DENSITY_CONTEXT_LIMIT};
+use crate::headers::frame_header::FrameType;
#[cfg(test)]
use crate::render::SimpleRenderPipeline;
use crate::render::buffer_splitter::BufferSplitter;
+use crate::util::AtomicRefCell;
+use crate::util::{ShiftRightCeil, mirror};
use crate::{
GROUP_DIM,
bit_reader::BitReader,
@@ -39,6 +45,111 @@ use crate::{
};
use jxl_transforms::transform_map::*;
+use crate::headers::CustomTransformData;
+use crate::render::RenderPipelineInOutStage;
+use crate::render::stages::Upsample8x;
+use crate::render::{Channels, ChannelsMut};
+
+fn upsample_lf_group(
+ group: usize,
+ pixels: &mut [Image<f32>; 3],
+ lf_image: &[Image<f32>; 3],
+ header: &FrameHeader,
+ factors: &CustomTransformData,
+) -> Result<()> {
+ let group_dim = header.group_dim();
+ let lf_group_dim = group_dim / 8;
+ let (width_groups, _) = header.size_groups();
+ let gx = group % width_groups;
+ let gy = group / width_groups;
+
+ let upsample = Upsample8x::new(factors, 0);
+ let mut state = upsample.init_local_state(0)?.unwrap();
+
+ let max_width = pixels.iter().map(|x| x.size().0).max().unwrap();
+
+ // Temporary buffer for 8 output rows
+ // We reuse this buffer for each iteration to minimize allocation
+ let mut temp_out_buf: [_; 8] = std::array::from_fn(|_| vec![0.0f32; max_width + 128]);
+
+ let mut input_rows_storage: [_; 5] = std::array::from_fn(|_| vec![0.0; max_width / 8 + 32]);
+
+ for c in 0..3 {
+ let lf_img = &lf_image[c];
+ let out_img = &mut pixels[c];
+ let (out_width, out_height) = out_img.size();
+
+ let vs = header.vshift(c);
+ let hs = header.hshift(c);
+
+ let lf_group_dim_x = lf_group_dim >> hs;
+ let lf_group_dim_y = lf_group_dim >> vs;
+ let lf_x0 = gx * lf_group_dim_x;
+ let lf_y0 = gy * lf_group_dim_y;
+
+ let lf_width = lf_img.size().0.shrc(hs);
+ let lf_height = lf_img.size().1.shrc(hs);
+
+ let start_x = lf_x0.saturating_sub(2);
+ let lf_x1 = (lf_x0 + lf_group_dim_x).min(lf_width);
+ let end_x = (lf_x1 + 2).min(lf_width);
+ let copy_width = end_x - start_x;
+
+ for y in 0..lf_group_dim_y {
+ let cy = lf_y0 + y;
+
+ for dy in -2..=2 {
+ let iy = cy as isize + dy;
+ let iy = mirror(iy, lf_height);
+
+ let storage = &mut input_rows_storage[(dy + 2) as usize];
+
+ let save_start = if start_x == lf_x0 { 2 } else { 0 };
+ let save_end = save_start + copy_width;
+
+ storage[save_start..save_end].copy_from_slice(&lf_img.row(iy)[start_x..end_x]);
+
+ if start_x == lf_x0 {
+ storage[0] = storage[2 + mirror(-2, copy_width)];
+ storage[1] = storage[2 + mirror(-1, copy_width)];
+ }
+ if end_x == lf_x1 {
+ storage[save_end] = storage[save_start + mirror(save_end as isize, save_end)];
+ storage[save_end + 1] =
+ storage[save_start + mirror(save_end as isize + 1, save_end)];
+ }
+ }
+
+ let input_rows_refs = input_rows_storage.iter().map(|x| &x[..]).collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 5);
+
+ {
+ // Prepare output refs
+ let output_rows_refs = temp_out_buf.iter_mut().map(|x| &mut x[..]).collect();
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8);
+
+ upsample.process_row_chunk(
+ (0, 0),
+ lf_x1 - lf_x0,
+ &input_channels,
+ &mut output_channels,
+ Some(state.as_mut()),
+ );
+ }
+
+ // Copy back to out_img
+ let base_y = y * 8;
+ for (i, buf) in temp_out_buf.iter().enumerate() {
+ let out_y = base_y + i;
+ if out_y < out_height {
+ out_img.row_mut(out_y)[..out_width].copy_from_slice(&buf[..out_width]);
+ }
+ }
+ }
+ }
+ Ok(())
+}
+
impl Frame {
pub fn from_header_and_toc(
frame_header: FrameHeader,
@@ -51,6 +162,9 @@ impl Frame {
} else {
decoder_state.nonvisible_frame_index += 1;
}
+ if frame_header.frame_type == FrameType::LFFrame && frame_header.lf_level == 1 {
+ decoder_state.lf_frame_was_rendered = false;
+ }
let image_metadata = &decoder_state.file_header.image_metadata;
let is_gray = !frame_header.do_ycbcr
&& !image_metadata.xyb_encoded
@@ -124,9 +238,13 @@ impl Frame {
None
};
+ let num_extra_channels = image_metadata.extra_channel_info.len();
+
Ok(Self {
#[cfg(test)]
use_simple_pipeline: decoder_state.use_simple_pipeline,
+ last_rendered_pass: vec![None; frame_header.num_groups()],
+ incomplete_groups: frame_header.num_groups(),
header: frame_header,
color_channels,
toc,
@@ -139,10 +257,39 @@ impl Frame {
render_pipeline: None,
reference_frame_data,
lf_frame_data,
- lf_global_was_rendered: false,
+ was_flushed_once: false,
vardct_buffers: None,
+ groups_to_flush: BTreeSet::new(),
+ changed_since_last_flush: BTreeSet::new(),
+ patches: Arc::new(AtomicRefCell::new(PatchesDictionary::new(
+ num_extra_channels,
+ ))),
+ splines: Arc::new(AtomicRefCell::new(Splines::default())),
+ noise: Arc::new(AtomicRefCell::new(Noise::default())),
+ lf_quant: Arc::new(AtomicRefCell::new(LfQuantFactors::default())),
+ color_correlation_params: Arc::new(AtomicRefCell::new(
+ ColorCorrelationParams::default(),
+ )),
+ epf_sigma: Arc::new(AtomicRefCell::new(SigmaSource::default())),
})
}
+
+ pub fn allow_rendering_before_last_pass(&self) -> bool {
+ if self
+ .lf_global
+ .as_ref()
+ .is_none_or(|x| !x.modular_global.can_do_partial_render())
+ {
+ return false;
+ }
+
+ self.header.frame_type == FrameType::RegularFrame
+ || (self.header.frame_type == FrameType::LFFrame
+ && self.header.lf_level == 1
+ // TODO(veluca): this should probably be "there is no alpha".
+ && self.header.num_extra_channels == 0)
+ }
+
/// Given a bit reader pointing at the end of the TOC, returns a vector of `BitReader`s, each
/// of which reads a specific section.
pub fn sections<'a>(&self, br: &'a mut BitReader) -> Result<Vec<BitReader<'a>>> {
@@ -166,97 +313,109 @@ impl Frame {
}
Ok(shuffled_ret)
}
+
#[instrument(level = "debug", skip_all)]
- pub fn decode_lf_global(&mut self, br: &mut BitReader) -> Result<()> {
+ pub fn decode_lf_global(&mut self, br: &mut BitReader, allow_partial: bool) -> Result<()> {
debug!(section_size = br.total_bits_available());
- assert!(self.lf_global.is_none());
- trace!(pos = br.total_bits_read());
- let patches = if self.header.has_patches() {
- info!("decoding patches");
- Some(PatchesDictionary::read(
- br,
- self.header.size_padded().0,
- self.header.size_padded().1,
- self.decoder_state.extra_channel_info().len(),
- &self.decoder_state.reference_frames[..],
- )?)
+ if let Some(lfg) = &self.lf_global {
+ br.skip_bits(lfg.total_bits_read)?;
} else {
- None
- };
+ trace!(pos = br.total_bits_read());
- let splines = if self.header.has_splines() {
- info!("decoding splines");
- Some(Splines::read(br, self.header.width * self.header.height)?)
- } else {
- None
- };
+ if self.header.has_patches() {
+ info!("decoding patches");
+ let p = PatchesDictionary::read(
+ br,
+ self.header.size_padded().0,
+ self.header.size_padded().1,
+ self.decoder_state.extra_channel_info().len(),
+ &self.decoder_state.reference_frames[..],
+ )?;
+ *self.patches.borrow_mut() = p;
+ }
- let noise = if self.header.has_noise() {
- info!("decoding noise");
- Some(Noise::read(br)?)
- } else {
- None
- };
+ if self.header.has_splines() {
+ info!("decoding splines");
+ let s = Splines::read(br, self.header.width * self.header.height)?;
+ *self.splines.borrow_mut() = s;
+ }
- let lf_quant = LfQuantFactors::new(br)?;
- debug!(?lf_quant);
+ if self.header.has_noise() {
+ info!("decoding noise");
+ let n = Noise::read(br)?;
+ *self.noise.borrow_mut() = n;
+ }
- let quant_params = if self.header.encoding == Encoding::VarDCT {
- info!("decoding VarDCT quantizer params");
- Some(QuantizerParams::read(br)?)
- } else {
- None
- };
- debug!(?quant_params);
+ let lf_quant = LfQuantFactors::new(br)?;
+ *self.lf_quant.borrow_mut() = lf_quant.clone();
+ debug!(?lf_quant);
- let block_context_map = if self.header.encoding == Encoding::VarDCT {
- info!("decoding block context map");
- Some(BlockContextMap::read(br)?)
- } else {
- None
- };
- debug!(?block_context_map);
+ let quant_params = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding VarDCT quantizer params");
+ Some(QuantizerParams::read(br)?)
+ } else {
+ None
+ };
+ debug!(?quant_params);
- let color_correlation_params = if self.header.encoding == Encoding::VarDCT {
- info!("decoding color correlation params");
- Some(ColorCorrelationParams::read(br)?)
- } else {
- None
- };
- debug!(?color_correlation_params);
-
- let tree = if br.read(1)? == 1 {
- let size_limit = (1024
- + self.header.width as usize
- * self.header.height as usize
- * (self.color_channels + self.decoder_state.extra_channel_info().len())
- / 16)
- .min(1 << 22);
- Some(Tree::read(br, size_limit)?)
- } else {
- None
- };
+ let block_context_map = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding block context map");
+ Some(BlockContextMap::read(br)?)
+ } else {
+ None
+ };
+ debug!(?block_context_map);
- let modular_global = FullModularImage::read(
- &self.header,
- &self.decoder_state.file_header.image_metadata,
- self.modular_color_channels(),
- &tree,
- br,
- )?;
+ let color_correlation_params = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding color correlation params");
+ let ccp = ColorCorrelationParams::read(br)?;
+ *self.color_correlation_params.borrow_mut() = ccp;
+ Some(ccp)
+ } else {
+ None
+ };
+ debug!(?color_correlation_params);
+
+ let tree = if br.read(1)? == 1 {
+ let size_limit = (1024
+ + self.header.width as usize
+ * self.header.height as usize
+ * (self.color_channels + self.decoder_state.extra_channel_info().len())
+ / 16)
+ .min(1 << 22);
+ Some(Tree::read(br, size_limit)?)
+ } else {
+ None
+ };
- self.lf_global = Some(LfGlobalState {
- patches: patches.map(Arc::new),
- splines,
- noise,
- lf_quant,
- quant_params,
- block_context_map,
- color_correlation_params,
- tree,
- modular_global,
- });
+ let modular_global = FullModularImage::read(
+ &self.header,
+ &self.decoder_state.file_header.image_metadata,
+ self.modular_color_channels(),
+ br,
+ )?;
+
+ // Ensure that, if we call this function again, we resume from just after
+ // reading modular global data (excluding section 0 channels).
+ let total_bits_read = br.total_bits_read();
+
+ self.lf_global = Some(LfGlobalState {
+ lf_quant,
+ quant_params,
+ block_context_map,
+ color_correlation_params,
+ tree,
+ modular_global,
+ total_bits_read,
+ });
+ }
+
+ let lf_global = self.lf_global.as_mut().unwrap();
+
+ lf_global
+ .modular_global
+ .read_section0(&self.header, &lf_global.tree, br, allow_partial)?;
Ok(())
}
@@ -281,6 +440,9 @@ impl Frame {
br,
)?;
}
+
+ lf_global.modular_global.mark_group_to_be_read(1, group);
+
lf_global.modular_global.read_stream(
ModularStreamId::ModularLF(group),
&self.header,
@@ -305,188 +467,305 @@ impl Frame {
#[instrument(level = "debug", skip_all)]
pub fn decode_hf_global(&mut self, br: &mut BitReader) -> Result<()> {
debug!(section_size = br.total_bits_available());
- if self.header.encoding == Encoding::Modular {
- return Ok(());
- }
- let lf_global = self.lf_global.as_mut().unwrap();
- let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?;
- let block_context_map = lf_global.block_context_map.as_mut().unwrap();
- let num_histo_bits = self.header.num_groups().ceil_log2();
- let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1;
- info!(
- "Processing HFGlobal section with {} passes and {} histograms",
- self.header.passes.num_passes, num_histograms
- );
- let mut passes: Vec<PassState> = vec![];
- #[allow(unused_variables)]
- for i in 0..self.header.passes.num_passes as usize {
- let used_orders = match br.read(2)? {
- 0 => 0x5f,
- 1 => 0x13,
- 2 => 0,
- _ => br.read(coeff_order::NUM_ORDERS)?,
- } as u32;
- debug!(used_orders);
- let coeff_orders = decode_coeff_orders(used_orders, br)?;
- assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS);
- let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts();
+ if self.header.encoding == Encoding::VarDCT {
+ let lf_global = self.lf_global.as_mut().unwrap();
+ let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?;
+ let block_context_map = lf_global.block_context_map.as_mut().unwrap();
+ let num_histo_bits = self.header.num_groups().ceil_log2();
+ let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1;
info!(
- "Deconding histograms for pass {} with {} contexts",
- i, num_contexts
+ "Processing HFGlobal section with {} passes and {} histograms",
+ self.header.passes.num_passes, num_histograms
);
- let histograms = Histograms::decode(num_contexts, br, true)?;
- debug!("Found {} histograms", histograms.num_histograms());
- passes.push(PassState {
- coeff_orders,
- histograms,
+ let mut passes: Vec<PassState> = vec![];
+ #[allow(unused_variables)]
+ for i in 0..self.header.passes.num_passes as usize {
+ let used_orders = match br.read(2)? {
+ 0 => 0x5f,
+ 1 => 0x13,
+ 2 => 0,
+ _ => br.read(coeff_order::NUM_ORDERS)?,
+ } as u32;
+ debug!(used_orders);
+ let coeff_orders = decode_coeff_orders(used_orders, br)?;
+ assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS);
+ let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts();
+ info!(
+ "Decoding histograms for pass {} with {} contexts",
+ i, num_contexts
+ );
+ let mut histograms = Histograms::decode(num_contexts, br, true)?;
+            // Pad the histograms so that context-map indices computed in decode_vardct_group (group.rs#L514@752e6a4) stay in bounds.
+ let padding = ZERO_DENSITY_CONTEXT_LIMIT - ZERO_DENSITY_CONTEXT_COUNT;
+ histograms.resize(num_contexts + padding);
+ debug!("Found {} histograms", histograms.num_histograms());
+ passes.push(PassState {
+ coeff_orders,
+ histograms,
+ });
+ }
+ // Note that, if we have extra channels that can be rendered progressively,
+ // we might end up re-drawing some VarDCT groups. In that case, we need to
+ // keep around the coefficients, so allocate coefficients under those conditions
+ // too.
+ // TODO(veluca): evaluate whether we can make this check more precise.
+ let hf_coefficients = if passes.len() <= 1
+ && !(self
+ .lf_global
+ .as_mut()
+ .unwrap()
+ .modular_global
+ .can_do_partial_render()
+ && self.header.num_extra_channels > 0)
+ {
+ None
+ } else {
+ let xs = GROUP_DIM * GROUP_DIM;
+ let ys = self.header.num_groups();
+ Some((
+ Image::new((xs, ys))?,
+ Image::new((xs, ys))?,
+ Image::new((xs, ys))?,
+ ))
+ };
+
+ self.hf_global = Some(HfGlobalState {
+ num_histograms,
+ passes,
+ dequant_matrices,
+ hf_coefficients,
});
}
- let hf_coefficients = if passes.len() <= 1 {
- None
- } else {
- let xs = GROUP_DIM * GROUP_DIM;
- let ys = self.header.num_groups();
- Some((
- Image::new((xs, ys))?,
- Image::new((xs, ys))?,
- Image::new((xs, ys))?,
- ))
- };
- self.hf_global = Some(HfGlobalState {
- num_histograms,
- passes,
- dequant_matrices,
- hf_coefficients,
- });
+ // Set EPF sigma values to the correct values if we are doing EPF.
+ if self.header.restoration_filter.epf_iters > 0 {
+ *self.epf_sigma.borrow_mut() = SigmaSource::new(
+ &self.header,
+ self.lf_global.as_ref().unwrap(),
+ &self.hf_meta,
+ )?;
+ }
Ok(())
}
- #[instrument(level = "debug", skip(self, br, buffer_splitter))]
- pub fn decode_hf_group(
+ pub fn render_noise_for_group(
&mut self,
group: usize,
- pass: usize,
- mut br: BitReader,
+ complete: bool,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
- debug!(section_size = br.total_bits_available());
- if self.header.has_noise() {
- // TODO(sboukortt): consider making this a dedicated stage
- let num_channels = self.header.num_extra_channels as usize + 3;
-
- let group_dim = self.header.group_dim() as u32;
- let xsize_groups = self.header.size_groups().0;
- let gx = (group % xsize_groups) as u32;
- let gy = (group / xsize_groups) as u32;
- // TODO(sboukortt): test upsampling+noise
- let upsampling = self.header.upsampling;
- let x0 = gx * upsampling * group_dim;
- let y0 = gy * upsampling * group_dim;
- let x1 = ((x0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().0);
- let y1 = ((y0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().1);
- let xsize = x1 - x0 as usize;
- let ysize = y1 - y0 as usize;
- let mut rng = Xorshift128Plus::new_with_seeds(
- self.decoder_state.visible_frame_index as u32,
- self.decoder_state.nonvisible_frame_index as u32,
- x0,
- y0,
- );
- let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000);
- for i in 0..3 {
- let mut buf = pipeline!(self, p, p.get_buffer(num_channels + i)?);
- const FLOATS_PER_BATCH: usize =
- Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>();
- let mut batch = [0u64; Xorshift128Plus::N];
-
- for y in 0..ysize {
- let row = buf.row_mut(y);
- for batch_index in 0..xsize.div_ceil(FLOATS_PER_BATCH) {
- rng.fill(&mut batch);
- let batch_size =
- (xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH);
- for i in 0..batch_size {
- let x = FLOATS_PER_BATCH * batch_index + i;
- let k = i / 2;
- let high_bytes = i % 2 != 0;
- let bits = if high_bytes {
- ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32
- } else {
- (batch[k] & 0xFFFFFFFF) as u32
- };
- row[x] = bits_to_float(bits);
+ // TODO(sboukortt): consider making this a dedicated stage
+ // TODO(veluca): SIMD.
+ let num_channels = self.header.num_extra_channels as usize + 3;
+
+ let group_dim = self.header.group_dim() as u32;
+ let xsize_groups = self.header.size_groups().0;
+ let gx = (group % xsize_groups) as u32;
+ let gy = (group / xsize_groups) as u32;
+ let upsampling = self.header.upsampling;
+ let upsampled_size = self.header.size_upsampled();
+
+ // Total buffer covers the upsampled region for this group
+ let buf_x1 = ((gx + 1) * upsampling * group_dim) as usize;
+ let buf_y1 = ((gy + 1) * upsampling * group_dim) as usize;
+ let buf_xsize = buf_x1.min(upsampled_size.0) - (gx * upsampling * group_dim) as usize;
+ let buf_ysize = buf_y1.min(upsampled_size.1) - (gy * upsampling * group_dim) as usize;
+
+ let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000);
+
+ // Get all 3 noise channel buffers upfront
+ let mut bufs = [
+ pipeline!(self, p, p.get_buffer(num_channels)?),
+ pipeline!(self, p, p.get_buffer(num_channels + 1)?),
+ pipeline!(self, p, p.get_buffer(num_channels + 2)?),
+ ];
+
+ const FLOATS_PER_BATCH: usize =
+ Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>();
+ let mut batch = [0u64; Xorshift128Plus::N];
+
+ // libjxl iterates through upsampling subdivisions with separate RNG seeds.
+ // For each subregion, a single RNG is shared across all 3 channels.
+ for iy in 0..upsampling {
+ for ix in 0..upsampling {
+ // Seed coordinates for this subregion (matches libjxl)
+ let x0 = (gx * upsampling + ix) * group_dim;
+ let y0 = (gy * upsampling + iy) * group_dim;
+
+ // Create RNG with this subregion's seed - shared across all 3 channels
+ let mut rng = Xorshift128Plus::new_with_seeds(
+ self.decoder_state.visible_frame_index as u32,
+ self.decoder_state.nonvisible_frame_index as u32,
+ x0,
+ y0,
+ );
+
+ // Subregion boundaries within the buffer
+ let sub_x0 = (ix * group_dim) as usize;
+ let sub_y0 = (iy * group_dim) as usize;
+ let sub_x1 = ((ix + 1) * group_dim) as usize;
+ let sub_y1 = ((iy + 1) * group_dim) as usize;
+
+ // Clamp to actual buffer size
+ let sub_xsize = sub_x1.min(buf_xsize).saturating_sub(sub_x0);
+ let sub_ysize = sub_y1.min(buf_ysize).saturating_sub(sub_y0);
+
+ // Skip if this subregion is entirely outside the buffer
+ if sub_xsize == 0 || sub_ysize == 0 {
+ continue;
+ }
+
+ // Fill all 3 channels with this subregion's noise, sharing the RNG
+ for buf in &mut bufs {
+ for y in 0..sub_ysize {
+ let row = buf.row_mut(sub_y0 + y);
+ for batch_index in 0..sub_xsize.div_ceil(FLOATS_PER_BATCH) {
+ rng.fill(&mut batch);
+ let batch_size =
+ (sub_xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH);
+ for i in 0..batch_size {
+ let x = sub_x0 + FLOATS_PER_BATCH * batch_index + i;
+ let k = i / 2;
+ let high_bytes = i % 2 != 0;
+ let bits = if high_bytes {
+ ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32
+ } else {
+ (batch[k] & 0xFFFFFFFF) as u32
+ };
+ row[x] = bits_to_float(bits);
+ }
}
}
}
- pipeline!(
- self,
- p,
- p.set_buffer_for_group(num_channels + i, group, 1, buf, buffer_splitter)?
- )
}
}
+ // Set all buffers after filling
+ let [buf0, buf1, buf2] = bufs;
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels, group, complete, buf0, buffer_splitter)?
+ );
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels + 1, group, complete, buf1, buffer_splitter)?
+ );
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels + 2, group, complete, buf2, buffer_splitter)?
+ );
+ Ok(())
+ }
+
+ // Returns `true` if VarDCT and noise data were effectively rendered.
+ #[instrument(level = "debug", skip(self, passes, buffer_splitter))]
+ pub fn decode_hf_group(
+ &mut self,
+ group: usize,
+ passes: &mut [(usize, BitReader)],
+ buffer_splitter: &mut BufferSplitter,
+ force_render: bool,
+ ) -> Result<bool> {
+ if passes.is_empty() {
+ assert!(force_render);
+ }
+
+ let last_pass_in_file = self.header.passes.num_passes as usize - 1;
+ let was_complete = self.last_rendered_pass[group].is_some_and(|p| p >= last_pass_in_file);
+
+ if let Some((p, _)) = passes.last() {
+ self.last_rendered_pass[group] = Some(*p);
+ };
+ let pass_to_render = self.last_rendered_pass[group];
+ let complete = pass_to_render.is_some_and(|p| p >= last_pass_in_file);
+
+ if complete && !was_complete {
+ self.incomplete_groups = self.incomplete_groups.checked_sub(1).unwrap();
+ }
+
+        // Render if we are decoding the last pass, or if an eager render was requested
+        // and this frame supports rendering before its last pass.
+ let do_render = if complete {
+ true
+ } else if force_render {
+ self.allow_rendering_before_last_pass()
+ } else {
+ false
+ };
+
+ if !do_render && passes.is_empty() {
+ return Ok(false);
+ }
+
+ if self.header.has_noise() && do_render {
+ self.render_noise_for_group(group, complete, buffer_splitter)?;
+ }
+
let lf_global = self.lf_global.as_mut().unwrap();
if self.header.encoding == Encoding::VarDCT {
- info!("Decoding VarDCT group {group}, pass {pass}");
- let hf_global = self.hf_global.as_mut().unwrap();
- let hf_meta = self.hf_meta.as_mut().unwrap();
- let mut pixels = [
- pipeline!(self, p, p.get_buffer(0))?,
- pipeline!(self, p, p.get_buffer(1))?,
- pipeline!(self, p, p.get_buffer(2))?,
- ];
- let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new);
- decode_vardct_group(
- group,
- pass,
- &self.header,
- lf_global,
- hf_global,
- hf_meta,
- &self.lf_image,
- &self.quant_lf,
- &self
- .decoder_state
- .file_header
- .transform_data
- .opsin_inverse_matrix
- .quant_biases,
- &mut pixels,
- &mut br,
- buffers,
- )?;
- if self.decoder_state.enable_output
- && pass + 1 == self.header.passes.num_passes as usize
- {
+ let mut pixels = if do_render {
+ Some([
+ pipeline!(self, p, p.get_buffer(0))?,
+ pipeline!(self, p, p.get_buffer(1))?,
+ pipeline!(self, p, p.get_buffer(2))?,
+ ])
+ } else {
+ None
+ };
+ if pass_to_render.is_none() && do_render {
+ info!("Upsampling LF for group {group}");
+ upsample_lf_group(
+ group,
+ pixels.as_mut().unwrap(),
+ self.lf_image.as_ref().unwrap(),
+ &self.header,
+ &self.decoder_state.file_header.transform_data,
+ )?;
+ } else {
+ info!("Decoding VarDCT group {group}");
+ let hf_global = self.hf_global.as_mut().unwrap();
+ let hf_meta = self.hf_meta.as_mut().unwrap();
+ let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new);
+ decode_vardct_group(
+ group,
+ passes,
+ &self.header,
+ lf_global,
+ hf_global,
+ hf_meta,
+ &self.lf_image,
+ &self.quant_lf,
+ &self
+ .decoder_state
+ .file_header
+ .transform_data
+ .opsin_inverse_matrix
+ .quant_biases,
+ &mut pixels,
+ buffers,
+ )?;
+ }
+ if let Some(pixels) = pixels {
for (c, img) in pixels.into_iter().enumerate() {
pipeline!(
self,
p,
- p.set_buffer_for_group(c, group, 1, img, buffer_splitter)?
+ p.set_buffer_for_group(c, group, complete, img, buffer_splitter)?
);
}
}
}
- lf_global.modular_global.read_stream(
- ModularStreamId::ModularHF { group, pass },
- &self.header,
- &lf_global.tree,
- &mut br,
- )?;
- lf_global.modular_global.process_output(
- 2 + pass,
- group,
- &self.header,
- &mut |chan, group, num_passes, image| {
- pipeline!(
- self,
- p,
- p.set_buffer_for_group(chan, group, num_passes, image, buffer_splitter)?
- );
- Ok(())
- },
- )?;
- Ok(())
+
+ for (pass, br) in passes.iter_mut() {
+ lf_global.modular_global.read_stream(
+ ModularStreamId::ModularHF { group, pass: *pass },
+ &self.header,
+ &lf_global.tree,
+ br,
+ )?;
+ }
+ Ok(do_render)
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
index b98d4541894b6..b7d8021b33962 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
@@ -18,7 +18,7 @@ use crate::{
},
headers::frame_header::FrameHeader,
image::{Image, ImageRect, Rect},
- util::{CeilLog2, ShiftRightCeil, tracing_wrappers::*},
+ util::{CeilLog2, ShiftRightCeil, SmallVec, tracing_wrappers::*},
};
use jxl_simd::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, simd_function};
@@ -305,11 +305,69 @@ simd_function!(
}
);
+struct PassInfo<'a, 'b> {
+ histogram_index: usize,
+ reader: Option<SymbolReader>,
+ br: &'a mut BitReader<'b>,
+ shift: u32,
+ pass: usize,
+ // TODO(veluca): reuse this allocation.
+ num_nzeros: [Image<u32>; 3],
+}
+
+impl<'a, 'b> PassInfo<'a, 'b> {
+ fn new(
+ hf_global: &HfGlobalState,
+ frame_header: &FrameHeader,
+ block_group_rect: Rect,
+ pass: usize,
+ br: &'a mut BitReader<'b>,
+ ) -> Result<Self> {
+ let num_histo_bits = hf_global.num_histograms.ceil_log2();
+ debug!(?pass);
+ let histogram_index = br.read(num_histo_bits as usize)? as usize;
+ debug!(?histogram_index);
+ let reader = Some(SymbolReader::new(
+ &hf_global.passes[pass].histograms,
+ br,
+ None,
+ )?);
+ let shift = if pass < frame_header.passes.shift.len() {
+ frame_header.passes.shift[pass]
+ } else {
+ 0
+ };
+ let num_nzeros = [
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(0),
+ block_group_rect.size.1 >> frame_header.vshift(0),
+ ))?,
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(1),
+ block_group_rect.size.1 >> frame_header.vshift(1),
+ ))?,
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(2),
+ block_group_rect.size.1 >> frame_header.vshift(2),
+ ))?,
+ ];
+
+ Ok(Self {
+ histogram_index,
+ reader,
+ br,
+ shift,
+ pass,
+ num_nzeros,
+ })
+ }
+}
+
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
pub fn decode_vardct_group(
group: usize,
- pass: usize,
+ passes: &mut [(usize, BitReader)],
frame_header: &FrameHeader,
lf_global: &mut LfGlobalState,
hf_global: &mut HfGlobalState,
@@ -317,19 +375,19 @@ pub fn decode_vardct_group(
lf_image: &Option<[Image<f32>; 3]>,
quant_lf: &Image<u8>,
quant_biases: &[f32; 4],
- pixels: &mut [Image<f32>; 3],
- br: &mut BitReader,
+ pixels: &mut Option<[Image<f32>; 3]>,
buffers: &mut VarDctBuffers,
) -> Result<(), Error> {
let x_dm_multiplier = (1.0 / (1.25)).powf(frame_header.x_qm_scale as f32 - 2.0);
let b_dm_multiplier = (1.0 / (1.25)).powf(frame_header.b_qm_scale as f32 - 2.0);
- let num_histo_bits = hf_global.num_histograms.ceil_log2();
- let histogram_index: usize = br.read(num_histo_bits as usize)? as usize;
- debug!(?histogram_index);
- let mut reader = SymbolReader::new(&hf_global.passes[pass].histograms, br, None)?;
let block_group_rect = frame_header.block_group_rect(group);
debug!(?block_group_rect);
+ let mut pass_info = passes
+ .iter_mut()
+ .map(|(pass, br)| PassInfo::new(hf_global, frame_header, block_group_rect, *pass, br))
+ .collect::<Result<SmallVec<_, 4>>>()?;
+
// Reset and use pooled buffers
buffers.reset();
let scratch = &mut buffers.scratch;
@@ -350,23 +408,9 @@ pub fn decode_vardct_group(
let ytob_map = hf_meta.ytob_map.get_rect(cmap_rect);
let transform_map = hf_meta.transform_map.get_rect(block_group_rect);
let raw_quant_map = hf_meta.raw_quant_map.get_rect(block_group_rect);
- let mut num_nzeros: [Image<u32>; 3] = [
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(0),
- block_group_rect.size.1 >> frame_header.vshift(0),
- ))?,
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(1),
- block_group_rect.size.1 >> frame_header.vshift(1),
- ))?,
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(2),
- block_group_rect.size.1 >> frame_header.vshift(2),
- ))?,
- ];
let quant_lf_rect = quant_lf.get_rect(block_group_rect);
let block_context_map = lf_global.block_context_map.as_mut().unwrap();
- let context_offset = histogram_index * block_context_map.num_ac_contexts();
+ // TODO(veluca): improve coefficient storage (smaller allocations, use 16 bits if possible).
let coeffs = match hf_global.hf_coefficients.as_mut() {
Some(hf_coefficients) => [
hf_coefficients.0.row_mut(group),
@@ -380,11 +424,6 @@ pub fn decode_vardct_group(
[coeffs_x, coeffs_y, coeffs_b]
}
};
- let shift_for_pass = if pass < frame_header.passes.shift.len() {
- frame_header.passes.shift[pass]
- } else {
- 0
- };
let mut coeffs_offset = 0;
let transform_buffer = &mut buffers.transform_buffer;
@@ -474,94 +513,116 @@ pub fn decode_vardct_group(
let num_blocks = cx * cy;
let num_coeffs = num_blocks * BLOCK_SIZE;
let log_num_blocks = num_blocks.ilog2() as usize;
- let pass_info = &hf_global.passes[pass];
- for c in [1, 0, 2] {
- if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) {
- continue;
- }
- trace!(
- "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})",
- sbx[c], sby[c], c, cx, cy, transform_id, shape_id
- );
- let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]);
- let block_context =
- block_context_map.block_context(quant_lf, raw_quant, shape_id, c);
- let nonzero_context = block_context_map
- .nonzero_context(predicted_nzeros, block_context)
- + context_offset;
- let mut nonzeros =
- reader.read_unsigned(&pass_info.histograms, br, nonzero_context) as usize;
- trace!(
- "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \
+ for PassInfo {
+ histogram_index,
+ reader,
+ br,
+ shift,
+ pass,
+ num_nzeros,
+ } in pass_info.iter_mut()
+ {
+ let reader = reader.as_mut().unwrap();
+ let pass_info = &hf_global.passes[*pass];
+ let context_offset = *histogram_index * block_context_map.num_ac_contexts();
+ for c in [1, 0, 2] {
+ if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) {
+ continue;
+ }
+ trace!(
+ "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})",
+ sbx[c], sby[c], c, cx, cy, transform_id, shape_id
+ );
+ let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]);
+ let block_context =
+ block_context_map.block_context(quant_lf, raw_quant, shape_id, c);
+ let nonzero_context = block_context_map
+ .nonzero_context(predicted_nzeros, block_context)
+ + context_offset;
+ let mut nonzeros =
+ reader.read_unsigned_inline(&pass_info.histograms, br, nonzero_context)
+ as usize;
+ trace!(
+ "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \
nzero_ctx: {nonzero_context} (offset: {context_offset}) \
nzeros: {nonzeros}",
- sbx[c], sby[c]
- );
- if nonzeros + num_blocks > num_coeffs {
- return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks));
- }
- for iy in 0..cy {
- let nzrow = num_nzeros[c].row_mut(sby[c] + iy);
- for ix in 0..cx {
- nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32;
+ sbx[c], sby[c]
+ );
+ if nonzeros + num_blocks > num_coeffs {
+ return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks));
}
- }
- let histo_offset =
- block_context_map.zero_density_context_offset(block_context) + context_offset;
- let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 };
- let permutation = &pass_info.coeff_orders[shape_id * 3 + c];
- let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs];
- for k in num_blocks..num_coeffs {
- if nonzeros == 0 {
- break;
+ for iy in 0..cy {
+ let nzrow = num_nzeros[c].row_mut(sby[c] + iy);
+ for ix in 0..cx {
+ nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32;
+ }
+ }
+ let histo_offset = block_context_map.zero_density_context_offset(block_context)
+ + context_offset;
+ let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 };
+ let permutation = &pass_info.coeff_orders[shape_id * 3 + c];
+ let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs];
+ for k in num_blocks..num_coeffs {
+ if nonzeros == 0 {
+ break;
+ }
+ let ctx =
+ histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev);
+ let coeff =
+ reader.read_signed_inline(&pass_info.histograms, br, ctx) << *shift;
+ prev = if coeff != 0 { 1 } else { 0 };
+ nonzeros -= prev;
+ let coeff_index = permutation[k] as usize;
+ current_coeffs[coeff_index] += coeff;
+ }
+ if nonzeros != 0 {
+ return Err(Error::EndOfBlockResidualNonZeros(nonzeros));
}
- let ctx =
- histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev);
- let coeff =
- reader.read_signed(&pass_info.histograms, br, ctx) << shift_for_pass;
- prev = if coeff != 0 { 1 } else { 0 };
- nonzeros -= prev;
- let coeff_index = permutation[k] as usize;
- current_coeffs[coeff_index] += coeff;
- }
- if nonzeros != 0 {
- return Err(Error::EndOfBlockResidualNonZeros(nonzeros));
}
}
- let qblock = [
- &coeffs[0][coeffs_offset..],
- &coeffs[1][coeffs_offset..],
- &coeffs[2][coeffs_offset..],
- ];
- let dequant_matrices = &hf_global.dequant_matrices;
- dequant_and_transform_to_pixels_dispatch(
- quant_biases,
- x_dm_multiplier,
- b_dm_multiplier,
- pixels,
- scratch,
- inv_global_scale,
- transform_buffer,
- hshift,
- vshift,
- by,
- sby,
- bx,
- sbx,
- x_cc_mul,
- b_cc_mul,
- raw_quant,
- &lf_rects,
- transform_type,
- block_rect,
- num_blocks,
- num_coeffs,
- &qblock,
- dequant_matrices,
- )?;
+ if let Some(pixels) = pixels {
+ let qblock = [
+ &coeffs[0][coeffs_offset..],
+ &coeffs[1][coeffs_offset..],
+ &coeffs[2][coeffs_offset..],
+ ];
+ let dequant_matrices = &hf_global.dequant_matrices;
+ dequant_and_transform_to_pixels_dispatch(
+ quant_biases,
+ x_dm_multiplier,
+ b_dm_multiplier,
+ pixels,
+ scratch,
+ inv_global_scale,
+ transform_buffer,
+ hshift,
+ vshift,
+ by,
+ sby,
+ bx,
+ sbx,
+ x_cc_mul,
+ b_cc_mul,
+ raw_quant,
+ &lf_rects,
+ transform_type,
+ block_rect,
+ num_blocks,
+ num_coeffs,
+ &qblock,
+ dequant_matrices,
+ )?;
+ }
coeffs_offset += num_coeffs;
}
}
- reader.check_final_state(&hf_global.passes[pass].histograms, br)?;
+ for PassInfo {
+ pass, br, reader, ..
+ } in pass_info.iter_mut()
+ {
+ std::mem::take(reader)
+ .unwrap()
+ .check_final_state(&hf_global.passes[*pass].histograms, br)?;
+ }
Ok(())
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs
new file mode 100644
index 0000000000000..43ebeb7f43aa5
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs
@@ -0,0 +1,371 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+use crate::{
+ api::{JxlColorProfile, JxlColorType, JxlDataFormat, JxlOutputBuffer, JxlPixelFormat},
+ error::Result,
+ frame::Frame,
+ headers::{Orientation, frame_header::FrameType},
+ image::{DataTypeTag, Rect},
+ render::{
+ Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage,
+ buffer_splitter::{BufferSplitter, SaveStageBufferInfo},
+ low_memory_pipeline::row_buffers::RowBuffer,
+ save::SaveStage,
+ stages::{
+ ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage, FromLinearStage,
+ OutputColorInfo, TransferFunction, Upsample8x, XybStage,
+ },
+ },
+ util::{f16, mirror},
+};
+
+impl Frame {
+ #[allow(clippy::too_many_arguments)]
+ fn render_lf_frame_rect(
+ &mut self,
+ color_type: JxlColorType,
+ data_format: JxlDataFormat,
+ rect: Rect,
+ upsampled_rect: Rect,
+ orientation: Orientation,
+ output_buffers: &mut [Option<JxlOutputBuffer<'_>>],
+ full_size: (usize, usize),
+ output_color_info: &OutputColorInfo,
+ output_tf: &TransferFunction,
+ ) -> Result<()> {
+ let save_stage = SaveStage::new(
+ if color_type.has_alpha() {
+ &[0, 1, 2, 3]
+ } else {
+ &[0, 1, 2]
+ },
+ orientation,
+ 0,
+ color_type,
+ data_format,
+ color_type.has_alpha(),
+ );
+ let len = rect.size.0;
+ let ulen = len * 8;
+ enum DataFormatConverter {
+ U8(ConvertF32ToU8Stage),
+ U16(ConvertF32ToU16Stage),
+ F16(ConvertF32ToF16Stage),
+ None,
+ }
+ let (converter, constant_alpha) = match data_format {
+ JxlDataFormat::U8 { bit_depth } => (
+ DataFormatConverter::U8(ConvertF32ToU8Stage::new(0, bit_depth)),
+ RowBuffer::new_filled(DataTypeTag::U8, ulen, &(1u8 << bit_depth).to_ne_bytes())?,
+ ),
+ JxlDataFormat::U16 { bit_depth, .. } => (
+ DataFormatConverter::U16(ConvertF32ToU16Stage::new(0, bit_depth)),
+ RowBuffer::new_filled(DataTypeTag::U16, ulen, &(1u16 << bit_depth).to_ne_bytes())?,
+ ),
+ JxlDataFormat::F16 { .. } => (
+ DataFormatConverter::F16(ConvertF32ToF16Stage::new(0)),
+ RowBuffer::new_filled(
+ DataTypeTag::F16,
+ ulen,
+ &(f16::from_f32(1.0).to_bits().to_ne_bytes()),
+ )?,
+ ),
+ JxlDataFormat::F32 { .. } => (
+ DataFormatConverter::None,
+ RowBuffer::new_filled(DataTypeTag::F32, ulen, &1.0f32.to_ne_bytes())?,
+ ),
+ };
+
+ let upsample_stage = Upsample8x::new(&self.decoder_state.file_header.transform_data, 0);
+ let mut upsample_state = upsample_stage.init_local_state(0)?.unwrap();
+
+ let xyb_stage = XybStage::new(0, output_color_info.clone());
+
+ let from_linear_stage = FromLinearStage::new(0, output_tf.clone());
+
+ let mut lf_rows = [
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ ];
+
+ // Converted to RGB in place.
+ let mut upsampled_rows = [
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ ];
+
+ let mut output_rows = [
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ ];
+
+ let src = if self.header.frame_type == FrameType::RegularFrame {
+ self.decoder_state.lf_frames[0].as_ref().unwrap()
+ } else {
+ self.lf_frame_data.as_ref().unwrap()
+ };
+
+ const LF_ROW_OFFSET: usize = 8;
+
+ let x0 = rect.origin.0;
+ let x1 = rect.end().0;
+
+ let y0 = rect.origin.1 as isize - 2;
+ let y1 = rect.end().1 as isize + 2;
+
+ let lf_size = src[0].size();
+
+ for yy in y0..y1 {
+ let sy = mirror(yy, lf_size.1);
+
+ // Fill in input.
+ for c in 0..3 {
+ let bufy = (yy + LF_ROW_OFFSET as isize) as usize;
+ let row = lf_rows[c].get_row_mut::<f32>(bufy);
+ let srow = src[c].row(sy);
+ let off = RowBuffer::x0_offset::<f32>();
+ row[off..off + len].copy_from_slice(&srow[x0..x1]);
+ row[off - 1] = srow[mirror(x0 as isize - 1, lf_size.0)];
+ row[off - 2] = srow[mirror(x0 as isize - 2, lf_size.0)];
+ row[off + len] = srow[mirror(x1 as isize, lf_size.0)];
+ row[off + len + 1] = srow[mirror(x1 as isize + 1, lf_size.0)];
+ }
+
+ if yy < y0 + 4 {
+ continue;
+ }
+
+ let y = yy as usize - 2;
+
+ // Upsample.
+ for c in 0..3 {
+ let off = RowBuffer::x0_offset::<f32>() - 2;
+ let input_rows_refs = [
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 2)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 1)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 1)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 2)[off..],
+ ]
+ .into_iter()
+ .collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 5);
+
+ let output_rows_refs =
+ upsampled_rows[c].get_rows_mut(y * 8..y * 8 + 8, RowBuffer::x0_offset::<f32>());
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8);
+
+ upsample_stage.process_row_chunk(
+ (0, 0),
+ len,
+ &input_channels,
+ &mut output_channels,
+ Some(upsample_state.as_mut()),
+ );
+ }
+
+ // un-XYB, convert and save.
+ for uy in y * 8..y * 8 + 8 {
+ // XYB
+ let [x, y, b] = &mut upsampled_rows;
+ let off = RowBuffer::x0_offset::<f32>();
+ let mut rows = [
+ &mut x.get_row_mut(uy)[off..],
+ &mut y.get_row_mut(uy)[off..],
+ &mut b.get_row_mut(uy)[off..],
+ ];
+ xyb_stage.process_row_chunk((0, 0), ulen, &mut rows, None);
+ from_linear_stage.process_row_chunk((0, 0), ulen, &mut rows, None);
+
+ macro_rules! convert {
+ ($s: expr, $t: ty) => {
+ for c in 0..3 {
+ let input_rows_refs = std::iter::once(
+ &upsampled_rows[c].get_row(uy)[RowBuffer::x0_offset::<f32>()..],
+ )
+ .collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 1);
+ let output_rows_refs = output_rows[c]
+ .get_rows_mut(uy..uy + 1, RowBuffer::x0_offset::<$t>());
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 1);
+ $s.process_row_chunk(
+ (0, 0),
+ ulen,
+ &input_channels,
+ &mut output_channels,
+ None,
+ );
+ }
+ };
+ }
+
+ // Convert
+ let save_input = match &converter {
+ DataFormatConverter::U8(s) => {
+ convert!(s, u8);
+ &output_rows
+ }
+ DataFormatConverter::U16(s) => {
+ convert!(s, u16);
+ &output_rows
+ }
+ DataFormatConverter::F16(s) => {
+ convert!(s, f16);
+ &output_rows
+ }
+ DataFormatConverter::None => &upsampled_rows,
+ };
+
+ let input_no_alpha = [&save_input[0], &save_input[1], &save_input[2]];
+ let input_alpha = [
+ &save_input[0],
+ &save_input[1],
+ &save_input[2],
+ &constant_alpha,
+ ];
+
+ save_stage.save_lowmem(
+ if color_type.has_alpha() {
+ &input_alpha
+ } else {
+ &input_no_alpha
+ },
+ output_buffers,
+ upsampled_rect.size,
+ uy,
+ upsampled_rect.origin,
+ full_size,
+ (0, 0),
+ )?;
+ }
+ }
+
+ Ok(())
+ }
+
+ pub fn maybe_preview_lf_frame(
+ &mut self,
+ pixel_format: &JxlPixelFormat,
+ output_buffers: &mut [JxlOutputBuffer<'_>],
+ changed_regions: Option<&[Rect]>,
+ output_profile: &JxlColorProfile,
+ ) -> Result<()> {
+ if self.header.needs_blending() {
+ return Ok(());
+ }
+ if !((self.header.has_lf_frame() && self.header.frame_type == FrameType::RegularFrame)
+ || (self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1))
+ {
+ return Ok(());
+ }
+
+ let output_color_info = OutputColorInfo::from_header(&self.decoder_state.file_header)?;
+
+ let Some(output_tf) = output_profile.transfer_function().map(|tf| {
+ TransferFunction::from_api_tf(
+ tf,
+ output_color_info.intensity_target,
+ output_color_info.luminances,
+ )
+ }) else {
+ return Ok(());
+ };
+
+ if output_tf.is_linear() {
+ return Ok(());
+ }
+
+ let image_metadata = &self.decoder_state.file_header.image_metadata;
+ if !image_metadata.xyb_encoded || !image_metadata.extra_channel_info.is_empty() {
+ // We only render LF frames for XYB VarDCT images with no extra channels.
+ // TODO(veluca): we might want to relax this to "no alpha".
+ return Ok(());
+ }
+ let color_type = pixel_format.color_type;
+ let data_format = pixel_format.color_data_format.unwrap();
+ if pixel_format.color_data_format.is_none()
+ || output_buffers.is_empty()
+ || !matches!(
+ color_type,
+ JxlColorType::Rgb | JxlColorType::Rgba | JxlColorType::Bgr | JxlColorType::Bgra,
+ )
+ {
+            // We only render color data, and only to 3- or 4-channel output buffers.
+ return Ok(());
+ }
+ // We already have a fully-rendered frame and we are not requesting to re-render
+ // specific regions.
+ if self.decoder_state.lf_frame_was_rendered && changed_regions.is_none() {
+ return Ok(());
+ }
+ if changed_regions.is_none() {
+ self.decoder_state.lf_frame_was_rendered = true;
+ }
+
+ let sz = &self.decoder_state.file_header.size;
+ let xsize = sz.xsize() as usize;
+ let ysize = sz.ysize() as usize;
+
+ let mut regions_storage;
+
+ let regions = if let Some(regions) = changed_regions {
+ regions
+ } else {
+ regions_storage = vec![];
+ for i in (0..xsize.div_ceil(8)).step_by(256) {
+ let x0 = i;
+ let x1 = (i + 256).min(xsize.div_ceil(8));
+ regions_storage.push(Rect {
+ origin: (x0, 0),
+ size: (x1 - x0, ysize.div_ceil(8)),
+ });
+ }
+ &regions_storage[..]
+ };
+
+ let orientation = image_metadata.orientation;
+ let info = SaveStageBufferInfo {
+ downsample: (0, 0),
+ orientation,
+ byte_size: data_format.bytes_per_sample() * color_type.samples_per_pixel(),
+ after_extend: false,
+ };
+ let info = [Some(info)];
+ let mut bufs = [Some(JxlOutputBuffer::reborrow(&mut output_buffers[0]))];
+ let mut bufs = BufferSplitter::new(&mut bufs);
+ for r in regions {
+ let upsampled_rect = Rect {
+ size: (r.size.0 * 8, r.size.1 * 8),
+ origin: (r.origin.0 * 8, r.origin.1 * 8),
+ };
+ let upsampled_rect = upsampled_rect.clip((xsize, ysize));
+ let mut bufs = bufs.get_local_buffers(
+ &info,
+ upsampled_rect,
+ false,
+ (xsize, ysize),
+ (xsize, ysize),
+ (0, 0),
+ );
+ self.render_lf_frame_rect(
+ color_type,
+ data_format,
+ *r,
+ upsampled_rect,
+ orientation,
+ &mut bufs,
+ (xsize, ysize),
+ &output_color_info,
+ &output_tf,
+ )?;
+ }
+
+ Ok(())
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
index 7425d87fe19f3..61d285d66e29e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
@@ -3,7 +3,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use std::sync::Arc;
+use std::{collections::BTreeSet, sync::Arc};
use crate::{
entropy_coding::decode::Histograms,
@@ -12,7 +12,7 @@ use crate::{
headers::{
FileHeader,
extra_channels::ExtraChannelInfo,
- frame_header::{Encoding, FrameHeader},
+ frame_header::{Encoding, FrameHeader, FrameType},
permutation::Permutation,
toc::Toc,
},
@@ -26,12 +26,16 @@ use modular::{FullModularImage, Tree};
use quant_weights::DequantMatrices;
use quantizer::{LfQuantFactors, QuantizerParams};
+use crate::features::epf::SigmaSource;
+use crate::util::AtomicRefCell;
+
mod adaptive_lf_smoothing;
mod block_context_map;
mod coeff_order;
pub mod color_correlation_map;
pub mod decode;
mod group;
+pub mod lf_preview;
pub mod modular;
mod quant_weights;
pub mod quantizer;
@@ -45,16 +49,15 @@ pub enum Section {
Hf { group: usize, pass: usize },
}
+#[derive(Debug)]
pub struct LfGlobalState {
- patches: Option<Arc<PatchesDictionary>>,
- splines: Option<Splines>,
- noise: Option<Noise>,
lf_quant: LfQuantFactors,
pub quant_params: Option<QuantizerParams>,
block_context_map: Option<BlockContextMap>,
color_correlation_params: Option<ColorCorrelationParams>,
tree: Option<Tree>,
modular_global: FullModularImage,
+ total_bits_read: usize,
}
pub struct PassState {
@@ -113,10 +116,7 @@ impl ReferenceFrame {
pub struct DecoderState {
pub(super) file_header: FileHeader,
pub(super) reference_frames: Arc<[Option<ReferenceFrame>; Self::MAX_STORED_FRAMES]>,
- pub(super) lf_frames: [Option<[Image<f32>; 3]>; 4],
- // TODO(veluca): do we really need this? ISTM it could be achieved by passing None for all the
- // buffers, and it's not clear to me what use the decoder can make of it.
- pub enable_output: bool,
+ pub(super) lf_frames: [Option<[Image<f32>; 3]>; Self::NUM_LF_FRAMES],
pub render_spotcolors: bool,
#[cfg(test)]
pub use_simple_pipeline: bool,
@@ -124,17 +124,21 @@ pub struct DecoderState {
pub nonvisible_frame_index: usize,
pub high_precision: bool,
pub premultiply_output: bool,
+ // Whether the latest level 1 LF frame was fully rendered.
+ // If this is set to `true`, early flushing in the main frame
+ // (before HF is available) will do nothing.
+ pub lf_frame_was_rendered: bool,
}
impl DecoderState {
pub const MAX_STORED_FRAMES: usize = 4;
+ pub const NUM_LF_FRAMES: usize = 4;
pub fn new(file_header: FileHeader) -> Self {
Self {
file_header,
reference_frames: Arc::new([None, None, None, None]),
- lf_frames: [None, None, None, None],
- enable_output: true,
+ lf_frames: std::array::from_fn(|_| None),
render_spotcolors: true,
#[cfg(test)]
use_simple_pipeline: false,
@@ -142,6 +146,7 @@ impl DecoderState {
nonvisible_frame_index: 0,
high_precision: false,
premultiply_output: false,
+ lf_frame_was_rendered: false,
}
}
@@ -169,6 +174,14 @@ pub struct HfMetadata {
used_hf_types: u32,
}
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RenderUnit {
+ /// VarDCT data
+ VarDCT,
+ /// Modular channel with the given index
+ Modular(usize),
+}
+
pub struct Frame {
header: FrameHeader,
toc: Toc,
@@ -187,9 +200,21 @@ pub struct Frame {
render_pipeline: Option<Box<crate::render::LowMemoryRenderPipeline>>,
reference_frame_data: Option<Vec<Image<f32>>>,
lf_frame_data: Option<[Image<f32>; 3]>,
- lf_global_was_rendered: bool,
+ was_flushed_once: bool,
/// Reusable buffers for VarDCT group decoding.
vardct_buffers: Option<group::VarDctBuffers>,
+ // Last pass rendered so far for each HF group.
+ last_rendered_pass: Vec<Option<usize>>,
+ // Groups that should be rendered on the next call to flush().
+ groups_to_flush: BTreeSet<usize>,
+ changed_since_last_flush: BTreeSet<(usize, RenderUnit)>,
+ incomplete_groups: usize,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ splines: Arc<AtomicRefCell<Splines>>,
+ noise: Arc<AtomicRefCell<Noise>>,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ epf_sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl Frame {
@@ -221,6 +246,25 @@ impl Frame {
}
}
+ pub fn can_do_early_rendering(&self) -> bool {
+ if matches!(
+ self.header.frame_type,
+ FrameType::ReferenceOnly | FrameType::SkipProgressive
+ ) {
+ return false;
+ }
+ if self.header.has_lf_frame() {
+ return true;
+ }
+ if self.header.encoding == Encoding::VarDCT {
+ return false;
+ }
+ self.lf_global
+ .as_ref()
+ .map(|x| x.modular_global.can_do_early_partial_render())
+ .unwrap_or_default()
+ }
+
pub fn finalize_lf(&mut self) -> Result<()> {
if self.header.should_do_adaptive_lf_smoothing() {
let lf_global = self.lf_global.as_mut().unwrap();
@@ -295,14 +339,14 @@ mod test {
bytes: &[u8],
verify: impl Fn(&Frame, usize) -> Result<()> + 'static,
) -> Result<usize> {
- crate::api::tests::decode(bytes, usize::MAX, false, Some(Box::new(verify))).map(|x| x.0)
+ crate::api::tests::decode(bytes, usize::MAX, false, false, Some(Box::new(verify)))
+ .map(|x| x.0)
}
#[test]
fn splines() -> Result<(), Error> {
let verify_frame = move |frame: &Frame, _| {
- let lf_global = frame.lf_global.as_ref().unwrap();
- let splines = lf_global.splines.as_ref().unwrap();
+ let splines = frame.splines.borrow();
assert_eq!(splines.quantization_adjustment, 0);
let expected_starting_points = [Point { x: 9.0, y: 54.0 }].to_vec();
assert_eq!(splines.starting_points, expected_starting_points);
@@ -361,8 +405,7 @@ mod test {
#[test]
fn noise() -> Result<(), Error> {
let verify_frame = |frame: &Frame, _| {
- let lf_global = frame.lf_global.as_ref().unwrap();
- let noise = lf_global.noise.as_ref().unwrap();
+ let noise = frame.noise.borrow();
let want_noise = [
0.000000, 0.000977, 0.002930, 0.003906, 0.005859, 0.006836, 0.008789, 0.010742,
];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
index 4e3b4569ec8cf..29c93efa42e57 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
@@ -18,7 +18,6 @@ pub fn with_buffers<T>(
buffers: &[ModularBufferInfo],
indices: &[usize],
grid: usize,
- skip_empty: bool,
f: impl FnOnce(Vec<&mut ModularChannel>) -> Result<T>,
) -> Result<T> {
let mut bufs = vec![];
@@ -36,10 +35,12 @@ pub fn with_buffers<T>(
});
}
- // Skip zero-sized buffers when decoding - they don't contribute to the bitstream.
- // This matches libjxl's behavior in DecodeGroup where zero-sized rects are skipped.
- // The buffer is still allocated above so transforms can access it.
- if skip_empty && (b.size.0 == 0 || b.size.1 == 0) {
+ // Skip zero-sized *tiles*.
+ //
+ // Note that some bitstreams can contain channels with one dimension being 0 (e.g. palette
+ // meta-channel with 0 colors has size (0, 3)). Those must still participate in channel
+ // numbering (but carry no entropy-coded pixels), so we only skip when both dimensions are 0.
+ if b.size.0 == 0 && b.size.1 == 0 {
continue;
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
index 930603f1f3f58..b2cc596252f6c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
@@ -20,6 +20,7 @@ pub fn decode_modular_subbitstream(
header: Option<GroupHeader>,
global_tree: &Option<Tree>,
br: &mut BitReader,
+ partial_decoded_buffers: Option<&mut usize>,
) -> Result<()> {
// Skip decoding if all grids are zero-sized.
let is_empty = buffers
@@ -80,7 +81,21 @@ pub fn decode_modular_subbitstream(
let mut reader = SymbolReader::new(&tree.histograms, br, Some(image_width))?;
for i in 0..buffers.len() {
- decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br)?;
+ // Keep channel numbering stable, but skip actually decoding empty channels.
+ // This matches libjxl, which continues the loop without renumbering.
+ let (w, h) = buffers[i].data.size();
+ if w == 0 || h == 0 {
+ continue;
+ }
+ if let Err(e) =
+ decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br)
+ {
+ if let Some(p) = partial_decoded_buffers {
+ buffers[i].data.fill(0);
+ *p = i;
+ }
+ return Err(e);
+ }
}
reader.check_final_state(&tree.histograms, br)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
index b9190ce996269..398eb204c491b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
@@ -189,13 +189,16 @@ pub(super) fn decode_modular_channel(
let special_tree = specialize_tree(tree, chan, stream_id, size.0, header)?;
match special_tree {
+ TreeSpecialCase::NoTree(t) => {
+ decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
+ }
TreeSpecialCase::NoWp(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- TreeSpecialCase::WpOnly(t) => {
+ TreeSpecialCase::WpOnlyConfig420(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- TreeSpecialCase::GradientLookup(t) => {
+ TreeSpecialCase::GradientLookupConfig420(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
TreeSpecialCase::SingleGradientOnly(t) => {
@@ -204,5 +207,6 @@ pub(super) fn decode_modular_channel(
TreeSpecialCase::General(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- }
+ }?;
+ br.check_for_error()
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
index ff7d2263a7a91..8ffebfe8a8720 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
@@ -86,7 +86,7 @@ impl ModularChannelDecoder for NoWpTree {
&self.references,
&mut self.property_buffer,
);
- let dec = reader.read_signed(histograms, br, prediction_result.context as usize);
+ let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize);
make_pixel(dec, prediction_result.multiplier, prediction_result.guess)
}
}
@@ -140,7 +140,7 @@ impl ModularChannelDecoder for GeneralTree {
&self.no_wp_tree.references,
&mut self.no_wp_tree.property_buffer,
);
- let dec = reader.read_signed(histograms, br, prediction_result.context as usize);
+ let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize);
let val = make_pixel(dec, prediction_result.multiplier, prediction_result.guess);
self.wp_state.update_errors(val, pos, xsize);
val
@@ -152,12 +152,7 @@ const LUT_MIN_SPLITVAL: i32 = -1024;
const LUT_TABLE_SIZE: usize = (LUT_MAX_SPLITVAL - LUT_MIN_SPLITVAL + 1) as usize;
const _: () = assert!(LUT_TABLE_SIZE.is_power_of_two());
-pub struct WpOnlyLookup {
- lut: [u8; LUT_TABLE_SIZE], // Lookup (wp value -> *clustered* context id)
- wp_state: WeightedPredictorState,
-}
-
-fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE_SIZE]> {
+fn make_lut(tree: &[TreeNode]) -> Option<[u8; LUT_TABLE_SIZE]> {
struct RangeAndNode {
range: Range<i32>,
node: u32,
@@ -198,8 +193,7 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE
}
let start = range.start - LUT_MIN_SPLITVAL;
let end = range.end - LUT_MIN_SPLITVAL;
- ans[start as usize..end as usize]
- .fill(histograms.map_context_to_cluster(id as usize) as u8);
+ ans[start as usize..end as usize].fill(id as u8);
}
}
}
@@ -207,20 +201,30 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE
Some(ans)
}
-impl WpOnlyLookup {
+/// Specialized `WpOnlyLookup` for the case where all HybridUint configs are 420.
+/// This allows using the fast-path entropy decoder.
+pub struct WpOnlyLookupConfig420 {
+ lut: [u8; LUT_TABLE_SIZE],
+ wp_state: WeightedPredictorState,
+}
+
+impl WpOnlyLookupConfig420 {
fn new(
tree: &[TreeNode],
histograms: &Histograms,
header: &GroupHeader,
xsize: usize,
) -> Option<Self> {
+ if !histograms.can_use_config_420_fast_path() {
+ return None;
+ }
let wp_state = WeightedPredictorState::new(&header.wp_header, xsize);
- let lut = make_lut(tree, histograms)?;
+ let lut = make_lut(tree)?;
Some(Self { lut, wp_state })
}
}
-impl ModularChannelDecoder for WpOnlyLookup {
+impl ModularChannelDecoder for WpOnlyLookupConfig420 {
const NEEDS_TOP: bool = true;
const NEEDS_TOPTOP: bool = true;
@@ -243,25 +247,30 @@ impl ModularChannelDecoder for WpOnlyLookup {
.predict_and_property(pos, xsize, &prediction_data);
let ctx = self.lut[(property as i64 - LUT_MIN_SPLITVAL as i64)
.clamp(0, LUT_TABLE_SIZE as i64 - 1) as usize];
- let dec = reader.read_signed_clustered(histograms, br, ctx as usize);
+ // Use the specialized 420 fast path
+ let dec = reader.read_signed_clustered_config_420(histograms, br, ctx as usize);
let val = dec.wrapping_add(wp_pred as i32);
self.wp_state.update_errors(val, pos, xsize);
val
}
}
-/// Fast path for trees that split only on property 9 (gradient: left + top - topleft)
-/// with Gradient predictor, offset=0, multiplier=1.
-/// Maps property 9 values directly to cluster IDs via a LUT.
-/// This targets libjxl effort 2 encoding.
-pub struct GradientLookup {
- lut: [u8; LUT_TABLE_SIZE],
-}
-
/// Property 9 is the "gradient property": left + top - topleft
const GRADIENT_PROPERTY: u8 = 9;
-fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<GradientLookup> {
+/// Config 420 specialized version of gradient lookup for trees that split only on property 9.
+/// This uses the specialized entropy decoder for config 420 + no LZ77.
+pub struct GradientLookupConfig420 {
+ lut: [u8; LUT_TABLE_SIZE],
+}
+
+fn make_gradient_lut_config_420(
+ tree: &[TreeNode],
+ histograms: &Histograms,
+) -> Option<GradientLookupConfig420> {
+ if !histograms.can_use_config_420_fast_path() {
+ return None;
+ }
// Verify all splits are on property 9 and all leaves have Gradient predictor
for node in tree {
match node {
@@ -278,12 +287,11 @@ fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<Gradi
}
}
- // Use existing make_lut which handles offset=0, multiplier=1 checks
- let lut = make_lut(tree, histograms)?;
- Some(GradientLookup { lut })
+ let lut = make_lut(tree)?;
+ Some(GradientLookupConfig420 { lut })
}
-impl ModularChannelDecoder for GradientLookup {
+impl ModularChannelDecoder for GradientLookupConfig420 {
const NEEDS_TOP: bool = true;
const NEEDS_TOPTOP: bool = false;
@@ -314,13 +322,14 @@ impl ModularChannelDecoder for GradientLookup {
prediction_data.topleft as i64,
);
- let dec = reader.read_signed_clustered(histograms, br, cluster as usize);
+ // Use the specialized config 420 fast path
+ let dec = reader.read_signed_clustered_config_420(histograms, br, cluster as usize);
dec.wrapping_add(pred as i32)
}
}
pub struct SingleGradientOnly {
- ctx: usize,
+ clustered_ctx: usize,
}
impl ModularChannelDecoder for SingleGradientOnly {
@@ -340,16 +349,42 @@ impl ModularChannelDecoder for SingleGradientOnly {
histograms: &Histograms,
) -> i32 {
let pred = Predictor::Gradient.predict_one(prediction_data, 0);
- let dec = reader.read_signed(histograms, br, self.ctx);
+ let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx);
make_pixel(dec, 1, pred)
}
}
+pub struct NoTree {
+ clustered_ctx: usize,
+}
+
+impl ModularChannelDecoder for NoTree {
+ const NEEDS_TOP: bool = false;
+ const NEEDS_TOPTOP: bool = false;
+
+ fn init_row(&mut self, _: &mut [&mut ModularChannel], _: usize, _: usize) {}
+
+ #[inline(always)]
+ fn decode_one(
+ &mut self,
+ _: PredictionData,
+ _: (usize, usize),
+ _: usize,
+ reader: &mut SymbolReader,
+ br: &mut BitReader,
+ histograms: &Histograms,
+ ) -> i32 {
+ let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx);
+ make_pixel(dec, 1, 0)
+ }
+}
+
#[allow(clippy::large_enum_variant)]
pub enum TreeSpecialCase {
+ NoTree(NoTree),
NoWp(NoWpTree),
- WpOnly(WpOnlyLookup),
- GradientLookup(GradientLookup),
+ WpOnlyConfig420(WpOnlyLookupConfig420),
+ GradientLookupConfig420(GradientLookupConfig420),
SingleGradientOnly(SingleGradientOnly),
General(GeneralTree),
}
@@ -372,9 +407,10 @@ pub fn specialize_tree(
let mut uses_non_wp = false;
// Obtain a pruned tree without nodes that are not relevant in the current channel and stream.
- // Proceed in BFS order, so that we know that the children of anode will be adjacent.
+ // Proceed in BFS order, so that we know that the children of a node will be adjacent.
+ // Also re-maps context IDs to cluster IDs.
while let Some(v) = queue.pop_front() {
- let node = tree.nodes[v as usize];
+ let mut node = tree.nodes[v as usize];
match node {
TreeNode::Split {
property,
@@ -409,11 +445,29 @@ pub fn specialize_tree(
TreeNode::Leaf { predictor, .. } => {
uses_wp |= predictor == Predictor::Weighted;
uses_non_wp |= predictor != Predictor::Weighted;
+ let TreeNode::Leaf { id, .. } = &mut node else {
+ unreachable!()
+ };
+ *id = tree.histograms.map_context_to_cluster(*id as usize) as u32;
pruned_tree.push(node);
}
}
}
+ if let [
+ TreeNode::Leaf {
+ predictor: Predictor::Zero,
+ multiplier: 1,
+ offset: 0,
+ id,
+ },
+ ] = &*pruned_tree
+ {
+ return Ok(TreeSpecialCase::NoTree(NoTree {
+ clustered_ctx: *id as usize,
+ }));
+ }
+
if let [
TreeNode::Leaf {
predictor: Predictor::Gradient,
@@ -424,20 +478,23 @@ pub fn specialize_tree(
] = &*pruned_tree
{
return Ok(TreeSpecialCase::SingleGradientOnly(SingleGradientOnly {
- ctx: *id as usize,
+ clustered_ctx: *id as usize,
}));
}
- if !uses_non_wp
- && let Some(wp) = WpOnlyLookup::new(&pruned_tree, &tree.histograms, header, xsize)
- {
- return Ok(TreeSpecialCase::WpOnly(wp));
+ if !uses_non_wp {
+ // Try the specialized 420 config version (fast path for effort 3 encoded images)
+ if let Some(wp) = WpOnlyLookupConfig420::new(&pruned_tree, &tree.histograms, header, xsize)
+ {
+ return Ok(TreeSpecialCase::WpOnlyConfig420(wp));
+ }
}
- // Try gradient LUT for non-WP trees (targets effort 2 encoding)
+ // Non-WP trees (includes effort 2 encoding and some groups in effort > 3)
if !uses_wp {
- if let Some(gl) = make_gradient_lut(&pruned_tree, &tree.histograms) {
- return Ok(TreeSpecialCase::GradientLookup(gl));
+ // Try config 420 specialized gradient LUT version (fast path for effort 2 encoded images)
+ if let Some(gl) = make_gradient_lut_config_420(&pruned_tree, &tree.histograms) {
+ return Ok(TreeSpecialCase::GradientLookupConfig420(gl));
}
return Ok(TreeSpecialCase::NoWp(NoWpTree::new(
pruned_tree,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
index 9537774e4f530..c5d665f328b9e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
@@ -3,7 +3,13 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use std::{cmp::min, fmt::Debug};
+use std::{
+ cmp::min,
+ collections::{BTreeMap, BTreeSet},
+ fmt::Debug,
+ ops::Range,
+ sync::atomic::{AtomicUsize, Ordering},
+};
use crate::{
bit_reader::BitReader,
@@ -14,11 +20,13 @@ use crate::{
quantizer::{self, LfQuantFactors, QuantizerParams},
},
headers::{
- ImageMetadata, JxlHeader, bit_depth::BitDepth, frame_header::FrameHeader,
- modular::GroupHeader,
+ ImageMetadata, JxlHeader,
+ bit_depth::BitDepth,
+ frame_header::FrameHeader,
+ modular::{GroupHeader, TransformId},
},
image::{Image, Rect},
- util::{AtomicRefCell, CeilLog2, tracing_wrappers::*},
+ util::{AtomicRefCell, CeilLog2, SmallVec, tracing_wrappers::*},
};
use jxl_transforms::transform_map::*;
@@ -41,8 +49,8 @@ const IMAGE_OFFSET: (usize, usize) = (2, 2);
#[derive(Clone, PartialEq, Eq, Copy)]
struct ChannelInfo {
- // The index of the output channel in the render pipeline, or -1 for non-output channels.
- output_channel_idx: isize,
+ // The index of the output channel in the render pipeline.
+ output_channel_idx: Option<usize>,
// width, height
size: (usize, usize),
shift: Option<(usize, usize)>, // None for meta-channels
@@ -58,8 +66,8 @@ impl Debug for ChannelInfo {
write!(f, "(meta)")?;
}
write!(f, "{:?}", self.bit_depth)?;
- if self.output_channel_idx >= 0 {
- write!(f, "(output channel {})", self.output_channel_idx)?;
+ if let Some(oc) = self.output_channel_idx {
+ write!(f, "(output channel {})", oc)?;
}
Ok(())
}
@@ -162,7 +170,7 @@ impl ModularChannel {
fn channel_info(&self) -> ChannelInfo {
ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
size: self.data.size(),
shift: self.shift,
bit_depth: self.bit_depth,
@@ -170,6 +178,10 @@ impl ModularChannel {
}
}
+const BUFFER_STATUS_NOT_RENDERED: usize = 0;
+const BUFFER_STATUS_PARTIAL_RENDER: usize = 1;
+const BUFFER_STATUS_FINAL_RENDER: usize = 2;
+
// Note: this type uses interior mutability to get mutable references to multiple buffers at once.
// In principle, this is not needed, but the overhead should be minimal so using `unsafe` here is
// probably not worth it.
@@ -177,34 +189,81 @@ impl ModularChannel {
struct ModularBuffer {
data: AtomicRefCell<Option<ModularChannel>>,
// Number of times this buffer will be used, *including* when it is used for output.
- remaining_uses: usize,
- used_by_transforms: Vec<usize>,
+ remaining_uses: AtomicUsize,
+ // Transform steps that "strongly" or "weakly" use the image data in this buffer.
+ // A "strong" usage always triggers a re-render if the image data changes.
+ // A "weak" usage only triggers a re-render if the buffer is final, or if the
+ // current re-render was not only caused by weak re-renders.
+ used_by_transforms_strong: Vec<usize>,
+ used_by_transforms_weak: Vec<usize>,
size: (usize, usize),
+ status: AtomicUsize,
}
impl ModularBuffer {
+ fn get_status(&self) -> usize {
+ self.status.load(Ordering::Relaxed)
+ }
+
+ fn set_status(&self, val: usize) {
+ self.status.store(val, Ordering::Relaxed);
+ }
+
+ // Iterator over (transform_id, is_strong_use)
+ fn users(&self, include_weak: bool) -> impl Iterator<Item = (usize, bool)> {
+ let strong = self.used_by_transforms_strong.iter().map(|x| (*x, true));
+ let weak = if include_weak {
+ &self.used_by_transforms_weak[..]
+ } else {
+ &[]
+ }
+ .iter()
+ .map(|x| (*x, false));
+ strong.chain(weak)
+ }
+
// Gives out a copy of the buffer + auxiliary buffer, marking the buffer as used.
// If this was the last usage of the buffer, does not actually copy the buffer.
- fn get_buffer(&mut self) -> Result<ModularChannel> {
- self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap();
- if self.remaining_uses == 0 {
- Ok(self.data.borrow_mut().take().unwrap())
- } else {
- Ok(self
- .data
- .borrow()
- .as_ref()
- .map(ModularChannel::try_clone)
- .transpose()?
- .unwrap())
+ fn get_buffer(&self, can_consume: bool) -> Result<ModularChannel> {
+ if !can_consume {
+ return ModularChannel::try_clone(self.data.borrow().as_ref().unwrap());
}
+ let mut ret = None;
+ let _ = self.remaining_uses.fetch_update(
+ Ordering::Release,
+ Ordering::Acquire,
+ |remaining_pre| {
+ let remaining = remaining_pre.checked_sub(1).unwrap();
+ if ret.is_none() {
+ if remaining == 0 {
+ ret = Some(Ok(self.data.borrow_mut().take().unwrap()))
+ } else {
+ ret = self.data.borrow().as_ref().map(ModularChannel::try_clone);
+ }
+ } else if remaining == 0 {
+ *self.data.borrow_mut() = None;
+ }
+ Some(remaining)
+ },
+ );
+ Ok(ret.transpose()?.unwrap())
}
- fn mark_used(&mut self) {
- self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap();
- if self.remaining_uses == 0 {
- *self.data.borrow_mut() = None;
+ fn mark_used(&self, can_consume: bool) {
+ if !can_consume {
+ return;
}
+ let _ = self.remaining_uses.fetch_update(
+ Ordering::Release,
+ Ordering::Acquire,
+ |remaining_pre: usize| {
+ let remaining = remaining_pre.checked_sub(1).unwrap();
+ if remaining == 0 {
+ *self.data.borrow_mut() = None;
+ }
+ Some(remaining)
+ },
+ );
}
}
@@ -237,6 +296,7 @@ impl ModularBufferInfo {
};
self.grid_shape.0 * grid_pos.1 + grid_pos.0
}
+
fn get_grid_rect(
&self,
frame_header: &FrameHeader,
@@ -296,15 +356,41 @@ pub struct FullModularImage {
// In order, LfGlobal, LfGroup, HfGroup(pass 0), ..., HfGroup(last pass).
section_buffer_indices: Vec<Vec<usize>>,
modular_color_channels: usize,
+ can_do_partial_render: bool,
+ can_do_early_partial_render: bool,
+ decoded_section0_channels: usize,
+ needed_section0_channels_for_early_render: usize,
+ global_header: Option<GroupHeader>,
+ buffers_for_channels: Vec<usize>,
+ // Buffers to _start rendering from_ on the next call to process_output.
+ // This is initially set to LF global and LF buffers, and populated with HF buffers
+ // just before we start decoding them.
+ ready_buffers_dry_run: BTreeSet<(usize, usize)>,
+ ready_buffers: BTreeSet<(usize, usize)>,
+ // Whether each channel is used or not by the render pipeline.
+ pipeline_used_channels: Vec<bool>,
}
impl FullModularImage {
+ pub fn can_do_partial_render(&self) -> bool {
+ self.can_do_partial_render
+ }
+
+ pub fn can_do_early_partial_render(&self) -> bool {
+ self.can_do_early_partial_render
+ // Avoid green martians
+ && self.decoded_section0_channels >= self.needed_section0_channels_for_early_render
+ }
+
+ pub fn set_pipeline_used_channels(&mut self, used: &[bool]) {
+ self.pipeline_used_channels = used.to_vec();
+ }
+
#[instrument(level = "debug", skip_all)]
pub fn read(
frame_header: &FrameHeader,
image_metadata: &ImageMetadata,
modular_color_channels: usize,
- global_tree: &Option<Tree>,
br: &mut BitReader,
) -> Result<Self> {
let mut channels = vec![];
@@ -312,7 +398,7 @@ impl FullModularImage {
let shift = (frame_header.hshift(c), frame_header.vshift(c));
let size = frame_header.size();
channels.push(ChannelInfo {
- output_channel_idx: c as isize,
+ output_channel_idx: Some(c),
size: (size.0.div_ceil(1 << shift.0), size.1.div_ceil(1 << shift.1)),
shift: Some(shift),
bit_depth: image_metadata.bit_depth,
@@ -332,7 +418,7 @@ impl FullModularImage {
size.1.div_ceil(*ecups as usize),
);
channels.push(ChannelInfo {
- output_channel_idx: 3 + idx as isize,
+ output_channel_idx: Some(3 + idx),
size,
shift: Some((shift, shift)),
bit_depth: image_metadata.bit_depth,
@@ -350,12 +436,33 @@ impl FullModularImage {
transform_steps: vec![],
section_buffer_indices: vec![vec![]; 2 + frame_header.passes.num_passes as usize],
modular_color_channels,
+ can_do_partial_render: true,
+ can_do_early_partial_render: false,
+ decoded_section0_channels: 0,
+ needed_section0_channels_for_early_render: 0,
+ global_header: None,
+ buffers_for_channels: vec![],
+ ready_buffers_dry_run: BTreeSet::new(),
+ ready_buffers: BTreeSet::new(),
+ pipeline_used_channels: vec![],
});
}
trace!("reading modular header");
let header = GroupHeader::read(br)?;
+ // Disallow progressive rendering with multi-channel palette transforms
+ // or delta-palette.
+ let has_problematic_palette_transform = header.transforms.iter().any(|x| {
+ x.id == TransformId::Palette
+ && (x.num_channels > 1 || x.predictor_id != Predictor::Zero as u32)
+ });
+
+ let has_squeeze_transform = header
+ .transforms
+ .iter()
+ .any(|x| x.id == TransformId::Squeeze);
+
let (mut buffer_info, transform_steps) =
transforms::apply::meta_apply_transforms(&channels, &header)?;
@@ -460,12 +567,13 @@ impl FullModularImage {
);
for (pos, buf) in bi.buffer_grid.iter().enumerate() {
trace!(
- "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {}, used_by: {:?}",
+ "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {:?}, used_by: s {:?} w {:?}",
pos % bi.grid_shape.0,
pos / bi.grid_shape.0,
buf.size,
buf.remaining_uses,
- buf.used_by_transforms
+ buf.used_by_transforms_strong,
+ buf.used_by_transforms_weak,
);
}
}
@@ -475,24 +583,105 @@ impl FullModularImage {
trace!("Transform {i}: {ts:?}");
}
- with_buffers(&buffer_info, &section_buffer_indices[0], 0, true, |bufs| {
- decode_modular_subbitstream(
- bufs,
- ModularStreamId::GlobalData.get_id(frame_header),
- Some(header),
- global_tree,
- br,
- )
- })?;
+ let mut buffers_for_channels = vec![];
+
+ for (i, c) in buffer_info.iter().enumerate() {
+ if let Some(c) = c.info.output_channel_idx {
+ if buffers_for_channels.len() <= c {
+ buffers_for_channels.resize(c + 1, 0);
+ }
+ buffers_for_channels[c] = i;
+ }
+ }
+
+ let num_meta_channels = buffer_info
+ .iter()
+ .filter(|b| b.coded_channel_id >= 0 && b.info.is_meta())
+ .count();
Ok(FullModularImage {
buffer_info,
transform_steps,
section_buffer_indices,
modular_color_channels,
+ can_do_partial_render: !has_problematic_palette_transform,
+ can_do_early_partial_render: !has_problematic_palette_transform
+ && has_squeeze_transform,
+ decoded_section0_channels: 0,
+ needed_section0_channels_for_early_render: buffers_for_channels.len()
+ + num_meta_channels,
+ global_header: Some(header),
+ buffers_for_channels,
+ ready_buffers_dry_run: BTreeSet::new(),
+ ready_buffers: BTreeSet::new(),
+ pipeline_used_channels: vec![],
})
}
+ pub fn read_section0(
+ &mut self,
+ frame_header: &FrameHeader,
+ global_tree: &Option<Tree>,
+ br: &mut BitReader,
+ allow_partial: bool,
+ ) -> Result<()> {
+ let mut decoded_if_partial = 0;
+ let ret = with_buffers(
+ &self.buffer_info,
+ &self.section_buffer_indices[0],
+ 0,
+ |bufs| {
+ decode_modular_subbitstream(
+ bufs,
+ ModularStreamId::GlobalData.get_id(frame_header),
+ self.global_header.clone(),
+ global_tree,
+ br,
+ Some(&mut decoded_if_partial),
+ )
+ },
+ );
+
+ match (ret, allow_partial) {
+ (Ok(_), _) => {
+ // Decoded section completely.
+ self.decoded_section0_channels = self.section_buffer_indices[0].len();
+ }
+ (Err(_), true) => {
+ self.decoded_section0_channels = decoded_if_partial;
+ }
+ (Err(e), false) => {
+ return Err(e);
+ }
+ }
+
+ for b in self.section_buffer_indices[0]
+ .iter()
+ .take(self.decoded_section0_channels)
+ {
+ if self.buffer_info[*b].buffer_grid[0].get_status() == BUFFER_STATUS_FINAL_RENDER {
+ continue;
+ }
+ // If we did a partial decode, we cannot be 100% sure whether we correctly
+ // decoded all the sections. Thus, mark the sections as partially decoded.
+ self.buffer_info[*b].buffer_grid[0].set_status(if allow_partial {
+ BUFFER_STATUS_PARTIAL_RENDER
+ } else {
+ BUFFER_STATUS_FINAL_RENDER
+ });
+ self.ready_buffers_dry_run.insert((*b, 0));
+ }
+
+ Ok(())
+ }
+
+ pub fn mark_group_to_be_read(&mut self, section_id: usize, group: usize) {
+ for b in self.section_buffer_indices[section_id].iter() {
+ self.buffer_info[*b].buffer_grid[group].set_status(BUFFER_STATUS_FINAL_RENDER);
+ self.ready_buffers_dry_run.insert((*b, group));
+ }
+ }
+
#[allow(clippy::type_complexity)]
#[instrument(level = "debug", skip(self, frame_header, global_tree, br), ret)]
pub fn read_stream(
@@ -520,7 +709,6 @@ impl FullModularImage {
&self.buffer_info,
&self.section_buffer_indices[section_id],
grid,
- true,
|bufs| {
decode_modular_subbitstream(
bufs,
@@ -528,60 +716,247 @@ impl FullModularImage {
None,
global_tree,
br,
- )
+ None,
+ )?;
+ Ok(())
},
)?;
+
+ Ok(())
+ }
+
+ fn maybe_output(
+ &self,
+ buf: usize,
+ grid: usize,
+ dry_run: bool,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>,
+ ) -> Result<()> {
+ if let Some(chan) = self.buffer_info[buf].info.output_channel_idx {
+ let is_final =
+ self.buffer_info[buf].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER;
+ let all_final = self.buffers_for_channels.iter().all(|x| {
+ self.buffer_info[*x].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER
+ });
+ let channels: SmallVec<usize, 3> = if chan == 0 && self.modular_color_channels == 1 {
+ (0..3).filter(|x| self.pipeline_used_channels[*x]).collect()
+ } else {
+ self.pipeline_used_channels[chan]
+ .then_some(chan)
+ .into_iter()
+ .collect()
+ };
+ if channels.is_empty() {
+ return Ok(());
+ }
+ if dry_run {
+ for c in channels.iter() {
+ pass_to_pipeline(*c, grid, is_final, None)?;
+ }
+ } else {
+ debug!("Rendering channel {chan:?}, grid position {grid}");
+ let buf = self.buffer_info[buf].buffer_grid[grid].get_buffer(all_final)?;
+ for c in channels[1..].iter() {
+ pass_to_pipeline(*c, grid, is_final, Some(buf.data.try_clone()?))?;
+ }
+ pass_to_pipeline(channels[0], grid, is_final, Some(buf.data))?;
+ }
+ }
Ok(())
}
+ // If `dry_run` is true, this call does not modify any state, and the calls to `pass_to_pipeline`
+ // will have None as an image. Otherwise, the image will always be `Some(..)`.
+ // It is *required* to do a dry run before doing an actual run after any event that might have
+ // readied some buffers.
pub fn process_output(
&mut self,
- section_id: usize,
- grid: usize,
frame_header: &FrameHeader,
- pass_to_pipeline: &mut dyn FnMut(usize, usize, usize, Image<i32>) -> Result<()>,
+ dry_run: bool,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>,
) -> Result<()> {
- let mut maybe_output = |bi: &mut ModularBufferInfo, grid: usize| -> Result<()> {
- if bi.info.output_channel_idx >= 0 {
- let chan = bi.info.output_channel_idx as usize;
- debug!("Rendering channel {chan:?}, grid position {grid}");
- let buf = bi.buffer_grid[grid].get_buffer()?;
- // TODO(veluca): figure out what to do with passes here.
- if chan == 0 && self.modular_color_channels == 1 {
- for i in 0..2 {
- pass_to_pipeline(i, grid, 1, buf.data.try_clone()?)?;
- }
- pass_to_pipeline(2, grid, 1, buf.data)?;
+ // TODO(veluca): consider using `used_channel_mask` to avoid running transforms that produce
+ // channels that are not used.
+
+ // layer -> (transform -> is_strong)
+ let mut to_process_by_layer = BTreeMap::<usize, BTreeMap<usize, bool>>::new();
+ let mut buffers_to_output = vec![];
+
+ let ready_buffers = if dry_run {
+ std::mem::take(&mut self.ready_buffers_dry_run)
+ } else {
+ assert!(self.ready_buffers_dry_run.is_empty());
+ std::mem::take(&mut self.ready_buffers)
+ };
+
+ for (buf, grid) in ready_buffers {
+ if self.buffer_info[buf].info.output_channel_idx.is_some() {
+ buffers_to_output.push((buf, grid));
+ }
+ for (t, is_strong_dep) in self.buffer_info[buf].buffer_grid[grid].users(true) {
+ let layer = self.transform_steps[t].layer;
+ let layer = to_process_by_layer.entry(layer).or_default();
+ let is_strong = layer.entry(t).or_default();
+ *is_strong |= is_strong_dep;
+ }
+ if dry_run {
+ self.ready_buffers.insert((buf, grid));
+ }
+ }
+
+ // When doing a dry run, run the same logic as the real execution, but
+ // without modifying the actual buffer status -- instead, we use local
+ // overrides.
+ // This allows us to know what buffers will be produced before producing any.
+ let mut status_overrides = BTreeMap::new();
+
+ let get_status =
+ |status_overrides: &mut BTreeMap<(usize, usize), usize>, b: usize, g: usize| {
+ if let Some(s) = status_overrides.get(&(b, g)) {
+ *s
} else {
- pass_to_pipeline(chan, grid, 1, buf.data)?;
+ self.buffer_info[b].buffer_grid[g].get_status()
+ }
+ };
+
+ let mut new_dirty_transforms = vec![];
+ while let Some((_, transforms)) = to_process_by_layer.pop_first() {
+ trace!("{transforms:?}");
+ for (t, is_strong) in transforms {
+ let tfm = &self.transform_steps[t];
+ trace!("{:?}", tfm);
+
+ let dependency_status = tfm
+ .deps
+ .iter()
+ .map(|(b, g)| get_status(&mut status_overrides, *b, *g))
+ .min()
+ .unwrap_or(BUFFER_STATUS_FINAL_RENDER);
+
+ if dependency_status == BUFFER_STATUS_NOT_RENDERED {
+ continue;
+ }
+ let is_final = dependency_status == BUFFER_STATUS_FINAL_RENDER;
+
+ let mut previous_output_status = None;
+ for (b, g) in tfm.outputs(&self.buffer_info) {
+ let status = get_status(&mut status_overrides, b, g);
+ if previous_output_status.is_none() {
+ previous_output_status = Some(status);
+ }
+ assert_eq!(Some(status), previous_output_status);
+ if dry_run {
+ status_overrides.insert((b, g), dependency_status);
+ } else {
+ self.buffer_info[b].buffer_grid[g].set_status(dependency_status);
+ }
+ }
+ let previous_output_status = previous_output_status.unwrap();
+
+ if !dry_run {
+ tfm.do_run(frame_header, &self.buffer_info, is_final)?;
+ }
+
+ // If this was the first _or_ the last render, trigger a re-render across weak edges
+ // even if the render was caused by a weak edge.
+ // This is necessary to finish drawing those renders correctly.
+ let is_strong = is_strong
+ || (previous_output_status == BUFFER_STATUS_NOT_RENDERED
+ || dependency_status == BUFFER_STATUS_FINAL_RENDER);
+ for (buf, grid) in self.transform_steps[t].outputs(&self.buffer_info) {
+ if self.buffer_info[buf].info.output_channel_idx.is_some() {
+ buffers_to_output.push((buf, grid));
+ }
+ for (t, is_strong_dep) in
+ self.buffer_info[buf].buffer_grid[grid].users(is_strong)
+ {
+ new_dirty_transforms.push((t, is_strong_dep));
+ }
+ }
+ }
+
+ for (t, is_strong_dep) in new_dirty_transforms.drain(..) {
+ let layer = self.transform_steps[t].layer;
+ let layer = to_process_by_layer.entry(layer).or_default();
+ let is_strong = layer.entry(t).or_default();
+ *is_strong |= is_strong_dep;
+ }
+ }
+
+ // Pass all the output buffers to the render pipeline.
+ for (buf, grid) in buffers_to_output {
+ self.maybe_output(buf, grid, dry_run, pass_to_pipeline)?;
+ }
+
+ Ok(())
+ }
+
+ pub fn channel_range(&self) -> Range<usize> {
+ if self.modular_color_channels != 0 {
+ 0..self.buffers_for_channels.len()
+ } else {
+ // VarDCT image.
+ 3..self.buffers_for_channels.len()
+ }
+ }
+
+ pub fn flush_output(
+ &mut self,
+ group: usize,
+ chan: usize,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Image<i32>) -> Result<()>,
+ ) -> Result<()> {
+ if !self.can_do_partial_render() {
+ return Ok(());
+ }
+ let buf_idx = self.buffers_for_channels[chan];
+ // Skip channels that don't have a real buffer assignment.
+ // buffers_for_channels is zero-filled on resize, so intermediate channels
+ // (e.g. G/B when modular_color_channels==1) may alias buffer 0 incorrectly.
+ if self.buffer_info[buf_idx].info.output_channel_idx != Some(chan) {
+ return Ok(());
+ }
+ self.maybe_output(buf_idx, group, false, &mut |chan, grid, complete, img| {
+ pass_to_pipeline(chan, grid, complete, img.unwrap())
+ })
+ }
+
+ pub fn zero_fill_empty_channels(
+ &mut self,
+ num_passes: usize,
+ num_groups: usize,
+ num_lf_groups: usize,
+ ) -> Result<()> {
+ if !self.can_do_partial_render() {
+ return Ok(());
+ }
+ if self.buffer_info.is_empty() {
+ return Ok(());
+ }
+ let mut fill_buffer = |section: usize, grid| -> Result<()> {
+ // TODO(veluca): consider filling these buffers with placeholders instead of real images.
+ with_buffers(
+ &self.buffer_info,
+ &self.section_buffer_indices[section],
+ grid,
+ |_| Ok(()),
+ )?;
+ for b in self.section_buffer_indices[section].iter() {
+ if self.buffer_info[*b].buffer_grid[grid].get_status() == BUFFER_STATUS_NOT_RENDERED
+ {
+ self.buffer_info[*b].buffer_grid[grid].set_status(BUFFER_STATUS_PARTIAL_RENDER);
+ self.ready_buffers.insert((*b, grid));
}
}
Ok(())
};
-
- let mut new_ready_transform_chunks = vec![];
- for buf in self.section_buffer_indices[section_id].iter().copied() {
- maybe_output(&mut self.buffer_info[buf], grid)?;
- let new_chunks = self.buffer_info[buf].buffer_grid[grid]
- .used_by_transforms
- .to_vec();
- trace!("Buffer {buf} grid position {grid} used by chunks {new_chunks:?}");
- new_ready_transform_chunks.extend(new_chunks);
- }
-
- trace!(?new_ready_transform_chunks);
-
- while let Some(tfm) = new_ready_transform_chunks.pop() {
- trace!("tfm = {tfm} chunk = {:?}", self.transform_steps[tfm]);
- for (new_buf, new_grid) in
- self.transform_steps[tfm].dep_ready(frame_header, &mut self.buffer_info)?
- {
- maybe_output(&mut self.buffer_info[new_buf], new_grid)?;
- let new_chunks = self.buffer_info[new_buf].buffer_grid[new_grid]
- .used_by_transforms
- .to_vec();
- trace!("Buffer {new_buf} grid position {new_grid} used by chunks {new_chunks:?}");
- new_ready_transform_chunks.extend(new_chunks);
+ fill_buffer(0, 0)?;
+ for grid in 0..num_lf_groups {
+ fill_buffer(1, grid)?;
+ }
+ for pass in 0..num_passes {
+ for grid in 0..num_groups {
+ fill_buffer(2 + pass, grid)?;
}
}
@@ -733,6 +1108,7 @@ pub fn decode_vardct_lf(
None,
global_tree,
br,
+ None,
)?;
dequant_lf(
r,
@@ -780,6 +1156,7 @@ pub fn decode_hf_metadata(
None,
global_tree,
br,
+ None,
)?;
let ytox_image = &buffers[0].data;
let ytob_image = &buffers[1].data;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
index 5f1b631370267..2c74441aa0f00 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
@@ -56,40 +56,44 @@ pub enum TransformStep {
#[derive(Debug)]
pub struct TransformStepChunk {
pub(super) step: TransformStep,
+
// Grid position this transform should produce.
// Note that this is a lie for Palette with AverageAll or Weighted, as the transform with
// position (0, y) will produce the entire row of blocks (*, y) (and there will be no
// transforms with position (x, y) with x > 0).
pub(super) grid_pos: (usize, usize),
- // Number of inputs that are not yet available.
- pub(super) incomplete_deps: usize,
+
+ // List of (buffer, grid) that this transform depends on.
+ pub(in super::super) deps: Vec<(usize, usize)>,
+
+ // Processing layer that this transform belongs to. Layer 0 are transforms
+ // that only depend on coded channels, layer 1 are transforms that only
+ // depend on coded channels and layer 0 outputs, etc. Since transforms
+ // in the same layer have no inter-dependencies, they can be run at the
+ // same time.
+ pub(in super::super) layer: usize,
}
impl TransformStepChunk {
- // Marks that one dependency of this transform is ready, and potentially runs the transform,
- // returning the new buffers that are now ready.
- #[instrument(level = "trace", skip_all)]
- pub fn dep_ready(
- &mut self,
- frame_header: &FrameHeader,
- buffers: &mut [ModularBufferInfo],
- ) -> Result<Vec<(usize, usize)>> {
- self.incomplete_deps = self.incomplete_deps.checked_sub(1).unwrap();
- if self.incomplete_deps > 0 {
- trace!(
- "skipping transform chunk because incomplete_deps = {}",
- self.incomplete_deps
- );
- return Ok(vec![]);
- }
- let buf_out: &[usize] = match &self.step {
+ fn buf_out(&self) -> &[usize] {
+ match &self.step {
TransformStep::Rct { buf_out, .. } => buf_out,
TransformStep::Palette { buf_out, .. } => buf_out,
TransformStep::HSqueeze { buf_out, .. } | TransformStep::VSqueeze { buf_out, .. } => {
- &[*buf_out]
+ std::slice::from_ref(buf_out)
}
- };
+ }
+ }
+ // Runs this transform. This function *will* panic if the transform is not ready.
+ #[instrument(level = "trace", skip_all)]
+ pub fn do_run(
+ &self,
+ frame_header: &FrameHeader,
+ buffers: &[ModularBufferInfo],
+ is_final: bool,
+ ) -> Result<()> {
+ let buf_out = self.buf_out();
let out_grid_kind = buffers[buf_out[0]].grid_kind;
let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos);
let out_size = buffers[buf_out[0]].info.size;
@@ -112,13 +116,12 @@ impl TransformStepChunk {
// If not, creates buffers in the output that are a copy of the input buffers.
// This should be rare.
*buffers[buf_out[i]].buffer_grid[out_grid].data.borrow_mut() =
- Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer()?);
+ Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer(is_final)?);
}
- with_buffers(buffers, buf_out, out_grid, false, |mut bufs| {
+ with_buffers(buffers, buf_out, out_grid, |mut bufs| {
super::rct::do_rct_step(&mut bufs, *op, *perm);
Ok(())
})?;
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
}
TransformStep::Palette {
buf_in,
@@ -127,10 +130,9 @@ impl TransformStepChunk {
..
} if buffers[*buf_in].info.size.0 == 0 => {
// Nothing to do, just bookkeeping.
- buffers[*buf_in].buffer_grid[out_grid].mark_used();
- buffers[*buf_pal].buffer_grid[0].mark_used();
- with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?;
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
+ buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final);
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
+ with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?;
}
TransformStep::Palette {
buf_in,
@@ -155,7 +157,7 @@ impl TransformStepChunk {
});
// Ensure that the output buffers are present.
// TODO(szabadka): Extend the callback to support many grid points.
- with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?;
+ with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?;
let grid_shape = buffers[buf_out[0]].grid_shape;
let grid_x = out_grid % grid_shape.0;
let grid_y = out_grid / grid_shape.0;
@@ -191,9 +193,8 @@ impl TransformStepChunk {
*predictor,
);
}
- buffers[*buf_in].buffer_grid[out_grid].mark_used();
- buffers[*buf_pal].buffer_grid[0].mark_used();
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
+ buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final);
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
}
TransformStep::Palette {
buf_in,
@@ -206,7 +207,6 @@ impl TransformStepChunk {
} => {
assert_eq!(out_grid_kind, buffers[*buf_in].grid_kind);
assert_eq!(out_size, buffers[*buf_in].info.size);
- let mut generated_chunks = Vec::<(usize, usize)>::new();
let grid_shape = buffers[buf_out[0]].grid_shape;
{
assert_eq!(out_grid % grid_shape.0, 0);
@@ -222,7 +222,7 @@ impl TransformStepChunk {
));
// Ensure that the output buffers are present.
// TODO(szabadka): Extend the callback to support many grid points.
- with_buffers(buffers, buf_out, out_grid + grid_x, false, |_| Ok(()))?;
+ with_buffers(buffers, buf_out, out_grid + grid_x, |_| Ok(()))?;
}
let in_buf_refs: Vec<&ModularChannel> =
in_bufs.iter().map(|x| x.deref()).collect();
@@ -256,14 +256,10 @@ impl TransformStepChunk {
wp_header,
)?;
}
- buffers[*buf_pal].buffer_grid[0].mark_used();
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
for grid_x in 0..grid_shape.0 {
- buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used();
- for buf in buf_out {
- generated_chunks.push((*buf, out_grid + grid_x));
- }
+ buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used(is_final);
}
- Ok(generated_chunks)
}
TransformStep::HSqueeze { buf_in, buf_out } => {
let buf_avg = &buffers[buf_in[0]];
@@ -309,7 +305,7 @@ impl TransformStepChunk {
))
};
- with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| {
+ with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| {
super::squeeze::do_hsqueeze_step(
&in_avg.data.get_rect(buf_avg.get_grid_rect(
frame_header,
@@ -328,9 +324,8 @@ impl TransformStepChunk {
Ok(())
})?;
}
- buffers[buf_in[0]].buffer_grid[in_grid].mark_used();
- buffers[buf_in[1]].buffer_grid[res_grid].mark_used();
- Ok(vec![(*buf_out, out_grid)])
+ buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final);
+ buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final);
}
TransformStep::VSqueeze { buf_in, buf_out } => {
let buf_avg = &buffers[buf_in[0]];
@@ -379,7 +374,7 @@ impl TransformStepChunk {
buf_avg.get_grid_rect(frame_header, out_grid_kind, (gx, gy));
let res_grid_rect =
buf_res.get_grid_rect(frame_header, out_grid_kind, (gx, gy));
- with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| {
+ with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| {
super::squeeze::do_vsqueeze_step(
&in_avg.data.get_rect(avg_grid_rect),
&in_res.data.get_rect(res_grid_rect),
@@ -390,11 +385,34 @@ impl TransformStepChunk {
Ok(())
})?;
}
- buffers[buf_in[0]].buffer_grid[in_grid].mark_used();
- buffers[buf_in[1]].buffer_grid[res_grid].mark_used();
- Ok(vec![(*buf_out, out_grid)])
+ buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final);
+ buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final);
}
- }
+ };
+
+ Ok(())
+ }
+
+ // Iterates over the list of outputs for this transform.
+ pub fn outputs(&self, buffers: &[ModularBufferInfo]) -> impl Iterator<Item = (usize, usize)> {
+ let buf_out = self.buf_out();
+ let out_grid_kind = buffers[buf_out[0]].grid_kind;
+ let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos);
+ let grid_offset_up = match &self.step {
+ TransformStep::Palette {
+ buf_in,
+ buf_out,
+ predictor,
+ ..
+ } if buffers[*buf_in].info.size.0 != 0 && predictor.requires_full_row() => {
+ buffers[buf_out[0]].grid_shape.0
+ }
+ _ => 1,
+ };
+
+ buf_out
+ .iter()
+ .flat_map(move |x| (0..grid_offset_up).map(move |y| (*x, out_grid + y)))
}
}
@@ -445,7 +463,7 @@ fn meta_apply_single_transform(
for i in 0..3 {
let c = &mut channels[begin_channel + i];
let mut info = c.1;
- info.output_channel_idx = -1;
+ info.output_channel_idx = None;
c.0 = add_transform_buffer(
info,
format!(
@@ -503,7 +521,7 @@ fn meta_apply_single_transform(
((w, h.div_ceil(2)), (w, h - h.div_ceil(2)))
};
let new_0 = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: new_shift,
size: new_size_0,
bit_depth: chan.bit_depth,
@@ -513,7 +531,7 @@ fn meta_apply_single_transform(
format!("Squeezed channel, original channel {}", begin_channel + ic),
);
let new_1 = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: new_shift,
size: new_size_1,
bit_depth: chan.bit_depth,
@@ -551,7 +569,7 @@ fn meta_apply_single_transform(
// equal in the line above.
let bit_depth = channels[begin_channel].1.bit_depth;
let pchan_info = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: None,
size: (num_colors + num_deltas, num_channels),
bit_depth,
@@ -564,7 +582,7 @@ fn meta_apply_single_transform(
),
);
let mut inchan_info = channels[begin_channel].1;
- inchan_info.output_channel_idx = -1;
+ inchan_info.output_channel_idx = None;
let inchan = add_transform_buffer(
inchan_info,
format!(
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
index f1de5be25ec1a..6be51c221ed5c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
@@ -3,10 +3,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
use apply::TransformStep;
pub use apply::TransformStepChunk;
use num_derive::FromPrimitive;
+use crate::frame::modular::BUFFER_STATUS_NOT_RENDERED;
use crate::frame::modular::ModularBuffer;
use crate::headers::frame_header::FrameHeader;
use crate::util::AtomicRefCell;
@@ -112,15 +116,17 @@ pub fn make_grids(
// Create grids.
for g in buffer_info.iter_mut() {
- let is_output = g.info.output_channel_idx >= 0;
+ let is_output = g.info.output_channel_idx.is_some();
g.buffer_grid = get_grid_indices(g.grid_shape)
.map(|(x, y)| ModularBuffer {
data: AtomicRefCell::new(None),
- remaining_uses: if is_output { 1 } else { 0 },
- used_by_transforms: vec![],
+ remaining_uses: AtomicUsize::new(if is_output { 1 } else { 0 }),
+ used_by_transforms_weak: vec![],
+ used_by_transforms_strong: vec![],
size: g
.get_grid_rect(frame_header, g.grid_kind, (x as usize, y as usize))
.size,
+ status: AtomicUsize::new(BUFFER_STATUS_NOT_RENDERED),
})
.collect();
}
@@ -135,7 +141,8 @@ pub fn make_grids(
grid_transform_steps.push(TransformStepChunk {
step: transform.clone(),
grid_pos: (grid_pos.0 as usize, grid_pos.1 as usize),
- incomplete_deps: 0,
+ deps: vec![],
+ layer: 0,
});
ts
};
@@ -145,6 +152,7 @@ pub fn make_grids(
output_grid_kind: ModularGridKind,
output_grid_shape: (usize, usize),
output_grid_pos: (isize, isize),
+ is_weak: bool,
grid_transform_steps: &mut Vec<TransformStepChunk>,
buffer_info: &mut Vec<ModularBufferInfo>| {
let output_grid_size = (output_grid_shape.0 as isize, output_grid_shape.1 as isize);
@@ -159,15 +167,19 @@ pub fn make_grids(
let output_grid_pos = (output_grid_pos.0 as usize, output_grid_pos.1 as usize);
let input_grid_pos =
buffer_info[input_buffer_idx].get_grid_idx(output_grid_kind, output_grid_pos);
- if !buffer_info[input_buffer_idx].buffer_grid[input_grid_pos]
- .used_by_transforms
- .contains(&ts)
+ let grid = &mut buffer_info[input_buffer_idx].buffer_grid[input_grid_pos];
+ if !grid.used_by_transforms_weak.contains(&ts)
+ && !grid.used_by_transforms_strong.contains(&ts)
{
- buffer_info[input_buffer_idx].buffer_grid[input_grid_pos].remaining_uses += 1;
- buffer_info[input_buffer_idx].buffer_grid[input_grid_pos]
- .used_by_transforms
- .push(ts);
- grid_transform_steps[ts].incomplete_deps += 1;
+ grid.remaining_uses.fetch_add(1, Ordering::Relaxed);
+ grid_transform_steps[ts]
+ .deps
+ .push((input_buffer_idx, input_grid_pos));
+ if is_weak {
+ grid.used_by_transforms_weak.push(ts);
+ } else {
+ grid.used_by_transforms_strong.push(ts);
+ }
}
};
@@ -191,6 +203,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -219,6 +232,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -229,6 +243,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -239,6 +254,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y - 1),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -265,6 +281,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -274,6 +291,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -289,6 +307,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x + dx, y + dy),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -309,6 +328,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -320,6 +340,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x + 1, y),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -330,6 +351,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x - 1, y),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -348,6 +370,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -359,6 +382,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y + 1),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -369,6 +393,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y - 1),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -377,6 +402,46 @@ pub fn make_grids(
}
}
+ // Compute the layer of each transform step.
+ // TODO(veluca): for parallelization purposes, it might make sense to try to ensure that
+ // transforms in the same layer are as similar in runtime as possible.
+ let mut transforms_needed_by = vec![vec![]; grid_transform_steps.len()];
+ let mut enabled_transforms = vec![vec![]; grid_transform_steps.len()];
+ for (i, s) in grid_transform_steps.iter().enumerate() {
+ for (b, g) in s.outputs(buffer_info) {
+ for (t, _) in buffer_info[b].buffer_grid[g].users(true) {
+ transforms_needed_by[t].push(i);
+ enabled_transforms[i].push(t);
+ }
+ }
+ }
+
+ let mut missing_prerequisites: Vec<_> = transforms_needed_by.iter().map(|x| x.len()).collect();
+
+ let mut stack = vec![];
+ for (i, m) in missing_prerequisites.iter().enumerate() {
+ if *m == 0 {
+ stack.push(i);
+ }
+ }
+
+ while let Some(i) = stack.pop() {
+ assert_eq!(missing_prerequisites[i], 0);
+ for e in enabled_transforms[i].iter() {
+ missing_prerequisites[*e] = missing_prerequisites[*e].checked_sub(1).unwrap();
+ if missing_prerequisites[*e] == 0 {
+ stack.push(*e);
+ }
+ }
+
+ grid_transform_steps[i].layer = transforms_needed_by[i]
+ .iter()
+ .map(|x| grid_transform_steps[*x].layer)
+ .max()
+ .unwrap_or(0)
+ + 1;
+ }
+
trace!(?grid_transform_steps, ?buffer_info);
grid_transform_steps
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
index 59730862b7061..b5f0022a95270 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
@@ -63,6 +63,124 @@ pub struct Tree {
pub histograms: Histograms,
}
+fn validate_tree(tree: &[TreeNode], num_properties: usize) -> Result<()> {
+ const HEIGHT_LIMIT: usize = 2048;
+
+ if tree.is_empty() {
+ return Ok(());
+ }
+
+ // This mirrors libjxl's ValidateTree(), but avoids allocating
+ // `num_properties * tree.len()` entries.
+ //
+ // We do an explicit DFS and keep the property ranges only for the current root->node path.
+ // When descending into a child we update exactly one property's range (the one we split on)
+ // and store the previous range in the child frame; when returning from that child we restore
+ // it. This makes memory O(num_properties + height) instead of O(num_properties * tree_size).
+
+ #[derive(Clone, Copy, Debug)]
+ enum Stage {
+ Enter,
+ AfterLeft,
+ AfterRight,
+ }
+
+ struct Frame {
+ node: usize,
+ depth: usize,
+ stage: Stage,
+ restore: Option<(usize, (i32, i32))>,
+ }
+
+ let mut property_ranges: Vec<(i32, i32)> = vec![(i32::MIN, i32::MAX); num_properties];
+ let mut stack = vec![Frame {
+ node: 0,
+ depth: 0,
+ stage: Stage::Enter,
+ restore: None,
+ }];
+
+ while let Some(mut frame) = stack.pop() {
+ if frame.depth > HEIGHT_LIMIT {
+ return Err(Error::TreeTooTall(frame.depth, HEIGHT_LIMIT));
+ }
+
+ match (frame.stage, tree[frame.node]) {
+ (Stage::Enter, TreeNode::Leaf { .. }) => {
+ if let Some((p, old)) = frame.restore {
+ property_ranges[p] = old;
+ }
+ }
+ (
+ Stage::Enter,
+ TreeNode::Split {
+ property,
+ val,
+ left,
+ right: _,
+ },
+ ) => {
+ let p = property as usize;
+ let (l, u) = property_ranges[p];
+ if l > val || u <= val {
+ return Err(Error::TreeSplitOnEmptyRange(property, val, l, u));
+ }
+
+ frame.stage = Stage::AfterLeft;
+ let depth = frame.depth;
+ stack.push(frame);
+
+ // Descend into left child: range becomes (val+1, u).
+ let old = property_ranges[p];
+ property_ranges[p] = (val + 1, u);
+ stack.push(Frame {
+ node: left as usize,
+ depth: depth + 1,
+ stage: Stage::Enter,
+ restore: Some((p, old)),
+ });
+ }
+ (
+ Stage::AfterLeft,
+ TreeNode::Split {
+ property,
+ val,
+ left: _,
+ right,
+ },
+ ) => {
+ let p = property as usize;
+ let (l, u) = property_ranges[p];
+ if l > val || u <= val {
+ return Err(Error::TreeSplitOnEmptyRange(property, val, l, u));
+ }
+
+ frame.stage = Stage::AfterRight;
+ let depth = frame.depth;
+ stack.push(frame);
+
+ // Descend into right child: range becomes (l, val).
+ let old = property_ranges[p];
+ property_ranges[p] = (l, val);
+ stack.push(Frame {
+ node: right as usize,
+ depth: depth + 1,
+ stage: Stage::Enter,
+ restore: Some((p, old)),
+ });
+ }
+ (Stage::AfterRight, TreeNode::Split { .. }) => {
+ if let Some((p, old)) = frame.restore {
+ property_ranges[p] = old;
+ }
+ }
+ _ => unreachable!("invalid tree validation state"),
+ }
+ }
+
+ Ok(())
+}
+
impl Debug for Tree {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Tree[{:?}]", self.nodes)
@@ -358,52 +476,7 @@ impl Tree {
tree_reader.check_final_state(&tree_histograms, br)?;
let num_properties = max_property as usize + 1;
- let mut property_ranges = Vec::new_with_capacity(num_properties * tree.len())?;
- property_ranges.resize(num_properties * tree.len(), (i32::MIN, i32::MAX));
- let mut height = Vec::new_with_capacity(tree.len())?;
- height.resize(tree.len(), 0);
- for i in 0..tree.len() {
- const HEIGHT_LIMIT: usize = 2048;
- if height[i] > HEIGHT_LIMIT {
- return Err(Error::TreeTooLarge(height[i], HEIGHT_LIMIT));
- }
- if let TreeNode::Split {
- property,
- val,
- left,
- right,
- } = tree[i]
- {
- height[left as usize] = height[i] + 1;
- height[right as usize] = height[i] + 1;
- for p in 0..num_properties {
- if p == property as usize {
- let (l, u) = property_ranges[i * num_properties + p];
- if l > val || u <= val {
- return Err(Error::TreeSplitOnEmptyRange(p as u8, val, l, u));
- }
- trace!(
- "splitting at node {i} on property {p}, range [{l}, {u}] at position {val}"
- );
- property_ranges[left as usize * num_properties + p] = (val + 1, u);
- property_ranges[right as usize * num_properties + p] = (l, val);
- } else {
- property_ranges[left as usize * num_properties + p] =
- property_ranges[i * num_properties + p];
- property_ranges[right as usize * num_properties + p] =
- property_ranges[i * num_properties + p];
- }
- }
- } else {
- #[cfg(feature = "tracing")]
- {
- for p in 0..num_properties {
- let (l, u) = property_ranges[i * num_properties + p];
- trace!("final range at node {i} property {p}: [{l}, {u}]");
- }
- }
- }
- }
+ validate_tree(&tree, num_properties)?;
let histograms = Histograms::decode(tree.len().div_ceil(2), br, true)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
index 7eb13c4ce2495..0bcbda39a020f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
@@ -254,6 +254,7 @@ impl QuantEncoding {
None,
&lf_global.tree,
br,
+ None,
)?;
let mut qtable = Vec::with_capacity(required_size_x * required_size_y * 3);
for channel in image.iter_mut() {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
index 57413e811b57a..e1797e3c4fc7e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
@@ -13,12 +13,23 @@ use crate::{
pub const NUM_QUANT_TABLES: usize = 17;
pub const GLOBAL_SCALE_DENOM: usize = 1 << 16;
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct LfQuantFactors {
pub quant_factors: [f32; 3],
pub inv_quant_factors: [f32; 3],
}
+impl Default for LfQuantFactors {
+ fn default() -> Self {
+ let quant_factors = quant_weights::LF_QUANT;
+ let inv_quant_factors = quant_factors.map(f32::recip);
+ Self {
+ quant_factors,
+ inv_quant_factors,
+ }
+ }
+}
+
impl LfQuantFactors {
pub fn new(br: &mut BitReader) -> Result<LfQuantFactors> {
let mut quant_factors = [0.0f32; 3];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
index c8803bf28e113..f419c9858c55d 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
@@ -12,16 +12,27 @@ use crate::api::JxlOutputBuffer;
use crate::bit_reader::BitReader;
use crate::error::{Error, Result};
use crate::features::epf::SigmaSource;
+use crate::features::noise::Noise;
+use crate::features::patches::PatchesDictionary;
+use crate::features::spline::Splines;
+use crate::frame::RenderUnit;
+use crate::frame::color_correlation_map::ColorCorrelationParams;
+use crate::frame::quantizer::LfQuantFactors;
use crate::headers::frame_header::Encoding;
+use crate::headers::frame_header::FrameType;
use crate::headers::{Orientation, color_encoding::ColorSpace, extra_channels::ExtraChannel};
+use crate::image::Image;
use crate::image::Rect;
+use crate::util::AtomicRefCell;
+use std::sync::Arc;
+
#[cfg(test)]
use crate::render::SimpleRenderPipeline;
use crate::render::buffer_splitter::BufferSplitter;
use crate::render::{LowMemoryRenderPipeline, RenderPipeline, RenderPipelineBuilder, stages::*};
use crate::{
api::JxlPixelFormat,
- frame::{DecoderState, Frame, LfGlobalState},
+ frame::{DecoderState, Frame},
headers::frame_header::FrameHeader,
};
@@ -66,7 +77,7 @@ impl Frame {
mut pipeline: RenderPipelineBuilder<P>,
channels: &[usize],
data_format: JxlDataFormat,
- ) -> Result<RenderPipelineBuilder<P>> {
+ ) -> RenderPipelineBuilder<P> {
use crate::render::stages::{
ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage,
};
@@ -75,24 +86,24 @@ impl Frame {
JxlDataFormat::U8 { bit_depth } => {
for &channel in channels {
pipeline =
- pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth))?;
+ pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth));
}
}
JxlDataFormat::U16 { bit_depth, .. } => {
for &channel in channels {
pipeline =
- pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth))?;
+ pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth));
}
}
JxlDataFormat::F16 { .. } => {
for &channel in channels {
- pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel))?;
+ pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel));
}
}
// F32 doesn't need conversion - the pipeline already uses f32
JxlDataFormat::F32 { .. } => {}
}
- Ok(pipeline)
+ pipeline
}
/// Check if CMS will consume a black channel that the user requested in the output.
@@ -126,8 +137,10 @@ impl Frame {
api_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>,
pixel_format: &JxlPixelFormat,
groups: Vec<(usize, Vec<(usize, BitReader)>)>,
+ do_flush: bool,
+ output_profile: &JxlColorProfile,
) -> Result<()> {
- if self.render_pipeline.is_none() {
+ if self.render_pipeline.is_none() || self.lf_global.is_none() {
assert_eq!(groups.iter().map(|x| x.1.len()).sum::<usize>(), 0);
// We don't yet have any output ready (as the pipeline would be initialized otherwise),
// so exit without doing anything.
@@ -194,41 +207,128 @@ impl Frame {
pipeline!(self, p, p.render_outside_frame(&mut buffer_splitter)?);
- // Render data from the lf global section, if we didn't do so already, before rendering HF.
- if !self.lf_global_was_rendered {
- self.lf_global_was_rendered = true;
- let lf_global = self.lf_global.as_mut().unwrap();
- let mut pass_to_pipeline = |chan, group, num_passes, image| {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+
+ modular_global.set_pipeline_used_channels(pipeline!(self, p, p.used_channel_mask()));
+
+ // STEP 1: if we are requesting a flush, and did not flush before, mark modular channels
+ // as having been decoded as 0.
+ if !self.was_flushed_once && do_flush {
+ self.was_flushed_once = true;
+ self.groups_to_flush.extend(0..self.header.num_groups());
+ modular_global.zero_fill_empty_channels(
+ self.header.passes.num_passes as usize,
+ self.header.num_groups(),
+ self.header.num_lf_groups(),
+ )?;
+ }
+
+ // STEP 2: ensure that groups that will be re-rendered are marked as such.
+ // VarDCT data to be rendered.
+ for (g, _) in groups.iter() {
+ self.groups_to_flush.insert(*g);
+ pipeline!(self, p, p.mark_group_to_rerender(*g));
+ }
+ // Modular data to be re-rendered.
+ {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ for (group, passes) in groups.iter() {
+ for (pass, _) in passes.iter() {
+ modular_global.mark_group_to_be_read(2 + *pass, *group);
+ }
+ }
+ let mut pass_to_pipeline = |_, group, _, _| {
+ self.groups_to_flush.insert(group);
+ pipeline!(self, p, p.mark_group_to_rerender(group));
+ Ok(())
+ };
+ modular_global.process_output(&self.header, true, &mut pass_to_pipeline)?;
+ }
+
+ // STEP 3: decode the groups, eagerly rendering VarDCT channels and noise.
+ for (group, mut passes) in groups {
+ if self.decode_hf_group(group, &mut passes, &mut buffer_splitter, do_flush)? {
+ self.changed_since_last_flush
+ .insert((group, RenderUnit::VarDCT));
+ }
+ }
+
+ // STEP 4: process all modular transforms that can now be processed,
+ // flushing buffers that will not be used again, if either we are forcing a render now
+ // or we are done with the file.
+ if self.incomplete_groups == 0 || do_flush {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ let mut pass_to_pipeline = |chan, group, complete, image: Option<Image<i32>>| {
+ self.changed_since_last_flush
+ .insert((group, RenderUnit::Modular(chan)));
pipeline!(
self,
p,
- p.set_buffer_for_group(chan, group, num_passes, image, &mut buffer_splitter)?
+ p.set_buffer_for_group(
+ chan,
+ group,
+ complete,
+ image.unwrap(),
+ &mut buffer_splitter
+ )?
);
Ok(())
};
- lf_global
- .modular_global
- .process_output(0, 0, &self.header, &mut pass_to_pipeline)?;
- for group in 0..self.header.num_lf_groups() {
- lf_global.modular_global.process_output(
- 1,
- group,
- &self.header,
- &mut pass_to_pipeline,
- )?;
+ modular_global.process_output(&self.header, false, &mut pass_to_pipeline)?;
+
+ // STEP 5: re-render VarDCT/noise data in rendered groups for which it was
+ // not rendered, or re-send to pipeline modular channels that were not
+ // updated in those groups.
+ for g in std::mem::take(&mut self.groups_to_flush) {
+ if self
+ .changed_since_last_flush
+ .take(&(g, RenderUnit::VarDCT))
+ .is_none()
+ {
+ self.decode_hf_group(g, &mut [], &mut buffer_splitter, true)?;
+ }
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ let mut pass_to_pipeline = |chan, group, complete, image| {
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(chan, group, complete, image, &mut buffer_splitter)?
+ );
+ Ok(())
+ };
+ for c in modular_global.channel_range() {
+ if self
+ .changed_since_last_flush
+ .take(&(g, RenderUnit::Modular(c)))
+ .is_none()
+ {
+ modular_global.flush_output(g, c, &mut pass_to_pipeline)?;
+ }
+ }
}
}
- for (group, passes) in groups {
- // TODO(veluca): render all the available passes at once.
- for (pass, br) in passes {
- self.decode_hf_group(group, pass, br, &mut buffer_splitter)?;
- }
- }
+ let regions = buffer_splitter.into_changed_regions();
self.reference_frame_data = reference_frame_data;
self.lf_frame_data = lf_frame_data;
+ if self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1 {
+ if do_flush && let Some(buffers) = api_buffers {
+ self.maybe_preview_lf_frame(
+ pixel_format,
+ buffers,
+ Some(&regions[..]),
+ output_profile,
+ )?;
+ } else if self.incomplete_groups == 0 {
+ // If we are not requesting another flush at the end of the LF frame, we
+ // probably have a partial render. Ensure we re-render the LF frame when
+ // decoding the actual frame.
+ self.decoder_state.lf_frame_was_rendered = false;
+ }
+ }
+
Ok(())
}
@@ -236,8 +336,12 @@ impl Frame {
pub(crate) fn build_render_pipeline<T: RenderPipeline>(
decoder_state: &DecoderState,
frame_header: &FrameHeader,
- lf_global: &LfGlobalState,
- epf_sigma: &Option<SigmaSource>,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ splines: Arc<AtomicRefCell<Splines>>,
+ noise: Arc<AtomicRefCell<Noise>>,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ epf_sigma: Arc<AtomicRefCell<SigmaSource>>,
pixel_format: &JxlPixelFormat,
cms: Option<&dyn JxlCms>,
input_profile: &JxlColorProfile,
@@ -251,31 +355,29 @@ impl Frame {
frame_header.size_upsampled(),
frame_header.upsampling.ilog2() as usize,
frame_header.log_group_dim(),
- frame_header.passes.num_passes as usize,
);
if frame_header.encoding == Encoding::Modular {
if decoder_state.file_header.image_metadata.xyb_encoded {
- pipeline = pipeline
- .add_inout_stage(ConvertModularXYBToF32Stage::new(0, &lf_global.lf_quant))?
+ pipeline = pipeline.add_inout_stage(ConvertModularXYBToF32Stage::new(0, lf_quant))
} else {
for i in 0..3 {
pipeline = pipeline
- .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth))?;
+ .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth));
}
}
}
for i in 3..num_channels {
let ec_bit_depth = metadata.extra_channel_info[i - 3].bit_depth();
- pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth))?;
+ pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth));
}
for c in 0..3 {
if frame_header.hshift(c) != 0 {
- pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c))?;
+ pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c));
}
if frame_header.vshift(c) != 0 {
- pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c))?;
+ pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c));
}
}
@@ -286,17 +388,17 @@ impl Frame {
0,
filters.gab_x_weight1,
filters.gab_x_weight2,
- ))?
+ ))
.add_inout_stage(GaborishStage::new(
1,
filters.gab_y_weight1,
filters.gab_y_weight2,
- ))?
+ ))
.add_inout_stage(GaborishStage::new(
2,
filters.gab_b_weight1,
filters.gab_b_weight2,
- ))?;
+ ));
}
let rf = &frame_header.restoration_filter;
@@ -305,24 +407,24 @@ impl Frame {
rf.epf_pass0_sigma_scale,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
if rf.epf_iters >= 1 {
pipeline = pipeline.add_inout_stage(Epf1Stage::new(
1.0,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
if rf.epf_iters >= 2 {
pipeline = pipeline.add_inout_stage(Epf2Stage::new(
rf.epf_pass2_sigma_scale,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
let late_ec_upsample = frame_header.upsampling > 1
@@ -340,26 +442,26 @@ impl Frame {
4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, 3 + ec)),
8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, 3 + ec)),
_ => unreachable!(),
- }?;
+ };
}
}
}
if frame_header.has_patches() {
- pipeline = pipeline.add_inplace_stage(PatchesStage {
- patches: lf_global.patches.clone().unwrap(),
- extra_channels: metadata.extra_channel_info.clone(),
- decoder_state: decoder_state.reference_frames.clone(),
- })?
+ pipeline = pipeline.add_inplace_stage(PatchesStage::new(
+ patches,
+ metadata.extra_channel_info.clone(),
+ decoder_state.reference_frames.clone(),
+ ))
}
if frame_header.has_splines() {
pipeline = pipeline.add_inplace_stage(SplinesStage::new(
- lf_global.splines.clone().unwrap(),
+ splines,
frame_header.size(),
- &lf_global.color_correlation_params.unwrap_or_default(),
+ color_correlation_params.clone(),
decoder_state.high_precision,
- )?)?
+ ))
}
if frame_header.upsampling > 1 {
@@ -375,20 +477,20 @@ impl Frame {
4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, c)),
8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, c)),
_ => unreachable!(),
- }?;
+ };
}
}
if frame_header.has_noise() {
pipeline = pipeline
- .add_inout_stage(ConvolveNoiseStage::new(num_channels))?
- .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1))?
- .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2))?
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels))
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1))
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2))
.add_inplace_stage(AddNoiseStage::new(
- *lf_global.noise.as_ref().unwrap(),
- lf_global.color_correlation_params.unwrap_or_default(),
+ noise,
+ color_correlation_params,
num_channels,
- ))?;
+ ));
}
// Calculate the actual number of API-provided buffers based on pixel_format.
@@ -414,7 +516,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
if frame_header.can_be_referenced && frame_header.save_before_ct {
@@ -426,7 +528,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
@@ -461,9 +563,9 @@ impl Frame {
let xyb_encoded = decoder_state.file_header.image_metadata.xyb_encoded;
if frame_header.do_ycbcr {
- pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0))?;
+ pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0));
} else if xyb_encoded {
- pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone()))?;
+ pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone()));
}
// Insert CMS stage if profiles differ.
@@ -547,7 +649,7 @@ impl Frame {
out_channels,
cms_black_channel,
max_pixels,
- ))?;
+ ));
cms_used = true;
}
}
@@ -556,7 +658,7 @@ impl Frame {
// - Only if output is non-linear AND
// - CMS was not used (CMS already handles the full conversion including TF)
if xyb_encoded && !output_tf.is_linear() && !cms_used {
- pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone()))?;
+ pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone()));
}
if frame_header.needs_blending() {
@@ -564,14 +666,14 @@ impl Frame {
frame_header,
&decoder_state.file_header,
decoder_state.reference_frames.clone(),
- )?)?;
+ )?);
// TODO(veluca): we might not need to add an extend stage if the image size is
// compatible with the frame size.
pipeline = pipeline.add_extend_stage(ExtendToImageDimensionsStage::new(
frame_header,
&decoder_state.file_header,
decoder_state.reference_frames.clone(),
- )?)?;
+ )?);
}
if frame_header.can_be_referenced && !frame_header.save_before_ct {
@@ -583,7 +685,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
@@ -597,7 +699,7 @@ impl Frame {
{
if info.ec_type == ExtraChannel::SpotColor {
pipeline = pipeline
- .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap()))?;
+ .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap()));
}
}
}
@@ -659,10 +761,10 @@ impl Frame {
0,
num_color_channels,
alpha_channel,
- ))?;
+ ));
}
// Add conversion stages for non-float output formats
- pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df)?;
+ pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df);
pipeline = pipeline.add_save_stage(
color_source_channels,
metadata.orientation,
@@ -670,20 +772,26 @@ impl Frame {
pixel_format.color_type,
*df,
fill_opaque_alpha,
- )?;
+ );
}
+ let mut save_idx = if pixel_format.color_data_format.is_some() {
+ 1
+ } else {
+ 0
+ };
for i in 0..frame_header.num_extra_channels as usize {
if let Some(df) = &pixel_format.extra_channel_format[i] {
// Add conversion stages for non-float output formats
- pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df)?;
+ pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df);
pipeline = pipeline.add_save_stage(
&[3 + i],
metadata.orientation,
- 1 + i,
+ save_idx,
JxlColorType::Grayscale,
*df,
false,
- )?;
+ );
+ save_idx += 1;
}
}
}
@@ -697,20 +805,17 @@ impl Frame {
input_profile: &JxlColorProfile,
output_profile: &JxlColorProfile,
) -> Result<()> {
- let lf_global = self.lf_global.as_mut().unwrap();
- let epf_sigma = if self.header.restoration_filter.epf_iters > 0 {
- Some(SigmaSource::new(&self.header, lf_global, &self.hf_meta)?)
- } else {
- None
- };
-
#[cfg(test)]
let render_pipeline = if self.use_simple_pipeline {
Self::build_render_pipeline::<SimpleRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
@@ -720,8 +825,12 @@ impl Frame {
Self::build_render_pipeline::<LowMemoryRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
@@ -732,15 +841,19 @@ impl Frame {
let render_pipeline = Self::build_render_pipeline::<LowMemoryRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
output_profile,
)?;
self.render_pipeline = Some(render_pipeline);
- self.lf_global_was_rendered = false;
+ self.was_flushed_once = false;
Ok(())
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
index d4299928e6e7d..5ec6ba1dea95a 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
@@ -192,9 +192,9 @@ pub struct ColorEncoding {
impl ColorEncoding {
pub fn check(&self, _: &Empty) -> Result<(), Error> {
- if !self.want_icc
- && (self.color_space == ColorSpace::Unknown
- || self.tf.transfer_function == TransferFunction::Unknown)
+ if self.color_space == ColorSpace::Unknown
+ || self.tf.transfer_function == TransferFunction::Unknown
+ || self.color_space == ColorSpace::XYB
{
Err(Error::InvalidColorEncoding)
} else {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
index b7fc359ec7597..ffa52591b07e4 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
@@ -851,4 +851,14 @@ mod test_frame_header {
},
)
}
+
+ #[test]
+ fn test_frame_name() {
+ let (_, frame_header, _) =
+ read_headers_and_toc(include_bytes!("../../resources/test/named_frame_test.jxl"))
+ .unwrap();
+ assert_eq!(frame_header.frame_type, FrameType::RegularFrame);
+ assert_eq!(frame_header.name, "TestFrameName");
+ assert_eq!(frame_header.name.len(), 13);
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
index 5ac270930c26e..286f8a4423b28 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
@@ -97,7 +97,7 @@ pub enum TransformId {
Invalid = 3,
}
-#[derive(UnconditionalCoder, Debug, PartialEq)]
+#[derive(UnconditionalCoder, Debug, PartialEq, Clone)]
#[validate]
pub struct Transform {
#[coder(Bits(2))]
@@ -157,7 +157,7 @@ impl Transform {
}
}
-#[derive(UnconditionalCoder, Debug, PartialEq)]
+#[derive(UnconditionalCoder, Debug, PartialEq, Clone)]
pub struct GroupHeader {
pub use_global_tree: bool,
pub wp_header: WeightedHeader,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
index 8144607a21d19..37617a15fa9e8 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
@@ -20,7 +20,8 @@ mod stream;
mod tag;
use header::read_header;
-use stream::{IccStream, read_varint_from_reader};
+use stream::IccStream;
+pub(crate) use stream::read_varint_from_reader;
use tag::{read_single_command, read_tag_list};
const ICC_CONTEXTS: usize = 41;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
index 4ca91091311d4..0de64e9422655 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
@@ -25,7 +25,7 @@ fn read_varint(mut read_one: impl FnMut() -> Result<u8>) -> Result<u64> {
Ok(value)
}
-pub(super) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> {
+pub(crate) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> {
read_varint(|| stream.read_u8().map_err(|_| Error::IccEndOfStream))
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
index 3fbbb20562fc6..9c689e788a29a 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
@@ -20,6 +20,10 @@ pub struct OwnedRawImage {
}
impl OwnedRawImage {
+ pub fn new(byte_size: (usize, usize)) -> Result<Self> {
+ Self::new_zeroed_with_padding(byte_size, (0, 0), (0, 0))
+ }
+
pub fn new_zeroed_with_padding(
byte_size: (usize, usize),
offset: (usize, usize),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
index d5c1b06c38119..60fab1c83c5d9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
@@ -7,22 +7,28 @@ use crate::{api::JxlOutputBuffer, headers::Orientation, image::Rect, util::Shift
// Information for splitting the output buffers.
#[derive(Debug)]
-pub(super) struct SaveStageBufferInfo {
- pub(super) downsample: (u8, u8),
- pub(super) orientation: Orientation,
- pub(super) byte_size: usize,
- pub(super) after_extend: bool,
+pub struct SaveStageBufferInfo {
+ pub downsample: (u8, u8),
+ pub orientation: Orientation,
+ pub byte_size: usize,
+ pub after_extend: bool,
}
/// Data structure responsible for handing out access to portions of the output buffers.
-pub struct BufferSplitter<'a, 'b>(&'a mut [Option<JxlOutputBuffer<'b>>]);
+pub struct BufferSplitter<'a, 'b> {
+ buffers: &'a mut [Option<JxlOutputBuffer<'b>>],
+ requested_rects: Vec<Rect>,
+}
impl<'a, 'b> BufferSplitter<'a, 'b> {
pub fn new(bufs: &'a mut [Option<JxlOutputBuffer<'b>>]) -> Self {
- Self(bufs)
+ Self {
+ buffers: bufs,
+ requested_rects: vec![],
+ }
}
- pub(super) fn get_local_buffers(
+ pub(crate) fn get_local_buffers(
&mut self,
save_buffer_info: &[Option<SaveStageBufferInfo>],
rect: Rect,
@@ -31,8 +37,9 @@ impl<'a, 'b> BufferSplitter<'a, 'b> {
full_image_size: (usize, usize),
frame_origin: (isize, isize),
) -> Vec<Option<JxlOutputBuffer<'_>>> {
+ self.requested_rects.push(rect);
let mut local_buffers = vec![];
- let buffers = &mut *self.0;
+ let buffers = &mut *self.buffers;
local_buffers.reserve(buffers.len());
for _ in 0..buffers.len() {
local_buffers.push(None::<JxlOutputBuffer>);
@@ -97,7 +104,11 @@ impl<'a, 'b> BufferSplitter<'a, 'b> {
local_buffers
}
+ pub fn into_changed_regions(self) -> Vec<Rect> {
+ self.requested_rects
+ }
+
pub fn get_full_buffers(&mut self) -> &mut [Option<JxlOutputBuffer<'b>>] {
- &mut *self.0
+ &mut *self.buffers
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
index 0e4957a62401b..9523e1d1607ff 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
@@ -6,8 +6,10 @@
use crate::api::{JxlColorType, JxlDataFormat};
use crate::error::{Error, Result};
use crate::headers::Orientation;
+use crate::render::StageSpecialCase;
use crate::render::internal::ChannelInfo;
use crate::render::save::SaveStage;
+use crate::render::stages::ConvertI32ToU8Stage;
use crate::util::{ShiftRightCeil, tracing_wrappers::*};
use super::internal::{RenderPipelineShared, Stage};
@@ -25,7 +27,6 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
size: (usize, usize),
downsampling_shift: usize,
mut log_group_size: usize,
- num_passes: usize,
chunk_size: usize,
) -> Self {
info!("creating render pipeline");
@@ -47,70 +48,20 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
log_group_size,
group_count: (size.0.shrc(log_group_size), size.1.shrc(log_group_size)),
stages: vec![],
- group_chan_ready_passes: vec![
- vec![0; num_channels];
- size.0.shrc(log_group_size)
- * size.1.shrc(log_group_size)
+ group_chan_complete: vec![
+ vec![false; num_channels];
+ size.0.shrc(log_group_size) * size.1.shrc(log_group_size)
],
- num_passes,
chunk_size,
extend_stage_index: None,
+ channel_is_used: vec![false; num_channels],
},
}
}
- pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Result<Self> {
- let input_type = stage.input_type();
- let output_type = stage.output_type();
- let shift = stage.shift();
- let border = stage.border();
- let is_extend = matches!(stage, Stage::Extend(_));
- let current_info = self.shared.channel_info.last().unwrap().clone();
- debug!(
- last_stage_channel_info = ?current_info,
- extend_stage_index= ?self.shared.extend_stage_index,
- "adding stage '{stage}'",
- );
- let mut after_info = vec![];
- for (c, info) in current_info.iter().enumerate() {
- if !stage.uses_channel(c) {
- after_info.push(ChannelInfo {
- ty: info.ty,
- downsample: (0, 0),
- });
- } else {
- if let Some(ty) = info.ty
- && ty != input_type
- {
- return Err(Error::PipelineChannelTypeMismatch(
- stage.to_string(),
- c,
- input_type,
- ty,
- ));
- }
- after_info.push(ChannelInfo {
- ty: Some(output_type.unwrap_or(input_type)),
- downsample: shift,
- });
- }
- }
- if self.shared.extend_stage_index.is_some()
- && (shift != (0, 0) || border != (0, 0) || is_extend)
- {
- return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string()));
- }
- if is_extend {
- self.shared.extend_stage_index = Some(self.shared.stages.len());
- }
- debug!(
- new_channel_info = ?after_info,
- extend_stage_index= ?self.shared.extend_stage_index,
- "added stage '{stage}'",
- );
- self.shared.channel_info.push(after_info);
+ pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Self {
self.shared.stages.push(stage);
- Ok(self)
+ self
}
pub fn new(
@@ -118,19 +69,16 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
size: (usize, usize),
downsampling_shift: usize,
log_group_size: usize,
- num_passes: usize,
) -> Self {
Self::new_with_chunk_size(
num_channels,
size,
downsampling_shift,
log_group_size,
- num_passes,
1 << (log_group_size + downsampling_shift),
)
}
- #[instrument(skip_all, err)]
pub fn add_save_stage(
self,
channels: &[usize],
@@ -139,7 +87,7 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
color_type: JxlColorType,
data_format: JxlDataFormat,
fill_opaque_alpha: bool,
- ) -> Result<Self> {
+ ) -> Self {
let stage = SaveStage::new(
channels,
orientation,
@@ -151,25 +99,131 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
self.add_stage_internal(Stage::Save(stage))
}
- #[instrument(skip_all, err)]
- pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Result<Self> {
+ pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Self {
self.add_stage_internal(Stage::Extend(extend))
}
- #[instrument(skip_all, err)]
- pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Result<Self> {
+ pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Self {
self.add_stage_internal(Stage::InPlace(Pipeline::box_inplace_stage(stage)))
}
- #[instrument(skip_all, err)]
- pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Result<Self> {
+ pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Self {
self.add_stage_internal(Stage::InOut(Pipeline::box_inout_stage(stage)))
}
#[instrument(skip_all, err)]
pub fn build(mut self) -> Result<Box<Pipeline>> {
+ let mut stage_is_used = vec![false; self.shared.stages.len()];
+ let num_channels = self.shared.num_channels();
+ let mut channel_next_use = vec![None; num_channels];
+ // Prune unused stages.
+ for i in (0..self.shared.stages.len()).rev() {
+ let stage = &self.shared.stages[i];
+ if matches!(stage, Stage::Save(_)) {
+ for (c, next_use) in channel_next_use.iter_mut().enumerate() {
+ if stage.uses_channel(c) {
+ self.shared.channel_is_used[c] = true;
+ *next_use = Some(i);
+ }
+ }
+ }
+ for c in 0..num_channels {
+ if stage.uses_channel(c) {
+ stage_is_used[i] |= self.shared.channel_is_used[c];
+ }
+ }
+ if stage_is_used[i] {
+ match self.shared.stages[i].is_special_case() {
+ None => (),
+ Some(StageSpecialCase::F32ToU8 { .. }) => (),
+ Some(StageSpecialCase::ModularToF32 { channel, bit_depth }) => {
+ let n = channel_next_use[channel].unwrap();
+ if let Some(StageSpecialCase::F32ToU8 {
+ channel: c,
+ bit_depth: b,
+ }) = self.shared.stages[n].is_special_case()
+ {
+ assert_eq!(c, channel);
+ if b % bit_depth == 0 {
+ let mult = ((1 << b) - 1) / ((1 << bit_depth) - 1);
+ // Remove the next stage, and replace the current stage with I32 -> U8
+ // conversion.
+ stage_is_used[n] = false;
+ self.shared.stages[i] = Stage::InOut(Pipeline::box_inout_stage(
+ ConvertI32ToU8Stage::new(c, mult, (1 << b) - 1),
+ ));
+ }
+ }
+ }
+ }
+ for (c, next_use) in channel_next_use.iter_mut().enumerate() {
+ if self.shared.stages[i].uses_channel(c) {
+ self.shared.channel_is_used[c] = true;
+ *next_use = Some(i);
+ }
+ }
+ }
+ }
+ self.shared.stages = self
+ .shared
+ .stages
+ .into_iter()
+ .zip(stage_is_used)
+ .filter_map(|(s, used)| used.then_some(s))
+ .collect();
+ for (i, stage) in self.shared.stages.iter().enumerate() {
+ let input_type = stage.input_type();
+ let output_type = stage.output_type();
+ let shift = stage.shift();
+ let border = stage.border();
+ let is_extend = matches!(stage, Stage::Extend(_));
+ let current_info = self.shared.channel_info.last().unwrap().clone();
+ debug!(
+ last_stage_channel_info = ?current_info,
+ extend_stage_index= ?self.shared.extend_stage_index,
+ "adding stage '{stage}'",
+ );
+ let mut after_info = vec![];
+ for (c, info) in current_info.iter().enumerate() {
+ if !stage.uses_channel(c) {
+ after_info.push(ChannelInfo {
+ ty: info.ty,
+ downsample: (0, 0),
+ });
+ } else {
+ if let Some(ty) = info.ty
+ && ty != input_type
+ {
+ return Err(Error::PipelineChannelTypeMismatch(
+ stage.to_string(),
+ c,
+ input_type,
+ ty,
+ ));
+ }
+ after_info.push(ChannelInfo {
+ ty: Some(output_type.unwrap_or(input_type)),
+ downsample: shift,
+ });
+ }
+ }
+ if self.shared.extend_stage_index.is_some()
+ && (shift != (0, 0) || border != (0, 0) || is_extend)
+ {
+ return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string()));
+ }
+ if is_extend {
+ self.shared.extend_stage_index = Some(i);
+ }
+ debug!(
+ new_channel_info = ?after_info,
+ extend_stage_index= ?self.shared.extend_stage_index,
+ "added stage '{stage}'",
+ );
+ self.shared.channel_info.push(after_info);
+ }
+
let channel_info = &mut self.shared.channel_info;
- let num_channels = channel_info[0].len();
let mut cur_downsamples = vec![(0u8, 0u8); num_channels];
for (s, stage) in self.shared.stages.iter().enumerate().rev() {
let [current_info, next_info, ..] = &mut channel_info[s..] else {
@@ -232,11 +286,12 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
);
}
- // Ensure all channels have been used, so that we know the types of all buffers at all
- // stages.
for (c, chinfo) in channel_info.iter().flat_map(|x| x.iter().enumerate()) {
if chinfo.ty.is_none() {
- return Err(Error::PipelineChannelUnused(c));
+ assert!(!self.shared.channel_is_used[c]);
+ for g in self.shared.group_chan_complete.iter_mut() {
+ g[c] = true;
+ }
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
index 89e2989d03efa..e2a98cd58a36c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
@@ -8,6 +8,7 @@ use std::fmt::Display;
use crate::error::Result;
use crate::image::{DataTypeTag, ImageDataType};
+use crate::render::StageSpecialCase;
use crate::util::ShiftRightCeil;
use super::save::SaveStage;
@@ -74,6 +75,13 @@ impl<Buffer: 'static> Stage<Buffer> {
_ => None,
}
}
+ pub(super) fn is_special_case(&self) -> Option<StageSpecialCase> {
+ match self {
+ Stage::InOut(s) => s.is_special_case(),
+ Stage::InPlace(s) => s.is_special_case(),
+ _ => None,
+ }
+ }
}
impl<Buffer> Display for Stage<Buffer> {
@@ -98,11 +106,11 @@ pub struct RenderPipelineShared<Buffer> {
pub input_size: (usize, usize),
pub log_group_size: usize,
pub group_count: (usize, usize),
- pub group_chan_ready_passes: Vec<Vec<usize>>,
- pub num_passes: usize,
+ pub group_chan_complete: Vec<Vec<bool>>,
pub chunk_size: usize,
pub stages: Vec<Stage<Buffer>>,
pub extend_stage_index: Option<usize>,
+ pub channel_is_used: Vec<bool>,
}
impl<Buffer> RenderPipelineShared<Buffer> {
@@ -158,7 +166,11 @@ impl<Buffer> RenderPipelineShared<Buffer> {
}
pub fn num_channels(&self) -> usize {
- self.channel_info[0].len()
+ self.channel_is_used.len()
+ }
+
+ pub fn num_used_channels(&self) -> usize {
+ self.channel_is_used.iter().filter(|x| **x).count()
}
}
@@ -171,6 +183,7 @@ pub trait InPlaceStage: Any + Display {
fn init_local_state(&self, thread_index: usize) -> Result<Option<Box<dyn Any>>>;
fn uses_channel(&self, c: usize) -> bool;
fn ty(&self) -> DataTypeTag;
+ fn is_special_case(&self) -> Option<StageSpecialCase>;
}
pub trait RunInPlaceStage<Buffer: PipelineBuffer>: InPlaceStage {
@@ -192,6 +205,9 @@ impl<T: RenderPipelineInPlaceStage> InPlaceStage for T {
fn ty(&self) -> DataTypeTag {
T::Type::DATA_TYPE_ID
}
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ self.is_special_case()
+ }
}
pub trait InOutStage: Any + Display {
@@ -201,6 +217,7 @@ pub trait InOutStage: Any + Display {
fn uses_channel(&self, c: usize) -> bool;
fn input_type(&self) -> DataTypeTag;
fn output_type(&self) -> DataTypeTag;
+ fn is_special_case(&self) -> Option<StageSpecialCase>;
}
impl<T: RenderPipelineInOutStage> InOutStage for T {
@@ -222,6 +239,9 @@ impl<T: RenderPipelineInOutStage> InOutStage for T {
fn output_type(&self) -> DataTypeTag {
T::OutputT::DATA_TYPE_ID
}
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ self.is_special_case()
+ }
}
pub trait RunInOutStage<Buffer: PipelineBuffer>: InOutStage {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs
new file mode 100644
index 0000000000000..abc810ef8a42c
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs
@@ -0,0 +1,372 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+use std::ops::Range;
+
+use crate::error::Result;
+use crate::image::{OwnedRawImage, Rect};
+use crate::render::LowMemoryRenderPipeline;
+use crate::render::buffer_splitter::BufferSplitter;
+use crate::render::internal::{ChannelInfo, Stage};
+use crate::util::tracing_wrappers::*;
+
+pub(super) struct InputBuffer {
+ // One buffer per channel.
+ pub(super) data: Vec<Option<OwnedRawImage>>,
+ // Storage for left/right borders. Includes corners.
+ pub(super) leftright: Vec<Option<OwnedRawImage>>,
+ // Storage for top/bottom borders. Includes corners.
+ pub(super) topbottom: Vec<Option<OwnedRawImage>>,
+ // Number of ready channels in the current pass.
+ ready_channels: usize,
+ pub(super) is_ready: bool,
+ num_completed_groups_3x3: usize,
+}
+
+impl InputBuffer {
+ pub(super) fn set_buffer(&mut self, chan: usize, buf: OwnedRawImage) {
+ assert!(self.data[chan].is_none());
+ self.data[chan] = Some(buf);
+ self.ready_channels += 1;
+ }
+
+ pub(super) fn new(num_channels: usize) -> Self {
+ let b = || (0..num_channels).map(|_| None).collect();
+ Self {
+ data: b(),
+ leftright: b(),
+ topbottom: b(),
+ ready_channels: 0,
+ is_ready: false,
+ num_completed_groups_3x3: 0,
+ }
+ }
+}
+
+// Finds a small set of rectangles that cover all the "true" values in `ready_mask`,
+// and calls `f` on each such rectangle.
+fn foreach_ready_rect(
+ ready_mask: [bool; 9],
+ mut f: impl FnMut(Range<u8>, Range<u8>) -> Result<()>,
+) -> Result<()> {
+ // x range in middle row
+ let xrange = (1 - ready_mask[3] as u8)..(2 + ready_mask[5] as u8);
+ let can_extend_top = xrange.clone().all(|x| ready_mask[x as usize]);
+ let can_extend_bottom = xrange.clone().all(|x| ready_mask[6 + x as usize]);
+ let yrange = (1 - can_extend_top as u8)..(2 + can_extend_bottom as u8);
+ f(xrange.clone(), yrange)?;
+
+ if !can_extend_top {
+ if ready_mask[1] {
+ let xrange = (1 - ready_mask[0] as u8)..(2 + ready_mask[2] as u8);
+ f(xrange, 0..1)?;
+ } else {
+ if ready_mask[0] {
+ f(0..1, 0..1)?;
+ }
+ if ready_mask[2] {
+ f(2..3, 0..1)?;
+ }
+ }
+ } else {
+ if ready_mask[0] && !xrange.contains(&0) {
+ f(0..1, 0..1)?;
+ }
+ if ready_mask[2] && !xrange.contains(&2) {
+ f(2..3, 0..1)?;
+ }
+ }
+
+ if !can_extend_bottom {
+ if ready_mask[7] {
+ let xrange = (1 - ready_mask[6] as u8)..(2 + ready_mask[8] as u8);
+ f(xrange, 2..3)?;
+ } else {
+ if ready_mask[6] {
+ f(0..1, 2..3)?;
+ }
+ if ready_mask[8] {
+ f(2..3, 2..3)?;
+ }
+ }
+ } else {
+ if ready_mask[6] && !xrange.contains(&0) {
+ f(0..1, 2..3)?;
+ }
+ if ready_mask[8] && !xrange.contains(&2) {
+ f(2..3, 2..3)?;
+ }
+ }
+
+ Ok(())
+}
+
+impl LowMemoryRenderPipeline {
+ pub(super) fn maybe_get_scratch_buffer(
+ &mut self,
+ channel: usize,
+ kind: usize,
+ ) -> Option<OwnedRawImage> {
+ self.scratch_channel_buffers[channel * 3 + kind].pop()
+ }
+
+ fn store_scratch_buffer(&mut self, channel: usize, kind: usize, image: OwnedRawImage) {
+ self.scratch_channel_buffers[channel * 3 + kind].push(image)
+ }
+
+ pub(super) fn render_with_new_group(
+ &mut self,
+ g: usize,
+ buffer_splitter: &mut BufferSplitter,
+ ) -> Result<()> {
+ let buf = &mut self.input_buffers[g];
+ assert!(buf.ready_channels <= self.shared.num_used_channels());
+ if buf.ready_channels != self.shared.num_used_channels() {
+ return Ok(());
+ }
+ buf.ready_channels = 0;
+ let (gx, gy) = self.shared.group_position(g);
+ debug!("new data ready for group {gx},{gy}");
+
+ // Prepare output buffers for the group.
+ let (origin, size) = if let Some(e) = self.shared.extend_stage_index {
+ let Stage::Extend(e) = &self.shared.stages[e] else {
+ unreachable!("extend stage is not an extend stage");
+ };
+ (e.frame_origin, e.image_size)
+ } else {
+ ((0, 0), self.shared.input_size)
+ };
+ let gsz = 1 << self.shared.log_group_size;
+ let group_rect = Rect {
+ size: (gsz, gsz),
+ origin: (gsz * gx, gsz * gy),
+ }
+ .clip(self.shared.input_size);
+
+ {
+ for c in 0..self.shared.num_channels() {
+ if !self.shared.channel_is_used[c] {
+ continue;
+ }
+ let (bx, by) = self.border_size;
+ let (sx, sy) = self.input_buffers[g].data[c].as_ref().unwrap().byte_size();
+ let ChannelInfo {
+ ty,
+ downsample: (dx, dy),
+ } = self.shared.channel_info[0][c];
+ let ty = ty.unwrap();
+ let bx = bx >> dx;
+ let by = by >> dy;
+ let mut topbottom = if let Some(b) = self.input_buffers[g].topbottom[c].take() {
+ b
+ } else if let Some(b) = self.maybe_get_scratch_buffer(c, 1) {
+ b
+ } else {
+ let height = 4 * by;
+ let width = (1 << self.shared.log_group_size) * ty.size();
+ OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))?
+ };
+ let mut leftright = if let Some(b) = self.input_buffers[g].leftright[c].take() {
+ b
+ } else if let Some(b) = self.maybe_get_scratch_buffer(c, 2) {
+ b
+ } else {
+ let height = 1 << self.shared.log_group_size;
+ let width = 4 * bx * ty.size();
+ OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))?
+ };
+ let input = self.input_buffers[g].data[c].as_ref().unwrap();
+ if by != 0 {
+ for y in 0..(2 * by).min(sy) {
+ topbottom.row_mut(y)[..sx].copy_from_slice(input.row(y));
+ topbottom.row_mut(4 * by - 1 - y)[..sx]
+ .copy_from_slice(input.row(sy - y - 1));
+ }
+ }
+ if bx != 0 {
+ let cs = (bx * 2 * ty.size()).min(sx);
+ for y in 0..sy {
+ let row_out = leftright.row_mut(y);
+ let row_in = input.row(y);
+ row_out[..cs].copy_from_slice(&row_in[..cs]);
+ row_out[4 * bx * ty.size() - cs..].copy_from_slice(&row_in[sx - cs..]);
+ }
+ }
+ self.input_buffers[g].leftright[c] = Some(leftright);
+ self.input_buffers[g].topbottom[c] = Some(topbottom);
+ }
+ self.input_buffers[g].is_ready = true;
+ }
+
+ let gxm1 = gx.saturating_sub(1);
+ let gym1 = gy.saturating_sub(1);
+ let gxp1 = (gx + 1).min(self.shared.group_count.0 - 1);
+ let gyp1 = (gy + 1).min(self.shared.group_count.1 - 1);
+ let gw = self.shared.group_count.0;
+ // TODO(veluca): this code probably needs to be adapted for multithreading.
+ let mut ready_mask = [
+ self.input_buffers[gym1 * gw + gxm1].is_ready,
+ self.input_buffers[gym1 * gw + gx].is_ready,
+ self.input_buffers[gym1 * gw + gxp1].is_ready,
+ self.input_buffers[gy * gw + gxm1].is_ready,
+ self.input_buffers[gy * gw + gx].is_ready, // should be guaranteed to be true.
+ self.input_buffers[gy * gw + gxp1].is_ready,
+ self.input_buffers[gyp1 * gw + gxm1].is_ready,
+ self.input_buffers[gyp1 * gw + gx].is_ready,
+ self.input_buffers[gyp1 * gw + gxp1].is_ready,
+ ];
+ // We can only render a corner if we have all the 4 adjacent groups. Thus, mask out corners if
+ // the corresponding side buffers are not ready.
+ ready_mask[0] &= ready_mask[1];
+ ready_mask[0] &= ready_mask[3];
+ ready_mask[2] &= ready_mask[1];
+ ready_mask[2] &= ready_mask[5];
+ ready_mask[6] &= ready_mask[3];
+ ready_mask[6] &= ready_mask[7];
+ ready_mask[8] &= ready_mask[5];
+ ready_mask[8] &= ready_mask[7];
+
+ foreach_ready_rect(ready_mask, |xrange, yrange| {
+ let y0 = match (gy == 0, yrange.start) {
+ (true, 0) => group_rect.origin.1,
+ (false, 0) => group_rect.origin.1 - self.border_size.1,
+ (_, 1) => group_rect.origin.1 + self.border_size.1,
+ // (_, 2)
+ _ => group_rect.end().1 - self.border_size.1,
+ };
+ let x0 = match (gx == 0, xrange.start) {
+ (true, 0) => group_rect.origin.0,
+ (false, 0) => group_rect.origin.0 - self.border_size.0,
+ (_, 1) => group_rect.origin.0 + self.border_size.0,
+ // (_, 2)
+ _ => group_rect.end().0 - self.border_size.0,
+ };
+
+ let y1 = match (gy + 1 == self.shared.group_count.1, yrange.end) {
+ (true, 3) => group_rect.end().1,
+ (false, 3) => group_rect.end().1 + self.border_size.1,
+ (_, 2) => group_rect.end().1 - self.border_size.1,
+ // (_, 1)
+ _ => group_rect.origin.1 + self.border_size.1,
+ };
+
+ let x1 = match (gx + 1 == self.shared.group_count.0, xrange.end) {
+ (true, 3) => group_rect.end().0,
+ (false, 3) => group_rect.end().0 + self.border_size.0,
+ (_, 2) => group_rect.end().0 - self.border_size.0,
+ // (_, 1)
+ _ => group_rect.origin.0 + self.border_size.0,
+ };
+
+ let image_area = Rect {
+ origin: (x0, y0),
+ size: (x1 - x0, y1 - y0),
+ };
+
+ let mut local_buffers = buffer_splitter.get_local_buffers(
+ &self.save_buffer_info,
+ image_area,
+ false,
+ self.shared.input_size,
+ size,
+ origin,
+ );
+
+ self.render_group((gx, gy), image_area, &mut local_buffers)?;
+ Ok(())
+ })?;
+
+ for c in 0..self.input_buffers[g].data.len() {
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].data[c]) {
+ self.store_scratch_buffer(c, 0, b);
+ }
+ }
+
+ // Clear border buffers that will not be used again.
+ // This is certainly the case if *all* the groups in the 3x3 group area around
+ // the current group are complete.
+ if self.shared.group_chan_complete[g].iter().all(|x| *x) {
+ for g in [
+ gym1 * gw + gxm1,
+ gym1 * gw + gx,
+ gym1 * gw + gxp1,
+ gy * gw + gxm1,
+ gy * gw + gx,
+ gy * gw + gxp1,
+ gyp1 * gw + gxm1,
+ gyp1 * gw + gx,
+ gyp1 * gw + gxp1,
+ ] {
+ self.input_buffers[g].num_completed_groups_3x3 += 1;
+ if self.input_buffers[g].num_completed_groups_3x3 != 9 {
+ continue;
+ }
+ for c in 0..self.input_buffers[g].data.len() {
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].topbottom[c]) {
+ self.store_scratch_buffer(c, 1, b);
+ }
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].leftright[c]) {
+ self.store_scratch_buffer(c, 2, b);
+ }
+ }
+ }
+ }
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_foreach_ready_rect() {
+ for i in 0..512 {
+ let mut ready_mask = [false; 9];
+ for j in 0..9 {
+ if (i >> j) & 1 == 1 {
+ ready_mask[j] = true;
+ }
+ }
+ if !ready_mask[4] {
+ continue;
+ }
+
+ let mut covered = [false; 9];
+ foreach_ready_rect(ready_mask, |xr, yr| {
+ for y in yr {
+ for x in xr.clone() {
+ let idx = (y as usize) * 3 + (x as usize);
+ assert!(
+ ready_mask[idx],
+ "Covered not ready index {} in mask {:?} (x={}, y={})",
+ idx, ready_mask, x, y
+ );
+ assert!(
+ !covered[idx],
+ "Double coverage of index {} in mask {:?}",
+ idx, ready_mask
+ );
+ covered[idx] = true;
+ }
+ }
+ Ok(())
+ })
+ .unwrap();
+
+ for j in 0..9 {
+ if ready_mask[j] {
+ assert!(
+ covered[j],
+ "Failed to cover index {} in mask {:?}",
+ j, ready_mask
+ );
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
index cf4a65e81049e..7f8214ff35abe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
@@ -43,17 +43,3 @@ pub(super) fn get_distinct_indices<'a, T>(
.map(|x| std::mem::take(x).expect("Not all elements were found"))
.collect()
}
-
-/// Mirror-reflects a value v to fit in a [0; s) range.
-pub(super) fn mirror(mut v: isize, s: usize) -> usize {
- // TODO(veluca): consider speeding this up if needed.
- loop {
- if v < 0 {
- v = -v - 1;
- } else if v >= s as isize {
- v = s as isize * 2 - v - 1;
- } else {
- return v as usize;
- }
- }
-}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
index b2d33cade1396..761175a12aa09 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
@@ -11,27 +11,23 @@ use row_buffers::RowBuffer;
use crate::api::JxlOutputBuffer;
use crate::error::Result;
-use crate::image::{Image, ImageDataType, OwnedRawImage, Rect};
+use crate::image::{DataTypeTag, Image, ImageDataType, OwnedRawImage, Rect};
use crate::render::MAX_BORDER;
use crate::render::buffer_splitter::{BufferSplitter, SaveStageBufferInfo};
use crate::render::internal::Stage;
+use crate::render::low_memory_pipeline::group_scheduler::InputBuffer;
use crate::util::{ShiftRightCeil, tracing_wrappers::*};
use super::RenderPipeline;
use super::internal::{RenderPipelineShared, RunInOutStage, RunInPlaceStage};
+mod group_scheduler;
mod helpers;
mod render_group;
-pub(super) mod row_buffers;
+pub(crate) mod row_buffers;
mod run_stage;
mod save;
-struct InputBuffer {
- // One buffer per channel.
- data: Vec<Option<OwnedRawImage>>,
- completed_passes: usize,
-}
-
pub struct LowMemoryRenderPipeline {
shared: RenderPipelineShared<RowBuffer>,
input_buffers: Vec<InputBuffer>,
@@ -49,7 +45,8 @@ pub struct LowMemoryRenderPipeline {
// The amount of pixels that we need to read (for every channel) in non-edge groups to run all
// stages correctly.
input_border_pixels: Vec<(usize, usize)>,
- has_nontrivial_border: bool,
+ // Size of the border, in image (i.e. non-downsampled) pixels.
+ border_size: (usize, usize),
// For every stage, the downsampling level of *any* channel that the stage uses at that point.
// Note that this must be equal across all the used channels.
downsampling_for_stage: Vec<(usize, usize)>,
@@ -60,160 +57,21 @@ pub struct LowMemoryRenderPipeline {
opaque_alpha_buffers: Vec<Option<RowBuffer>>,
// Sorted indices to call get_distinct_indices.
sorted_buffer_indices: Vec<Vec<(usize, usize, usize)>>,
- // For each channel, buffers that could be reused to store group data for that channel.
+ // For each channel and the 3 kinds of buffers (center / topbottom / leftright), buffers that
+ // could be reused to store group data for that channel.
+ // Indexed by [3*channel] = center, [3*channel+1] = topbottom, [3*channel+2] = leftright.
scratch_channel_buffers: Vec<Vec<OwnedRawImage>>,
}
-impl LowMemoryRenderPipeline {
- // TODO(veluca): most of this logic will need to change to ensure better cache utilization and
- // lower memory usage.
- fn render_with_new_group(
- &mut self,
- new_group_id: usize,
- buffer_splitter: &mut BufferSplitter,
- ) -> Result<()> {
- let (gx, gy) = self.shared.group_position(new_group_id);
-
- // We put groups that are 2 afar here, because even if they could not have become
- // renderable, they might have become freeable.
- let mut possible_groups = vec![];
- for dy in -2..=2 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -2..=2 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- possible_groups.push(igy as usize * self.shared.group_count.0 + igx as usize);
- }
- }
-
- // First, render all groups that have made progress; only check those that *could* have
- // made progress.
- for g in possible_groups.iter().copied() {
- let ready_passes = self.shared.group_chan_ready_passes[g]
- .iter()
- .copied()
- .min()
- .unwrap();
- if self.input_buffers[g].completed_passes < ready_passes {
- let (gx, gy) = self.shared.group_position(g);
- let mut fully_ready_passes = ready_passes;
- // Here we assume that we never need more than one group worth of border.
- if self.has_nontrivial_border {
- for dy in -1..=1 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -1..=1 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- let ig = (igy as usize) * self.shared.group_count.0 + igx as usize;
- let ready_passes = self.shared.group_chan_ready_passes[ig]
- .iter()
- .copied()
- .min()
- .unwrap();
- fully_ready_passes = fully_ready_passes.min(ready_passes);
- }
- }
- }
- if self.input_buffers[g].completed_passes >= fully_ready_passes {
- continue;
- }
- debug!(
- "new ready passes for group {gx},{gy} ({} completed, \
- {ready_passes} ready, {fully_ready_passes} ready including neighbours)",
- self.input_buffers[g].completed_passes
- );
-
- // Prepare output buffers for the group.
- let (origin, size) = if let Some(e) = self.shared.extend_stage_index {
- let Stage::Extend(e) = &self.shared.stages[e] else {
- unreachable!("extend stage is not an extend stage");
- };
- (e.frame_origin, e.image_size)
- } else {
- ((0, 0), self.shared.input_size)
- };
- let gsz = (
- 1 << self.shared.log_group_size,
- 1 << self.shared.log_group_size,
- );
- let rect_to_render = Rect {
- size: gsz,
- origin: (gsz.0 * gx, gsz.1 * gy),
- };
- let mut local_buffers = buffer_splitter.get_local_buffers(
- &self.save_buffer_info,
- rect_to_render,
- false,
- self.shared.input_size,
- size,
- origin,
- );
-
- self.render_group((gx, gy), &mut local_buffers)?;
-
- self.input_buffers[g].completed_passes = fully_ready_passes;
- }
- }
-
- // Clear buffers that will not be used again.
- for g in possible_groups.iter().copied() {
- let (gx, gy) = self.shared.group_position(g);
- let mut neigh_complete_passes = self.input_buffers[g].completed_passes;
- if self.has_nontrivial_border {
- for dy in -1..=1 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -1..=1 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- let ig = (igy as usize) * self.shared.group_count.0 + igx as usize;
- neigh_complete_passes = self.input_buffers[ig]
- .completed_passes
- .min(neigh_complete_passes);
- }
- }
- }
- if self.shared.num_passes <= neigh_complete_passes {
- for (c, b) in self.input_buffers[g].data.iter_mut().enumerate() {
- if let Some(b) = std::mem::take(b) {
- self.scratch_channel_buffers[c].push(b);
- }
- }
- }
- }
- Ok(())
- }
-}
-
impl RenderPipeline for LowMemoryRenderPipeline {
type Buffer = RowBuffer;
fn new_from_shared(shared: RenderPipelineShared<Self::Buffer>) -> Result<Self> {
let mut input_buffers = vec![];
- for _ in 0..shared.group_chan_ready_passes.len() {
- input_buffers.push(InputBuffer {
- data: vec![],
- completed_passes: 0,
- });
- for _ in 0..shared.group_chan_ready_passes[0].len() {
- input_buffers.last_mut().unwrap().data.push(None);
- }
+ let nc = shared.num_channels();
+ for _ in 0..shared.group_chan_complete.len() {
+ input_buffers.push(InputBuffer::new(nc));
}
- let nc = shared.channel_info[0].len();
let mut previous_inout: Vec<_> = (0..nc).map(|x| (0usize, x)).collect();
let mut stage_input_buffer_index = vec![];
let mut next_border_and_cur_downsample = vec![vec![]];
@@ -245,9 +103,10 @@ impl RenderPipeline for LowMemoryRenderPipeline {
let mut initial_buffers = vec![];
for chan in 0..nc {
initial_buffers.push(RowBuffer::new(
- shared.channel_info[0][chan].ty.unwrap(),
+ shared.channel_info[0][chan].ty.unwrap_or(DataTypeTag::U8),
next_border_and_cur_downsample[0][chan].0 as usize,
0,
+ 0,
shared.chunk_size >> shared.channel_info[0][chan].downsample.0,
)?);
}
@@ -261,6 +120,7 @@ impl RenderPipeline for LowMemoryRenderPipeline {
stage.output_type().unwrap(),
*next_y_border as usize,
stage.shift().1 as usize,
+ stage.shift().0 as usize,
shared.chunk_size >> *dsx,
)?);
}
@@ -385,6 +245,24 @@ impl RenderPipeline for LowMemoryRenderPipeline {
})
.collect();
+ let mut border_size = (0, 0);
+ for c in 0..nc {
+ border_size.0 = border_size
+ .0
+ .max(border_pixels[c].0 << shared.channel_info[0][c].downsample.0);
+ border_size.1 = border_size
+ .1
+ .max(border_pixels[c].1 << shared.channel_info[0][c].downsample.1);
+ }
+ for s in 0..shared.stages.len() {
+ border_size.0 = border_size
+ .0
+ .max(border_pixels_per_stage[s].0 << downsampling_for_stage[s].0);
+ border_size.1 = border_size
+ .1
+ .max(border_pixels_per_stage[s].1 << downsampling_for_stage[s].1);
+ }
+
Ok(Self {
input_buffers,
stage_input_buffer_index,
@@ -392,7 +270,7 @@ impl RenderPipeline for LowMemoryRenderPipeline {
padding_was_rendered: false,
save_buffer_info,
stage_output_border_pixels: border_pixels_per_stage,
- has_nontrivial_border: border_pixels.iter().any(|x| *x != (0, 0)),
+ border_size,
input_border_pixels: border_pixels,
local_states: shared
.stages
@@ -403,13 +281,13 @@ impl RenderPipeline for LowMemoryRenderPipeline {
downsampling_for_stage,
opaque_alpha_buffers,
sorted_buffer_indices,
- scratch_channel_buffers: (0..nc).map(|_| vec![]).collect(),
+ scratch_channel_buffers: (0..nc * 3).map(|_| vec![]).collect(),
})
}
#[instrument(skip_all, err)]
fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>> {
- if let Some(b) = self.scratch_channel_buffers[channel].pop() {
+ if let Some(b) = self.maybe_get_scratch_buffer(channel, 0) {
return Ok(Image::from_raw(b));
}
let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
@@ -420,20 +298,23 @@ impl RenderPipeline for LowMemoryRenderPipeline {
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
- debug!(
- "filling data for group {}, channel {}, using type {:?}",
- group_id,
- channel,
- T::DATA_TYPE_ID,
- );
- self.input_buffers[group_id].data[channel] = Some(buf.into_raw());
- self.shared.group_chan_ready_passes[group_id][channel] += num_passes;
+ if self.shared.channel_is_used[channel] {
+ debug!(
+ "filling data for group {}, channel {}, using type {:?}",
+ group_id,
+ channel,
+ T::DATA_TYPE_ID,
+ );
+ self.input_buffers[group_id].set_buffer(channel, buf.into_raw());
+ self.shared.group_chan_complete[group_id][channel] = complete;
- self.render_with_new_group(group_id, buffer_splitter)
+ self.render_with_new_group(group_id, buffer_splitter)?;
+ }
+ Ok(())
}
fn check_buffer_sizes(&self, buffers: &mut [Option<JxlOutputBuffer>]) -> Result<()> {
@@ -535,6 +416,10 @@ impl RenderPipeline for LowMemoryRenderPipeline {
Ok(())
}
+ fn mark_group_to_rerender(&mut self, g: usize) {
+ self.input_buffers[g].is_ready = false;
+ }
+
fn box_inout_stage<S: super::RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn RunInOutStage<Self::Buffer>> {
@@ -546,4 +431,8 @@ impl RenderPipeline for LowMemoryRenderPipeline {
) -> Box<dyn RunInPlaceStage<Self::Buffer>> {
Box::new(stage)
}
+
+ fn used_channel_mask(&self) -> &[bool] {
+ &self.shared.channel_is_used
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
index 6d4ded09003bd..6f9b65b67e1fe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
@@ -8,15 +8,12 @@ use std::ops::Range;
use crate::{
api::JxlOutputBuffer,
error::Result,
- image::DataTypeTag,
+ image::{DataTypeTag, Rect},
render::{
- internal::Stage,
- low_memory_pipeline::{
- helpers::{get_distinct_indices, mirror},
- run_stage::ExtraInfo,
- },
+ internal::{ChannelInfo, Stage},
+ low_memory_pipeline::{helpers::get_distinct_indices, run_stage::ExtraInfo},
},
- util::{ShiftRightCeil, SmallVec, tracing_wrappers::*},
+ util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*},
};
use super::{LowMemoryRenderPipeline, row_buffers::RowBuffer};
@@ -70,79 +67,134 @@ fn apply_x_padding(
}
impl LowMemoryRenderPipeline {
- fn fill_initial_buffers(&mut self, c: usize, y: usize, y0: usize, (gx, gy): (usize, usize)) {
- let ty = self.shared.channel_info[0][c]
- .ty
- .expect("Channel info should be populated at this point");
- let gys = 1
- << (self.shared.log_group_size - self.shared.channel_info[0][c].downsample.1 as usize);
+ fn fill_initial_buffers(
+ &mut self,
+ c: usize,
+ y: usize,
+ (x0, xsize): (usize, usize),
+ (gx, gy): (usize, usize),
+ ) {
+ if !self.shared.channel_is_used[c] {
+ return;
+ }
+ let ChannelInfo {
+ ty,
+ downsample: (dx, dy),
+ } = self.shared.channel_info[0][c];
+ let ty = ty.expect("Channel info should be populated at this point");
+ let group_ysize = 1 << (self.shared.log_group_size - dy as usize);
+ let group_xsize = 1 << (self.shared.log_group_size - dx as usize);
+
+ let (bx, by) = self.border_size;
- let (input_y, igy) = if y < y0 {
- (y + gys - y0, gy - 1)
- } else if y >= y0 + gys {
- (y - y0 - gys, gy + 1)
+ let group_y0 = gy * group_ysize;
+ let group_x0 = gx << (self.shared.log_group_size - dx as usize);
+ let group_x1 = group_x0 + group_xsize;
+
+ let (input_y, igy, is_topbottom) = if y < group_y0 {
+ (y + (by >> dy) * 4 - group_y0, gy - 1, true)
+ } else if y >= group_y0 + group_ysize {
+ (y - group_y0 - group_ysize, gy + 1, true)
} else {
- (y - y0, gy)
+ (y - group_y0, gy, false)
};
let output_row = self.row_buffers[0][c].get_row_mut::<u8>(y);
- // Both are in units of bytes.
- let x0_offset = RowBuffer::x0_byte_offset();
- let extrax = self.input_border_pixels[c].0 * ty.size();
+
+ let copy_x0 = x0.saturating_sub(self.input_border_pixels[c].0);
+ let copy_x1 =
+ (x0 + xsize + self.input_border_pixels[c].0).min(self.shared.input_size.0.shrc(dx));
+
+ debug_assert!(copy_x1 >= group_x0);
+
+ let mut copy_byte_offset = RowBuffer::x0_byte_offset() - (x0 - copy_x0) * ty.size();
let base_gid = igy * self.shared.group_count.0 + gx;
- // Previous group horizontally, if any.
- if gx > 0 && extrax != 0 {
- let input_buf = self.input_buffers[base_gid - 1].data[c].as_ref().unwrap();
+ // Previous group horizontally, if needed.
+ if copy_x0 < group_x0 {
+ let (input_buf, xs) = if is_topbottom {
+ (
+ self.input_buffers[base_gid - 1].topbottom[c]
+ .as_ref()
+ .unwrap(),
+ group_xsize,
+ )
+ } else {
+ (
+ self.input_buffers[base_gid - 1].leftright[c]
+ .as_ref()
+ .unwrap(),
+ 4 * (bx >> dx),
+ )
+ };
let input_row = input_buf.row(input_y);
- output_row[x0_offset - extrax..x0_offset]
- .copy_from_slice(&input_row[input_buf.byte_size().0 - extrax..]);
+
+ let to_copy = (group_x0 - copy_x0) * ty.size();
+ let src_byte_offset = xs * ty.size() - to_copy;
+
+ output_row[copy_byte_offset..copy_byte_offset + to_copy]
+ .copy_from_slice(&input_row[src_byte_offset..src_byte_offset + to_copy]);
+ copy_byte_offset += to_copy;
}
- let input_buf = self.input_buffers[base_gid].data[c].as_ref().unwrap();
+ let input_buf = if is_topbottom {
+ self.input_buffers[base_gid].topbottom[c].as_ref().unwrap()
+ } else {
+ self.input_buffers[base_gid].data[c].as_ref().unwrap()
+ };
let input_row = input_buf.row(input_y);
- let gxs = input_buf.byte_size().0; // bytes
- output_row[x0_offset..x0_offset + gxs].copy_from_slice(input_row);
+ let copy_start = copy_x0.saturating_sub(group_x0) * ty.size();
+ let copy_end = (copy_x1.min(group_x1) - group_x0) * ty.size();
+ let to_copy = copy_end - copy_start;
+ output_row[copy_byte_offset..copy_byte_offset + to_copy]
+ .copy_from_slice(&input_row[copy_start..copy_end]);
+ copy_byte_offset += to_copy;
// Next group horizontally, if any.
- if gx + 1 < self.shared.group_count.0 && extrax != 0 {
- let input_buf = self.input_buffers[base_gid + 1].data[c].as_ref().unwrap();
+ if copy_x1 > group_x1 {
+ let input_buf = if is_topbottom {
+ self.input_buffers[base_gid + 1].topbottom[c]
+ .as_ref()
+ .unwrap()
+ } else {
+ self.input_buffers[base_gid + 1].leftright[c]
+ .as_ref()
+ .unwrap()
+ };
let input_row = input_buf.row(input_y);
let dx = self.shared.channel_info[0][c].downsample.0;
let gid = gy * self.shared.group_count.0 + gx;
let next_group_xsize = self.shared.group_size(gid + 1).0.shrc(dx);
- let border_x = extrax.min(next_group_xsize * ty.size());
- output_row[gxs + x0_offset..gxs + x0_offset + border_x]
- .copy_from_slice(&input_row[..border_x]);
- if border_x < extrax {
- let pad_from = ((gxs + border_x) / ty.size()) as isize;
- let pad_to = ((gxs + extrax) / ty.size()) as isize;
+ let border_x = (copy_x1 - group_x1).min(next_group_xsize);
+ output_row[copy_byte_offset..copy_byte_offset + border_x * ty.size()]
+ .copy_from_slice(&input_row[..border_x * ty.size()]);
+ if border_x + group_x1 < copy_x1 {
+ let pad_from = (xsize + border_x) as isize;
+ let pad_to = (xsize + copy_x1 - group_x1) as isize;
apply_x_padding(ty, output_row, pad_from..pad_to, 0..pad_from);
}
}
}
- // Renders a single group worth of data.
+    // Renders *parts* of a group's worth of data.
+ // In particular, renders the sub-rectangle given in `image_area`, where (1, 1) refers to
+ // the center of the group, and 0 and 2 include data from the neighbouring group (if any).
#[instrument(skip(self, buffers))]
pub(super) fn render_group(
&mut self,
(gx, gy): (usize, usize),
+ image_area: Rect,
buffers: &mut [Option<JxlOutputBuffer>],
) -> Result<()> {
- let gid = gy * self.shared.group_count.0 + gx;
- let (xsize, num_rows) = self.shared.group_size(gid);
- let (x0, y0) = self.shared.group_offset(gid);
+ let start_of_row = image_area.origin.0 == 0;
+ let end_of_row = image_area.end().0 == self.shared.input_size.0;
- let num_channels = self.shared.num_channels();
- let mut num_extra_rows = 0;
+ let Rect {
+ origin: (x0, y0),
+ size: (xsize, num_rows),
+ } = image_area;
- for c in 0..num_channels {
- num_extra_rows = num_extra_rows
- .max(self.input_border_pixels[c].1 << self.shared.channel_info[0][c].downsample.1);
- }
- for s in 0..self.shared.stages.len() {
- num_extra_rows = num_extra_rows
- .max(self.stage_output_border_pixels[s].1 << self.downsampling_for_stage[s].1);
- }
+ let num_channels = self.shared.num_channels();
+ let num_extra_rows = self.border_size.1;
// This follows the same implementation strategy as the C++ code in libjxl.
// We pretend that every stage has a vertical shift of 0, i.e. it is as tall
@@ -152,7 +204,7 @@ impl LowMemoryRenderPipeline {
// when vy % (1<<vshift) == 0.
let vy0 = y0.saturating_sub(num_extra_rows);
- let vy1 = y0 + num_rows + num_extra_rows;
+ let vy1 = image_area.end().1 + num_extra_rows;
for vy in vy0..vy1 {
let mut current_origin = (0, 0);
@@ -161,7 +213,7 @@ impl LowMemoryRenderPipeline {
// Step 1: read input channels.
for c in 0..num_channels {
// Same logic as below, but adapted to the input stage.
- let dy = self.shared.channel_info[0][c].downsample.1;
+ let (dx, dy) = self.shared.channel_info[0][c].downsample;
let scaled_y_border = self.input_border_pixels[c].1 << dy;
let stage_vy = vy as isize - num_extra_rows as isize + scaled_y_border as isize;
if stage_vy % (1 << dy) != 0 {
@@ -176,7 +228,7 @@ impl LowMemoryRenderPipeline {
continue;
}
let y = y as usize;
- self.fill_initial_buffers(c, y, y0 >> dy, (gx, gy));
+ self.fill_initial_buffers(c, y, (x0 >> dx, xsize >> dx), (gx, gy));
}
// Step 2: go through stages one by one.
for (i, stage) in self.shared.stages.iter().enumerate() {
@@ -215,8 +267,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
- is_first_xgroup: gx == 0,
- is_last_xgroup: gx + 1 == self.shared.group_count.0,
+ start_of_row,
+ end_of_row,
image_height: shifted_ysize,
},
&mut buffers,
@@ -294,8 +346,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
- is_first_xgroup: gx == 0,
- is_last_xgroup: gx + 1 == self.shared.group_count.0,
+ start_of_row,
+ end_of_row,
image_height: shifted_ysize,
},
&input_data,
@@ -351,8 +403,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0,
out_extra_x: 0,
- is_first_xgroup: false,
- is_last_xgroup: false,
+ start_of_row: false,
+ end_of_row: false,
image_height: self.shared.input_size.1,
},
&mut buffers,
@@ -397,8 +449,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0,
out_extra_x: 0,
- is_first_xgroup: false,
- is_last_xgroup: false,
+ start_of_row: false,
+ end_of_row: false,
image_height: self.shared.input_size.1,
},
&input_data,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
index 43c4617c41e92..4cf01155da2a4 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
@@ -33,13 +33,15 @@ impl RowBuffer {
data_type: DataTypeTag,
next_y_border: usize,
y_shift: usize,
+ x_shift: usize,
row_len: usize,
) -> Result<Self> {
let num_rows = (1 << y_shift) + 2 * next_y_border;
let num_rows = num_rows.next_power_of_two();
// Input offset is at *one* cacheline, and we need up to *two* cachelines on the other
// side as the data might exceed xsize slightly.
- let row_stride = (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + 3;
+ let row_stride =
+ (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + (3 << x_shift);
let mut buffer = Vec::<CacheLine>::new();
buffer.try_reserve_exact(row_stride * num_rows)?;
buffer.resize(row_stride * num_rows, CacheLine::default());
@@ -54,13 +56,15 @@ impl RowBuffer {
/// Creates a new row buffer with a single row filled with a repeating pattern.
/// Used for constant values like opaque alpha.
pub fn new_filled(data_type: DataTypeTag, row_len: usize, fill_pattern: &[u8]) -> Result<Self> {
- let mut result = Self::new(data_type, 0, 0, row_len)?;
+ let mut result = Self::new(data_type, 0, 0, 0, row_len)?;
let row_bytes: &mut [u8] = result.get_row_mut(0);
- let start = Self::x0_offset::<u8>();
- let end = start + row_len * fill_pattern.len();
- for (i, byte) in row_bytes[start..end].iter_mut().enumerate() {
+
+ // Fill the *entire* allocated row, including the padding on both sides,
+ // so cross-group border sampling doesn't read zeros (transparent alpha).
+ for (i, byte) in row_bytes.iter_mut().enumerate() {
*byte = fill_pattern[i % fill_pattern.len()];
}
+
Ok(result)
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
index 704b5a5bdc079..5acced8b34dda 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
@@ -9,9 +9,9 @@ use crate::{
render::{
Channels, ChannelsMut, RunInPlaceStage,
internal::{PipelineBuffer, RunInOutStage},
- low_memory_pipeline::{helpers::mirror, render_group::ChannelVec},
+ low_memory_pipeline::render_group::ChannelVec,
},
- util::{ShiftRightCeil, SmallVec, tracing_wrappers::*},
+ util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*},
};
use super::{
@@ -26,8 +26,8 @@ pub struct ExtraInfo {
pub(super) out_extra_x: usize,
pub(super) current_row: usize,
pub(super) group_x0: usize,
- pub(super) is_first_xgroup: bool,
- pub(super) is_last_xgroup: bool,
+ pub(super) start_of_row: bool,
+ pub(super) end_of_row: bool,
pub(super) image_height: usize,
}
@@ -46,16 +46,16 @@ impl<T: RenderPipelineInPlaceStage> RunInPlaceStage<RowBuffer> for T {
group_x0,
out_extra_x,
image_height: _,
- is_first_xgroup,
- is_last_xgroup,
+ start_of_row,
+ end_of_row,
}: ExtraInfo,
buffers: &mut [&mut RowBuffer],
state: Option<&mut dyn Any>,
) {
let x0 = RowBuffer::x0_offset::<T::Type>();
- let xpre = if is_first_xgroup { 0 } else { out_extra_x };
+ let xpre = if start_of_row { 0 } else { out_extra_x };
let xstart = x0 - xpre;
- let xend = x0 + xsize + if is_last_xgroup { 0 } else { out_extra_x };
+ let xend = x0 + xsize + if end_of_row { 0 } else { out_extra_x };
let mut rows: ChannelVec<_> = buffers
.iter_mut()
.map(|x| &mut x.get_row_mut::<T::Type>(current_row)[xstart..])
@@ -80,8 +80,8 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
group_x0,
out_extra_x,
image_height,
- is_first_xgroup,
- is_last_xgroup,
+ start_of_row,
+ end_of_row,
}: ExtraInfo,
input_buffers: &[&RowBuffer],
output_buffers: &mut [RowBuffer],
@@ -89,7 +89,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
) {
let ibordery = Self::BORDER.1 as isize;
let x0 = RowBuffer::x0_offset::<T::InputT>();
- let xpre = if is_first_xgroup {
+ let xpre = if start_of_row {
0
} else {
out_extra_x.shrc(T::SHIFT.0)
@@ -97,7 +97,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
let xstart = x0 - xpre;
let xend = x0
+ xsize
- + if is_last_xgroup {
+ + if end_of_row {
0
} else {
out_extra_x.shrc(T::SHIFT.0)
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
index b586e9d8dd00d..a5024c175cc7c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
@@ -8,105 +8,200 @@
use std::mem::MaybeUninit;
use std::ops::Range;
-use jxl_simd::{F32SimdVec, SimdDescriptor, simd_function};
+use jxl_simd::{F32SimdVec, SimdDescriptor, U8SimdVec, U16SimdVec, simd_function};
use crate::{
api::{Endianness, JxlDataFormat, JxlOutputBuffer},
render::low_memory_pipeline::row_buffers::RowBuffer,
};
-#[inline(always)]
-fn run_interleaved_2<D: SimdDescriptor>(
- d: D,
- a: &[f32],
- b: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for ((chunk_a, chunk_b), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 2))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- D::F32Vec::store_interleaved_2_uninit(va, vb, chunk_out);
- n += len;
- }
+macro_rules! define_run_interleaved {
+ ($fn_name:ident, $ty:ty, $vec_trait:ident, $store_fn:ident, $cnt:expr, $($arg:ident),+) => {
+ #[inline(always)]
+ fn $fn_name<D: SimdDescriptor>(
+ d: D,
+ $($arg: &[$ty]),+,
+ out: &mut [MaybeUninit<$ty>],
+ ) -> usize {
+ let len = D::$vec_trait::LEN;
+ let mut n = 0;
+ let limit = [$($arg.len()),+][0];
+
+ {
+ let out_chunks = out[..limit * $cnt].chunks_exact_mut(len * $cnt);
+ $(let mut $arg = $arg.chunks_exact(len);)+
+ for out_chunk in out_chunks {
+ $(let $arg = D::$vec_trait::load(d, $arg.next().unwrap());)+
+ D::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len;
+ }
+ }
- n
+ let d256 = d.maybe_downgrade_256bit();
+ let len256 = <D::Descriptor256 as SimdDescriptor>::$vec_trait::LEN;
+ if len256 < len {
+ let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len256 * $cnt);
+ $(let mut $arg = $arg[n..limit].chunks_exact(len256);)+
+ for out_chunk in out_chunks {
+ $(let $arg = <D::Descriptor256 as SimdDescriptor>::$vec_trait::load(d256, $arg.next().unwrap());)+
+ <D::Descriptor256 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len256;
+ }
+ }
+
+ let d128 = d.maybe_downgrade_128bit();
+ let len128 = <D::Descriptor128 as SimdDescriptor>::$vec_trait::LEN;
+ if len128 < len {
+ let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len128 * $cnt);
+ $(let mut $arg = $arg[n..limit].chunks_exact(len128);)+
+ for out_chunk in out_chunks {
+ $(let $arg = <D::Descriptor128 as SimdDescriptor>::$vec_trait::load(d128, $arg.next().unwrap());)+
+ <D::Descriptor128 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len128;
+ }
+ }
+
+ n
+ }
+ };
}
-#[inline(always)]
-fn run_interleaved_3<D: SimdDescriptor>(
+define_run_interleaved!(
+ run_interleaved_2_f32,
+ f32,
+ F32Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_f32,
+ f32,
+ F32Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_f32,
+ f32,
+ F32Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
+
+simd_function!(
+ store_interleaved_f32,
d: D,
- a: &[f32],
- b: &[f32],
- c: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for (((chunk_a, chunk_b), chunk_c), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(c.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 3))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- let vc = D::F32Vec::load(d, chunk_c);
- D::F32Vec::store_interleaved_3_uninit(va, vb, vc, chunk_out);
- n += len;
+ fn store_interleaved_impl_f32(
+ inputs: &[&[f32]],
+ output: &mut [MaybeUninit<f32>]
+ ) -> usize {
+ match inputs.len() {
+ 2 => run_interleaved_2_f32(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_f32(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_f32(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ _ => 0,
+ }
}
+);
- n
-}
+define_run_interleaved!(
+ run_interleaved_2_u8,
+ u8,
+ U8Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_u8,
+ u8,
+ U8Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_u8,
+ u8,
+ U8Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
-#[inline(always)]
-fn run_interleaved_4<D: SimdDescriptor>(
+simd_function!(
+ store_interleaved_u8,
d: D,
- a: &[f32],
- b: &[f32],
- c: &[f32],
- e: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for ((((chunk_a, chunk_b), chunk_c), chunk_e), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(c.chunks_exact(len))
- .zip(e.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 4))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- let vc = D::F32Vec::load(d, chunk_c);
- let ve = D::F32Vec::load(d, chunk_e);
- D::F32Vec::store_interleaved_4_uninit(va, vb, vc, ve, chunk_out);
- n += len;
+ fn store_interleaved_impl_u8(
+ inputs: &[&[u8]],
+ output: &mut [MaybeUninit<u8>]
+ ) -> usize {
+ match inputs.len() {
+ 2 => run_interleaved_2_u8(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_u8(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_u8(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ _ => 0,
+ }
}
+);
- n
-}
+define_run_interleaved!(
+ run_interleaved_2_u16,
+ u16,
+ U16Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_u16,
+ u16,
+ U16Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_u16,
+ u16,
+ U16Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
simd_function!(
- store_interleaved,
+ store_interleaved_u16,
d: D,
- fn store_interleaved_impl(
- inputs: &[&[f32]],
- output: &mut [MaybeUninit<f32>]
+ fn store_interleaved_impl_u16(
+ inputs: &[&[u16]],
+ output: &mut [MaybeUninit<u16>]
) -> usize {
match inputs.len() {
- 2 => run_interleaved_2(d, inputs[0], inputs[1], output),
- 3 => run_interleaved_3(d, inputs[0], inputs[1], inputs[2], output),
- 4 => run_interleaved_4(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ 2 => run_interleaved_2_u16(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_u16(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_u16(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
_ => 0,
}
}
@@ -153,6 +248,43 @@ pub(super) fn store(
}
input_buf.len() / data_format.bytes_per_sample()
}
+ (channels, 1, true) if (2..=4).contains(&channels) => {
+ let start_u8 = byte_start;
+ let end_u8 = byte_end;
+ let mut slices = [&[] as &[u8]; 4];
+ for (i, buf) in input_buf.iter().enumerate() {
+ slices[i] = &buf.get_row::<u8>(input_y)[start_u8..end_u8];
+ }
+ // Note that, by the conditions on the *_uninit methods on U8Vec, this function
+ // never writes uninitialized memory.
+ store_interleaved_u8(&slices[..channels], output_buf)
+ }
+ (channels, 2, true) if (2..=4).contains(&channels) => {
+ let ptr = output_buf.as_mut_ptr();
+ if ptr.align_offset(std::mem::align_of::<u16>()) == 0 {
+ let len_u16 = output_buf.len() / 2;
+ // SAFETY: we checked alignment above, and the size is correct by definition
+ // (note that it is guaranteed that MaybeUninit<T> has the same size and align
+ // of T for any T).
+ let output_u16 = unsafe {
+ std::slice::from_raw_parts_mut(
+ output_buf.as_mut_ptr().cast::<MaybeUninit<u16>>(),
+ len_u16,
+ )
+ };
+ let start_u16 = byte_start / 2;
+ let end_u16 = byte_end / 2;
+ let mut slices = [&[] as &[u16]; 4];
+ for (i, buf) in input_buf.iter().enumerate() {
+ slices[i] = &buf.get_row::<u16>(input_y)[start_u16..end_u16];
+ }
+ // Note that, by the conditions on the *_uninit methods on U16Vec, this function
+ // never writes uninitialized memory.
+ store_interleaved_u16(&slices[..channels], output_u16)
+ } else {
+ 0
+ }
+ }
(channels, 4, true) if (2..=4).contains(&channels) => {
let ptr = output_buf.as_mut_ptr();
if ptr.align_offset(std::mem::align_of::<f32>()) == 0 {
@@ -177,7 +309,7 @@ pub(super) fn store(
// Note that, by the conditions on the *_uninit methods on F32Vec, this function
// never writes uninitialized memory.
- store_interleaved(&slices[..channels], output_f32)
+ store_interleaved_f32(&slices[..channels], output_f32)
} else {
0
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
index d5d13699ec3f7..98c2975535536 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
@@ -18,7 +18,7 @@ mod identity;
impl SaveStage {
// Takes as input only those channels that are *actually* saved.
#[allow(clippy::too_many_arguments)]
- pub(super) fn save_lowmem(
+ pub(crate) fn save_lowmem(
&self,
data: &[&RowBuffer],
buffers: &mut [Option<JxlOutputBuffer>],
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
index 4270f22eb3af7..5748513ba7ad9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
@@ -17,8 +17,8 @@ pub mod buffer_splitter;
mod builder;
mod channels;
mod internal;
-mod low_memory_pipeline;
-mod save;
+pub mod low_memory_pipeline;
+pub mod save;
mod simd_utils;
#[cfg(test)]
mod simple_pipeline;
@@ -42,6 +42,11 @@ pub(crate) use low_memory_pipeline::LowMemoryRenderPipeline;
#[cfg(test)]
pub(crate) use simple_pipeline::SimpleRenderPipeline;
+pub enum StageSpecialCase {
+ F32ToU8 { channel: usize, bit_depth: u8 },
+ ModularToF32 { channel: usize, bit_depth: u8 },
+}
+
/// Modifies channels in-place.
pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display {
type Type: ImageDataType;
@@ -60,6 +65,10 @@ pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display {
}
fn uses_channel(&self, c: usize) -> bool;
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ None
+ }
}
/// Modifies data and writes it to a new buffer, of possibly different type.
@@ -97,6 +106,10 @@ pub trait RenderPipelineInOutStage: Any + std::fmt::Display {
}
fn uses_channel(&self, c: usize) -> bool;
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ None
+ }
}
// TODO(veluca): find a way to reduce the generated code due to having two builders, to integrate
@@ -111,14 +124,13 @@ pub(crate) trait RenderPipeline: Sized {
/// pass, a new buffer, or a re-used buffer from i.e. previously decoded frames.
fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>>;
- /// Gives back the buffer for a channel and group to the render pipeline, marking that
- /// `num_passes` additional passes (wrt. the previous call to this method for the same channel
- /// and group, or 0 if no previous call happend) were rendered into the input buffer.
+ /// Gives back the buffer for a channel and group to the render pipeline, marking whether
+ /// this will be the last time that this function is called for this group.
fn set_buffer_for_group<T: ImageDataType>(
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()>;
@@ -131,6 +143,9 @@ pub(crate) trait RenderPipeline: Sized {
/// implementation to ensure rendering only happens once.
fn render_outside_frame(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()>;
+ /// Marks a group for being re-rendered later.
+ fn mark_group_to_rerender(&mut self, g: usize);
+
fn box_inout_stage<S: RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn RunInOutStage<Self::Buffer>>;
@@ -138,4 +153,6 @@ pub(crate) trait RenderPipeline: Sized {
fn box_inplace_stage<S: RenderPipelineInPlaceStage>(
stage: S,
) -> Box<dyn RunInPlaceStage<Self::Buffer>>;
+
+ fn used_channel_mask(&self) -> &[bool];
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
index df09a8fd1c462..227003bdfe4f7 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
@@ -20,37 +20,28 @@ mod extend;
mod run_stage;
mod save;
-/// A RenderPipeline that waits for all input of a pass to be ready before doing any rendering, and
+/// A RenderPipeline that waits for all input to be ready before doing any rendering, and
/// prioritizes simplicity over memory usage and computational efficiency.
/// Eventually meant to be used only for verification purposes.
pub struct SimpleRenderPipeline {
shared: RenderPipelineShared<Image<f64>>,
input_buffers: Vec<Image<f64>>,
- completed_passes: usize,
}
impl SimpleRenderPipeline {
#[instrument(skip_all, err)]
fn do_render(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()> {
- let ready_passes = self
+ let ready = self
.shared
- .group_chan_ready_passes
+ .group_chan_complete
.iter()
.flat_map(|x| x.iter())
- .copied()
- .min()
- .unwrap();
- if ready_passes <= self.completed_passes {
- debug!(
- "no more ready passes ({} completed, {ready_passes} ready)",
- self.completed_passes
- );
+ .all(|x| *x);
+ if !ready {
+ debug!("not yet ready");
return Ok(());
}
- debug!(
- "new ready passes ({} completed, {ready_passes} ready)",
- self.completed_passes
- );
+ debug!("ready to render");
let mut current_buffers = clone_images(&self.input_buffers)?;
@@ -129,7 +120,6 @@ impl SimpleRenderPipeline {
current_buffers = output_buffers;
}
- self.completed_passes = ready_passes;
Ok(())
}
}
@@ -154,7 +144,6 @@ impl RenderPipeline for SimpleRenderPipeline {
Ok(Self {
shared,
input_buffers,
- completed_passes: 0,
})
}
@@ -168,7 +157,7 @@ impl RenderPipeline for SimpleRenderPipeline {
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
@@ -178,22 +167,24 @@ impl RenderPipeline for SimpleRenderPipeline {
channel,
T::DATA_TYPE_ID,
);
- let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
- let goffset = self.shared.group_offset(group_id);
- let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel];
- let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1);
- debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset);
- let ty = ty.unwrap();
- assert_eq!(ty, T::DATA_TYPE_ID);
- let total_sz = self.input_buffers[channel].size();
- for y in 0..sz.1.min(total_sz.1 - off.1) {
- let row_in = buf.row(y);
- let row_out = self.input_buffers[channel].row_mut(y + off.1);
- for x in 0..sz.0.min(total_sz.0 - off.0) {
- row_out[x + off.0] = row_in[x].to_f64();
+ if self.shared.channel_is_used[channel] {
+ let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
+ let goffset = self.shared.group_offset(group_id);
+ let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel];
+ let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1);
+ debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset);
+ let ty = ty.unwrap();
+ assert_eq!(ty, T::DATA_TYPE_ID);
+ let total_sz = self.input_buffers[channel].size();
+ for y in 0..sz.1.min(total_sz.1 - off.1) {
+ let row_in = buf.row(y);
+ let row_out = self.input_buffers[channel].row_mut(y + off.1);
+ for x in 0..sz.0.min(total_sz.0 - off.0) {
+ row_out[x + off.0] = row_in[x].to_f64();
+ }
}
+ self.shared.group_chan_complete[group_id][channel] = complete;
}
- self.shared.group_chan_ready_passes[group_id][channel] += num_passes;
self.do_render(buffer_splitter)
}
@@ -208,6 +199,8 @@ impl RenderPipeline for SimpleRenderPipeline {
Ok(())
}
+ fn mark_group_to_rerender(&mut self, _g: usize) {}
+
fn box_inout_stage<S: RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn super::RunInOutStage<Self::Buffer>> {
@@ -219,4 +212,8 @@ impl RenderPipeline for SimpleRenderPipeline {
) -> Box<dyn super::RunInPlaceStage<Self::Buffer>> {
Box::new(stage)
}
+
+ fn used_channel_mask(&self) -> &[bool] {
+ &self.shared.channel_is_used
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
index 24a0ee6a59065..bfaea994305a2 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
@@ -13,7 +13,7 @@ use crate::{
RenderPipelineInOutStage, RenderPipelineInPlaceStage, RunInOutStage, RunInPlaceStage,
internal::PipelineBuffer,
},
- util::{SmallVec, round_up_size_to_cache_line, tracing_wrappers::*},
+ util::{SmallVec, mirror, round_up_size_to_cache_line, tracing_wrappers::*},
};
impl PipelineBuffer for Image<f64> {
@@ -122,31 +122,20 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<Image<f64>> for T {
numc
];
- let mirror = |mut v: i64, size: i64| {
- while v < 0 || v >= size {
- if v < 0 {
- v = -v - 1;
- }
- if v >= size {
- v = size + (size - v) - 1;
- }
- }
- v as usize
- };
for y in 0..input_size.1 {
for x in (0..input_size.0).step_by(chunk_size) {
- let border_x = Self::BORDER.0 as i64;
- let border_y = Self::BORDER.1 as i64;
+ let border_x = Self::BORDER.0 as isize;
+ let border_y = Self::BORDER.1 as isize;
let xsize = input_size.0.min(x + chunk_size) - x;
- let xs = xsize as i64;
+ let xs = xsize as isize;
debug!("position: {x}x{y} xsize: {xsize}");
for c in 0..numc {
for iy in -border_y..=border_y {
- let imgy = mirror(y as i64 + iy, input_size.1 as i64);
+ let imgy = mirror(y as isize + iy, input_size.1);
let in_row = input_buffers[c].row(imgy);
let buf_in_row = &mut buffer_in[c][(iy + border_y) as usize];
for ix in (-border_x..0).chain(xs..xs + border_x) {
- let imgx = mirror(x as i64 + ix, input_size.0 as i64);
+ let imgx = mirror(x as isize + ix, input_size.0);
buf_in_row[(ix + border_x) as usize] =
T::InputT::from_f64(in_row[imgx]);
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
index 1ddd46f593538..b9a941106c39b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
@@ -3,64 +3,29 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
frame::quantizer::LfQuantFactors,
headers::bit_depth::BitDepth,
- render::{Channels, ChannelsMut, RenderPipelineInOutStage},
+ render::{Channels, ChannelsMut, RenderPipelineInOutStage, StageSpecialCase},
+ util::AtomicRefCell,
};
-use jxl_simd::{F32SimdVec, I32SimdVec, simd_function};
-
-pub struct ConvertU8F32Stage {
- channel: usize,
-}
-
-impl ConvertU8F32Stage {
- pub fn new(channel: usize) -> ConvertU8F32Stage {
- ConvertU8F32Stage { channel }
- }
-}
-
-impl std::fmt::Display for ConvertU8F32Stage {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "convert U8 data to F32 in channel {}", self.channel)
- }
-}
-
-impl RenderPipelineInOutStage for ConvertU8F32Stage {
- type InputT = u8;
- type OutputT = f32;
- const SHIFT: (u8, u8) = (0, 0);
- const BORDER: (u8, u8) = (0, 0);
-
- fn uses_channel(&self, c: usize) -> bool {
- c == self.channel
- }
-
- fn process_row_chunk(
- &self,
- _position: (usize, usize),
- xsize: usize,
- input_rows: &Channels<u8>,
- output_rows: &mut ChannelsMut<f32>,
- _state: Option<&mut dyn std::any::Any>,
- ) {
- let input = &input_rows[0];
- for i in 0..xsize {
- output_rows[0][0][i] = input[0][i] as f32 * (1.0 / 255.0);
- }
- }
-}
+use jxl_simd::{F32SimdVec, I32SimdVec, SimdMask, simd_function};
pub struct ConvertModularXYBToF32Stage {
first_channel: usize,
- scale: [f32; 3],
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
}
impl ConvertModularXYBToF32Stage {
- pub fn new(first_channel: usize, lf_quant: &LfQuantFactors) -> ConvertModularXYBToF32Stage {
+ pub fn new(
+ first_channel: usize,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ ) -> ConvertModularXYBToF32Stage {
ConvertModularXYBToF32Stage {
first_channel,
- scale: lf_quant.quant_factors,
+ lf_quant,
}
}
}
@@ -69,10 +34,9 @@ impl std::fmt::Display for ConvertModularXYBToF32Stage {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
- "convert modular xyb data to F32 in channels {}..{} with scales {:?}",
+ "convert modular xyb data to F32 in channels {}..{}",
self.first_channel,
self.first_channel + 2,
- self.scale
)
}
}
@@ -95,7 +59,8 @@ impl RenderPipelineInOutStage for ConvertModularXYBToF32Stage {
output_rows: &mut ChannelsMut<f32>,
_state: Option<&mut dyn std::any::Any>,
) {
- let [scale_x, scale_y, scale_b] = self.scale;
+ let lf_quant = self.lf_quant.borrow();
+ let [scale_x, scale_y, scale_b] = lf_quant.quant_factors;
assert_eq!(
input_rows.len(),
3,
@@ -257,6 +222,27 @@ fn int_to_float_generic(input: &[i32], output: &mut [f32], bits: u32, exp_bits:
}
}
+// SIMD modular to 32 bit float conversion
+simd_function!(
+ modular_to_float_32bit_simd_dispatch,
+ d: D,
+ fn modular_to_float_32bit_simd(input: &[i32], output: &mut [f32], scale: f32, xsize: usize) {
+ let simd_width = D::I32Vec::LEN;
+
+ let scale = D::F32Vec::splat(d, scale);
+
+ // Process complete SIMD vectors
+ for (in_chunk, out_chunk) in input
+ .chunks_exact(simd_width)
+ .zip(output.chunks_exact_mut(simd_width))
+ .take(xsize.div_ceil(simd_width))
+ {
+ let val = D::I32Vec::load(d, in_chunk);
+ (val.as_f32() * scale).store(out_chunk);
+ }
+ }
+);
+
impl RenderPipelineInOutStage for ConvertModularToF32Stage {
type InputT = i32;
type OutputT = f32;
@@ -279,11 +265,19 @@ impl RenderPipelineInOutStage for ConvertModularToF32Stage {
if self.bit_depth.floating_point_sample() {
int_to_float(input[0], output_rows[0][0], &self.bit_depth, xsize);
} else {
- // TODO(veluca): SIMDfy this code.
let scale = 1.0 / ((1u64 << self.bit_depth.bits_per_sample()) - 1) as f32;
- for i in 0..xsize {
- output_rows[0][0][i] = input[0][i] as f32 * scale;
- }
+ modular_to_float_32bit_simd_dispatch(input[0], output_rows[0][0], scale, xsize);
+ }
+ }
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ if self.bit_depth.floating_point_sample() {
+ None
+ } else {
+ Some(StageSpecialCase::ModularToF32 {
+ channel: self.channel,
+ bit_depth: self.bit_depth.bits_per_sample() as u8,
+ })
}
}
}
@@ -358,6 +352,89 @@ impl RenderPipelineInOutStage for ConvertF32ToU8Stage {
let max = ((1u32 << self.bit_depth) - 1) as f32;
f32_to_u8_simd_dispatch(input, output, max, xsize);
}
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ Some(StageSpecialCase::F32ToU8 {
+ channel: self.channel,
+ bit_depth: self.bit_depth,
+ })
+ }
+}
+
+/// Stage that converts i32 values to u8 values, applying a multiplier.
+pub struct ConvertI32ToU8Stage {
+ channel: usize,
+ multiplier: i32,
+ max: i32,
+}
+
+impl ConvertI32ToU8Stage {
+ pub fn new(channel: usize, multiplier: i32, max: i32) -> ConvertI32ToU8Stage {
+ ConvertI32ToU8Stage {
+ channel,
+ multiplier,
+ max,
+ }
+ }
+}
+
+impl std::fmt::Display for ConvertI32ToU8Stage {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "convert I32 to U8 in channel {} with multiplier {}",
+ self.channel, self.multiplier
+ )
+ }
+}
+
+// SIMD I32 to U8 conversion
+simd_function!(
+ i32_to_u8_simd_dispatch,
+ d: D,
+ fn i32_to_u8_simd(input: &[i32], output: &mut [u8], scale: i32, max: i32, xsize: usize) {
+ let simd_width = D::F32Vec::LEN;
+ let scale = D::I32Vec::splat(d, scale);
+ let max = D::I32Vec::splat(d, max);
+ let zero = D::I32Vec::splat(d, 0);
+
+ // Process SIMD vectors using div_ceil (buffers are padded)
+ for (input_chunk, output_chunk) in input
+ .chunks_exact(simd_width)
+ .zip(output.chunks_exact_mut(simd_width))
+ .take(xsize.div_ceil(simd_width))
+ {
+ let val = D::I32Vec::load(d, input_chunk);
+ let scaled = val * scale;
+ let zeroclip = scaled.lt_zero().if_then_else_i32(zero, scaled);
+ let clip = scaled.gt(max).if_then_else_i32(max, zeroclip);
+ clip.store_u8(output_chunk);
+ }
+ }
+);
+
+impl RenderPipelineInOutStage for ConvertI32ToU8Stage {
+ type InputT = i32;
+ type OutputT = u8;
+ const SHIFT: (u8, u8) = (0, 0);
+ const BORDER: (u8, u8) = (0, 0);
+
+ fn uses_channel(&self, c: usize) -> bool {
+ c == self.channel
+ }
+
+ fn process_row_chunk(
+ &self,
+ _position: (usize, usize),
+ xsize: usize,
+ input_rows: &Channels<i32>,
+ output_rows: &mut ChannelsMut<u8>,
+ _state: Option<&mut dyn std::any::Any>,
+ ) {
+ let input = input_rows[0][0];
+ let output = &mut output_rows[0][0];
+ i32_to_u8_simd_dispatch(input, output, self.multiplier, self.max, xsize);
+ }
}
/// Stage that converts f32 values in [0, 1] range to u16 values.
@@ -481,11 +558,6 @@ mod test {
use crate::headers::bit_depth::BitDepth;
use test_log::test;
- #[test]
- fn u8_consistency() -> Result<()> {
- crate::render::test::test_stage_consistency(|| ConvertU8F32Stage::new(0), (500, 500), 1)
- }
-
#[test]
fn f32_to_u8_consistency() -> Result<()> {
crate::render::test::test_stage_consistency(
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
index bbeff0ddab3c9..ca83a7b6623b8 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf0Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf0Stage {
@@ -39,7 +42,7 @@ impl Epf0Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -64,7 +67,8 @@ simd_function!(
assert_eq!(input_rows.len(), 3);
assert_eq!(output_rows.len(), 3);
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
const { assert!(D::F32Vec::LEN <= 16) };
@@ -76,7 +80,8 @@ simd_function!(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) {
D::F32Vec::load(d, &input_c[3][3 + x..]).store(&mut output_c[0][x..]);
}
@@ -204,7 +209,10 @@ simd_function!(
] {
out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out);
}
- (out * inv_w).store(&mut output_c[0][x..]);
+ out *= inv_w;
+ let p33 = D::F32Vec::load(d, &input_c[3][3 + x..]);
+ let out = sigma_mask.if_then_else_f32(p33, out);
+ out.store(&mut output_c[0][x..]);
}
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
index 53570b34c9b0b..5a1807050f60b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf1Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf1Stage {
@@ -39,7 +42,7 @@ impl Epf1Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -64,7 +67,8 @@ fn epf1_process_row_chunk(
assert_eq!(input_rows.len(), 3);
assert_eq!(output_rows.len(), 3);
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
let sm = stage.sigma_scale * 1.65;
let bsm = sm * stage.border_sad_mul;
@@ -74,7 +78,8 @@ fn epf1_process_row_chunk(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) {
D::F32Vec::load(d, &input_c[2][2 + x..]).store(&mut output_c[0][x..]);
}
@@ -140,7 +145,10 @@ fn epf1_process_row_chunk(
] {
out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out);
}
- (out * inv_w).store(&mut output_c[0][x..]);
+ out *= inv_w;
+ let p22 = D::F32Vec::load(d, &input_c[2][2 + x..]);
+ let out = sigma_mask.if_then_else_f32(p22, out);
+ out.store(&mut output_c[0][x..]);
}
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
index 867b36b07c9f7..6c3cb920a62f5 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf2Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf2Stage {
@@ -39,7 +42,7 @@ impl Epf2Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -65,7 +68,8 @@ fn epf2_process_row_chunk(
let (input_x, input_y, input_b) = (&input_rows[0], &input_rows[1], &input_rows[2]);
let (output_x, output_y, output_b) = output_rows.split_first_3_mut();
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
const { assert!(D::F32Vec::LEN <= 16) };
@@ -77,7 +81,8 @@ fn epf2_process_row_chunk(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
D::F32Vec::load(d, &input_x[1][1 + x..]).store(&mut output_x[0][x..]);
D::F32Vec::load(d, &input_y[1][1 + x..]).store(&mut output_y[0][x..]);
D::F32Vec::load(d, &input_b[1][1 + x..]).store(&mut output_b[0][x..]);
@@ -119,9 +124,15 @@ fn epf2_process_row_chunk(
let inv_w = D::F32Vec::splat(d, 1.0) / w_acc;
- (x_acc * inv_w).store(&mut output_x[0][x..]);
- (y_acc * inv_w).store(&mut output_y[0][x..]);
- (b_acc * inv_w).store(&mut output_b[0][x..]);
+ x_acc *= inv_w;
+ y_acc *= inv_w;
+ b_acc *= inv_w;
+ x_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_x[1][1+x..]), x_acc);
+ y_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_y[1][1+x..]), y_acc);
+ b_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_b[1][1+x..]), b_acc);
+ x_acc.store(&mut output_x[0][x..]);
+ y_acc.store(&mut output_y[0][x..]);
+ b_acc.store(&mut output_b[0][x..]);
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
index ae3ada234812c..90cccf901f064 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
@@ -9,12 +9,13 @@ use rand::SeedableRng;
use test_log::test;
use super::*;
-use crate::{error::Result, features::epf::SigmaSource, image::Image};
+use crate::{error::Result, features::epf::SigmaSource, image::Image, util::AtomicRefCell};
#[test]
fn epf0_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf0Stage::new(0.9, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
@@ -26,6 +27,7 @@ fn epf0_consistency() -> Result<()> {
fn epf1_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf1Stage::new(1.0, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
@@ -37,6 +39,7 @@ fn epf1_consistency() -> Result<()> {
fn epf2_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf2Stage::new(6.5, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
index 3002776c9e2c9..a3fc4bb043027 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
@@ -17,7 +17,7 @@ mod premultiply_alpha;
mod splines;
mod spot;
mod to_linear;
-mod upsample;
+pub mod upsample;
mod xyb;
mod ycbcr;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
index 88dc6395f0421..fd717f345b5fe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
@@ -5,10 +5,13 @@
#![allow(clippy::needless_range_loop)]
+use std::{any::Any, sync::Arc};
+
use crate::{
features::noise::Noise,
frame::color_correlation_map::ColorCorrelationParams,
render::{Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, simd_function};
@@ -103,16 +106,16 @@ impl RenderPipelineInOutStage for ConvolveNoiseStage {
}
pub struct AddNoiseStage {
- noise: Noise,
+ noise: Arc<AtomicRefCell<Noise>>,
first_channel: usize,
- color_correlation: ColorCorrelationParams,
+ color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>,
}
impl AddNoiseStage {
#[allow(dead_code)]
pub fn new(
- noise: Noise,
- color_correlation: ColorCorrelationParams,
+ noise: Arc<AtomicRefCell<Noise>>,
+ color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>,
first_channel: usize,
) -> AddNoiseStage {
assert!(first_channel > 2);
@@ -148,11 +151,16 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
_position: (usize, usize),
xsize: usize,
row: &mut [&mut [f32]],
- _state: Option<&mut dyn std::any::Any>,
+ _state: Option<&mut dyn Any>,
) {
+ let noise = self.noise.borrow();
+ if noise.lut == [0.0; 8] {
+ return;
+ }
+ let color_correlation = self.color_correlation.borrow();
let norm_const = 0.22;
- let ytox = self.color_correlation.y_to_x_lf();
- let ytob = self.color_correlation.y_to_b_lf();
+ let ytox = color_correlation.y_to_x_lf();
+ let ytob = color_correlation.y_to_b_lf();
for x in 0..xsize {
let row_rnd_r = row[3][x];
let row_rnd_g = row[4][x];
@@ -161,8 +169,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
let vy = row[1][x];
let in_g = vy - vx;
let in_r = vy + vx;
- let noise_strength_g = self.noise.strength(in_g * 0.5);
- let noise_strength_r = self.noise.strength(in_r * 0.5);
+ let noise_strength_g = noise.strength(in_g * 0.5);
+ let noise_strength_r = noise.strength(in_r * 0.5);
let addit_rnd_noise_red = row_rnd_r * norm_const;
let addit_rnd_noise_green = row_rnd_g * norm_const;
let addit_rnd_noise_correlated = row_rnd_c * norm_const;
@@ -182,6 +190,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
#[cfg(test)]
mod test {
+ use std::sync::Arc;
+
use crate::{
error::Result,
features::noise::Noise,
@@ -191,7 +201,7 @@ mod test {
stages::noise::{AddNoiseStage, ConvolveNoiseStage},
test::make_and_run_simple_pipeline,
},
- util::test::assert_almost_abs_eq,
+ util::{AtomicRefCell, test::assert_almost_abs_eq},
};
use test_log::test;
@@ -228,10 +238,10 @@ mod test {
let input_c4: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?;
let input_c5: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?;
let stage = AddNoiseStage::new(
- Noise {
+ Arc::new(AtomicRefCell::new(Noise {
lut: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
- },
- ColorCorrelationParams::default(),
+ })),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
3,
);
let output = make_and_run_simple_pipeline(
@@ -325,10 +335,10 @@ mod test {
crate::render::test::test_stage_consistency(
|| {
AddNoiseStage::new(
- Noise {
+ Arc::new(AtomicRefCell::new(Noise {
lut: [0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.1, 2.3],
- },
- ColorCorrelationParams::default(),
+ })),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
3,
)
},
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
index 225302a3955ee..38ac4e3d34085 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
@@ -6,15 +6,31 @@
use std::{any::Any, sync::Arc};
use crate::{
- features::patches::PatchesDictionary, frame::ReferenceFrame,
- headers::extra_channels::ExtraChannelInfo, render::RenderPipelineInPlaceStage,
- util::NewWithCapacity as _,
+ features::patches::PatchesDictionary,
+ frame::ReferenceFrame,
+ headers::extra_channels::ExtraChannelInfo,
+ render::RenderPipelineInPlaceStage,
+ util::{AtomicRefCell, NewWithCapacity as _},
};
pub struct PatchesStage {
- pub patches: Arc<PatchesDictionary>,
- pub extra_channels: Vec<ExtraChannelInfo>,
- pub decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ extra_channels: Vec<ExtraChannelInfo>,
+ decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+}
+
+impl PatchesStage {
+ pub fn new(
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ extra_channels: Vec<ExtraChannelInfo>,
+ decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+ ) -> Self {
+ Self {
+ patches,
+ extra_channels,
+ decoder_state,
+ }
+ }
}
impl std::fmt::Display for PatchesStage {
@@ -37,8 +53,15 @@ impl RenderPipelineInPlaceStage for PatchesStage {
row: &mut [&mut [f32]],
state: Option<&mut dyn Any>,
) {
+ let patches = self.patches.borrow();
+ if patches.positions.is_empty() {
+ return;
+ }
let state: &mut Vec<usize> = state.unwrap().downcast_mut().unwrap();
- self.patches.add_one_row(
+ if state.capacity() < patches.positions.len() {
+ state.reserve(patches.positions.len() - state.len());
+ }
+ patches.add_one_row(
row,
position,
xsize,
@@ -49,7 +72,10 @@ impl RenderPipelineInPlaceStage for PatchesStage {
}
fn init_local_state(&self, _thread_index: usize) -> crate::error::Result<Option<Box<dyn Any>>> {
- let patches_for_row_result = Vec::<usize>::new_with_capacity(self.patches.positions.len())?;
+ // TODO(veluca): I think this is wrong, check that.
+ let patches = self.patches.borrow();
+ let len = patches.positions.len();
+ let patches_for_row_result = Vec::<usize>::new_with_capacity(len)?;
Ok(Some(Box::new(patches_for_row_result) as Box<dyn Any>))
}
}
@@ -70,13 +96,13 @@ mod test {
let (file_header, _, _) =
read_headers_and_toc(include_bytes!("../../../resources/test/basic.jxl")).unwrap();
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
- let patch_dict = Arc::new(PatchesDictionary::random(
+ let patch_dict = PatchesDictionary::random(
(500, 500),
file_header.image_metadata.extra_channel_info.len(),
0,
4,
&mut rng,
- ));
+ );
let reference_frames = Arc::new([
Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?),
Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?),
@@ -85,7 +111,7 @@ mod test {
]);
crate::render::test::test_stage_consistency(
|| PatchesStage {
- patches: patch_dict.clone(),
+ patches: Arc::new(AtomicRefCell::new(patch_dict.clone())),
extra_channels: file_header.image_metadata.extra_channel_info.clone(),
decoder_state: reference_frames.clone(),
},
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
index b2ebd88461e85..295c33c663ad2 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
@@ -3,29 +3,33 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::{any::Any, sync::Arc};
+
use crate::{
- error::Result, features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams,
- render::RenderPipelineInPlaceStage,
+ features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams,
+ render::RenderPipelineInPlaceStage, util::AtomicRefCell,
};
pub struct SplinesStage {
- splines: Splines,
+ splines: Arc<AtomicRefCell<Splines>>,
+ image_size: (usize, usize),
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ high_precision: bool,
}
impl SplinesStage {
pub fn new(
- mut splines: Splines,
- frame_size: (usize, usize),
- color_correlation_params: &ColorCorrelationParams,
+ splines: Arc<AtomicRefCell<Splines>>,
+ image_size: (usize, usize),
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
high_precision: bool,
- ) -> Result<Self> {
- splines.initialize_draw_cache(
- frame_size.0 as u64,
- frame_size.1 as u64,
+ ) -> Self {
+ SplinesStage {
+ splines,
+ image_size,
color_correlation_params,
high_precision,
- )?;
- Ok(SplinesStage { splines })
+ }
}
}
@@ -47,17 +51,36 @@ impl RenderPipelineInPlaceStage for SplinesStage {
position: (usize, usize),
xsize: usize,
row: &mut [&mut [f32]],
- _state: Option<&mut dyn std::any::Any>,
+ _state: Option<&mut dyn Any>,
) {
- self.splines.draw_segments(row, position, xsize);
+ // TODO(veluca): this is wrong!! Race condition in MT.
+ let mut splines = self.splines.borrow_mut();
+ if splines.splines.is_empty() {
+ return;
+ }
+ if !splines.is_initialized() {
+ let color_correlation_params = self.color_correlation_params.borrow();
+ splines
+ .initialize_draw_cache(
+ self.image_size.0 as u64,
+ self.image_size.1 as u64,
+ &color_correlation_params,
+ self.high_precision,
+ )
+ .unwrap();
+ }
+ splines.draw_segments(row, position, xsize);
}
}
#[cfg(test)]
mod test {
+ use std::sync::Arc;
+
use crate::features::spline::{Point, QuantizedSpline, Splines};
use crate::frame::color_correlation_map::ColorCorrelationParams;
use crate::render::test::make_and_run_simple_pipeline;
+ use crate::util::AtomicRefCell;
use crate::util::test::{self, assert_all_almost_abs_eq, read_pfm};
use crate::{error::Result, image::Image, render::stages::splines::SplinesStage};
use test_log::test;
@@ -104,12 +127,11 @@ mod test {
);
let output: Vec<Image<f32>> = make_and_run_simple_pipeline(
SplinesStage::new(
- splines.clone(),
+ Arc::new(AtomicRefCell::new(splines.clone())),
size,
- &ColorCorrelationParams::default(),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
true,
- )
- .unwrap(),
+ ),
&target_images,
size,
0,
@@ -123,6 +145,7 @@ mod test {
Ok(())
}
+ #[ignore = "spline rendering is not fully consistent due to sqrt precision differences"]
#[test]
fn splines_consistency() -> Result<()> {
let splines = Splines::create(
@@ -160,12 +183,11 @@ mod test {
crate::render::test::test_stage_consistency(
|| {
SplinesStage::new(
- splines.clone(),
+ Arc::new(AtomicRefCell::new(splines.clone())),
(500, 500),
- &ColorCorrelationParams::default(),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
false,
)
- .unwrap()
},
(500, 500),
6,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
index 4e1f80a5a3f90..77797bc4d2b23 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
@@ -103,10 +103,9 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
image_size,
downsampling_shift,
LOG_GROUP_SIZE,
- 1,
chunk_size,
)
- .add_stage_internal(stage)?;
+ .add_stage_internal(stage);
let jxl_data_type = match OutputT::DATA_TYPE_ID {
DataTypeTag::U8 | DataTypeTag::I8 => JxlDataFormat::U8 { bit_depth: 8 },
@@ -129,7 +128,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
JxlColorType::Grayscale,
jxl_data_type,
false,
- )?;
+ );
}
let mut pipeline = pipeline.build()?;
@@ -168,7 +167,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
pipeline.set_buffer_for_group(
c,
g,
- 1,
+ true,
extract_group_rect(&input_images[c], g, log_group_size)?,
&mut buffer_splitter,
)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
index 74cb6784bf85c..6fa51988ba393 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
@@ -58,6 +58,22 @@ pub fn fast_erff(x: f32) -> f32 {
result.copysign(x)
}
+#[inline(always)]
+pub fn fast_erff_simd<D: SimdDescriptor>(d: D, x: D::F32Vec) -> D::F32Vec {
+ let absx = x.abs();
+ let denom1 = absx.mul_add(
+ D::F32Vec::splat(d, 7.77394369e-02),
+ D::F32Vec::splat(d, 2.05260015e-04),
+ );
+ let denom2 = denom1.mul_add(absx, D::F32Vec::splat(d, 2.32120216e-01));
+ let denom3 = denom2.mul_add(absx, D::F32Vec::splat(d, 2.77820801e-01));
+ let denom4 = denom3.mul_add(absx, D::F32Vec::splat(d, 1.0));
+ let denom5 = denom4 * denom4;
+ let inv_denom5 = D::F32Vec::splat(d, 1.0) / denom5;
+ let result = D::F32Vec::splat(d, 1.0) - inv_denom5 * inv_denom5;
+ result.copysign(x)
+}
+
#[inline]
pub fn fast_pow2f(x: f32) -> f32 {
let x_floor = x.floor();
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs
new file mode 100644
index 0000000000000..4d7e6c2fcd8e5
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// Mirror-reflects a value v to fit in a [0; s) range.
+pub fn mirror(mut v: isize, s: usize) -> usize {
+ // TODO(veluca): consider speeding this up if needed.
+ loop {
+ if v < 0 {
+ v = -v - 1;
+ } else if v >= s as isize {
+ v = s as isize * 2 - v - 1;
+ } else {
+ return v as usize;
+ }
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
index ac8db74e3a558..514820bcd24ef 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
@@ -14,6 +14,7 @@ mod fast_math;
mod float16;
mod linalg;
mod log2;
+mod mirror;
pub mod ndarray;
mod rational_poly;
mod shift_right_ceil;
@@ -30,6 +31,7 @@ pub use fast_math::*;
pub use float16::f16;
pub use linalg::*;
pub use log2::*;
+pub use mirror::*;
pub(crate) use ndarray::*;
pub use rational_poly::*;
pub use shift_right_ceil::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
index a9b3ca24d4cd6..8e47c7bb1ce68 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
@@ -222,6 +222,39 @@ pub fn check_equal_images<T: ImageDataType>(a: &Image<T>, b: &Image<T>) {
}
}
+/// Encode a u64 value as a LEB128 varint. Useful for building test data for
+/// frame index boxes and other container structures.
+pub fn encode_varint(mut value: u64) -> Vec<u8> {
+ let mut result = Vec::new();
+ loop {
+ let mut byte = (value & 0x7f) as u8;
+ value >>= 7;
+ if value > 0 {
+ byte |= 0x80;
+ }
+ result.push(byte);
+ if value == 0 {
+ break;
+ }
+ }
+ result
+}
+
+/// Build raw jxli frame index box content bytes from tnum, tden, and
+/// delta-coded entries `(OFF_delta, T, F)`.
+pub fn build_frame_index_content(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> {
+ let mut buf = Vec::new();
+ buf.extend(encode_varint(entries.len() as u64));
+ buf.extend(tnum.to_be_bytes());
+ buf.extend(tden.to_be_bytes());
+ for &(off, t, f) in entries {
+ buf.extend(encode_varint(off));
+ buf.extend(encode_varint(t));
+ buf.extend(encode_varint(f));
+ }
+ buf
+}
+
pub fn read_headers_and_toc(image: &[u8]) -> Result<(FileHeader, FrameHeader, Toc), JXLError> {
let codestream = ContainerParser::collect_codestream(image).unwrap();
let mut br = BitReader::new(&codestream);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
index c0d6499398b2c..5dbd975587f40 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
@@ -14,7 +14,7 @@ use std::{
use crate::U32SimdVec;
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
// Safety invariant: this type is only ever constructed if neon is available.
#[derive(Clone, Copy, Debug)]
@@ -41,6 +41,10 @@ impl SimdDescriptor for NeonDescriptor {
type U32Vec = U32VecNeon;
+ type U16Vec = U16VecNeon;
+
+ type U8Vec = U8VecNeon;
+
type Mask = MaskNeon;
type Bf16Table8 = Bf16Table8Neon;
@@ -122,7 +126,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load(d: Self::Descriptor, mem: &[f32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld1q_f32 supports unaligned loads.
Self(unsafe { vld1q_f32(mem.as_ptr()) }, d)
}
@@ -130,7 +134,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn store(&self, mem: &mut [f32]) {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vst1q_f32 supports unaligned stores.
unsafe { vst1q_f32(mem.as_mut_ptr(), self.0) }
}
@@ -138,9 +142,9 @@ unsafe impl F32SimdVec for F32VecNeon {
fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<f32>]) {
assert!(dest.len() >= 2 * Self::LEN);
// SAFETY: we just checked that `dest` has enough space, and neon is available
- // from the safety invariant on the descriptor stored in `a`.
+ // from the safety invariant on the descriptor stored in `a`. vst2q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst2q_f32(dest_ptr, float32x4x2_t(a.0, b.0));
}
}
@@ -148,9 +152,9 @@ unsafe impl F32SimdVec for F32VecNeon {
#[inline(always)]
fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<f32>]) {
assert!(dest.len() >= 3 * Self::LEN);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. vst3q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst3q_f32(dest_ptr, float32x4x3_t(a.0, b.0, c.0));
}
}
@@ -165,9 +169,9 @@ unsafe impl F32SimdVec for F32VecNeon {
) {
assert!(dest.len() >= 4 * Self::LEN);
// SAFETY: we just checked that `dest` has enough space, and neon is available
- // from the safety invariant on the descriptor stored in `a`.
+ // from the safety invariant on the descriptor stored in `a`. vst4q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst4q_f32(dest_ptr, float32x4x4_t(a.0, b.0, c.0, d.0));
}
}
@@ -277,7 +281,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self) {
assert!(src.len() >= 2 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld2q_f32 supports unaligned loads.
let float32x4x2_t(a, b) = unsafe { vld2q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d))
}
@@ -286,7 +290,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_3(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self) {
assert!(src.len() >= 3 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld3q_f32 supports unaligned loads.
let float32x4x3_t(a, b, c) = unsafe { vld3q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d), Self(c, d))
}
@@ -295,7 +299,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_4(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self, Self) {
assert!(src.len() >= 4 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld4q_f32 supports unaligned loads.
let float32x4x4_t(a, b, c, e) = unsafe { vld4q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d), Self(c, d), Self(e, d))
}
@@ -337,7 +341,7 @@ unsafe impl F32SimdVec for F32VecNeon {
assert!(data.len() > 3);
// Transposed load
- // SAFETY: input is verified to be large enough for this pointer.
+ // SAFETY: input is verified to be large enough for this pointer. vld4q_f32 supports unaligned loads.
let float32x4x4_t(p0, p1, p2, p3) = unsafe { vld4q_f32(data.as_ptr().cast()) };
F32VecNeon(p0, d).store_array(&mut data[0]);
@@ -422,9 +426,9 @@ unsafe impl F32SimdVec for F32VecNeon {
let u16s = vqmovun_s32(i32s);
let u8s = vqmovn_u16(vcombine_u16(u16s, u16s));
// Store lower 4 bytes
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. vst1_lane_u32 supports unaligned stores.
unsafe {
- vst1_lane_u32::<0>(dest.as_mut_ptr() as *mut u32, vreinterpret_u32_u8(u8s));
+ vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_u8(u8s));
}
}
@@ -436,7 +440,7 @@ unsafe impl F32SimdVec for F32VecNeon {
let i32s = vcvtq_s32_f32(rounded);
let u16s = vqmovun_s32(i32s);
// Store 4 u16s (8 bytes)
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. vst1_u16 supports unaligned stores.
unsafe {
vst1_u16(dest.as_mut_ptr(), u16s);
}
@@ -447,7 +451,8 @@ unsafe impl F32SimdVec for F32VecNeon {
// Use inline asm because Rust stdarch incorrectly requires fp16 target feature
// for vcvt_f16_f32 (fixed in https://github.com/rust-lang/stdarch/pull/1978)
let f16_bits: uint16x4_t;
- // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space
+ // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space,
+ // vst1_u16 supports unaligned stores.
unsafe {
std::arch::asm!(
"fcvtn {out:v}.4h, {inp:v}.4s",
@@ -466,7 +471,8 @@ unsafe impl F32SimdVec for F32VecNeon {
// Use inline asm because Rust stdarch incorrectly requires fp16 target feature
// for vcvt_f32_f16 (fixed in https://github.com/rust-lang/stdarch/pull/1978)
let result: float32x4_t;
- // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space
+ // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space.
+ // vld1_u16 supports unaligned loads.
unsafe {
let f16_bits = vld1_u16(mem.as_ptr());
std::arch::asm!(
@@ -487,7 +493,7 @@ unsafe impl F32SimdVec for F32VecNeon {
// Convert f32 table to BF16 packed in 128 bits (16 bytes for 8 entries)
// BF16 is the high 16 bits of f32
// SAFETY: neon is available from target_feature, and `table` is large
- // enough for the loads.
+ // enough for the loads. vld1q_f32 supports unaligned loads.
let (table_lo, table_hi) =
unsafe { (vld1q_f32(table.as_ptr()), vld1q_f32(table.as_ptr().add(4))) };
@@ -695,13 +701,28 @@ impl I32SimdVec for I32VecNeon {
fn store_u16(self, dest: &mut [u16]) {
assert!(dest.len() >= Self::LEN);
// SAFETY: We know neon is available from the safety invariant on `self.1`,
- // and we just checked that `dest` has enough space.
+ // and we just checked that `dest` has enough space. vst1_u16 supports unaligned
+ // stores.
unsafe {
// vmovn narrows i32 to i16 by taking the lower 16 bits
let narrowed = vmovn_s32(self.0);
vst1_u16(dest.as_mut_ptr(), vreinterpret_u16_s16(narrowed));
}
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ assert!(dest.len() >= Self::LEN);
+ // SAFETY: We know neon is available from the safety invariant on `self.1`,
+ // and we just checked that `dest` has enough space. vst1_lane_u32 supports unaligned stores.
+ unsafe {
+ // vmovn narrows i32 -> i16 -> i8
+ let narrowed_i16 = vmovn_s32(self.0);
+ let combined_i16 = vcombine_s16(narrowed_i16, narrowed_i16);
+ let narrowed_i8 = vmovn_s16(combined_i16);
+ vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_s8(narrowed_i8));
+ }
+ }
}
impl Add<I32VecNeon> for I32VecNeon {
@@ -837,6 +858,150 @@ impl U32SimdVec for U32VecNeon {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecNeon(uint8x16_t, NeonDescriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecNeon {
+ type Descriptor = NeonDescriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vld1q_u8 supports unaligned loads.
+ Self(unsafe { vld1q_u8(mem.as_ptr()) }, d)
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know neon is available from the safety invariant on `d`.
+ Self(unsafe { vdupq_n_u8(v) }, d)
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vst1q_u8 supports unaligned stores.
+ unsafe { vst1q_u8(mem.as_mut_ptr(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst2q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst2q_u8(dest_ptr, uint8x16x2_t(a.0, b.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 3 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst3q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst3q_u8(dest_ptr, uint8x16x3_t(a.0, b.0, c.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst4q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst4q_u8(dest_ptr, uint8x16x4_t(a.0, b.0, c.0, d.0));
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecNeon(uint16x8_t, NeonDescriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecNeon {
+ type Descriptor = NeonDescriptor;
+ const LEN: usize = 8;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vld1q_u16 supports unaligned loads.
+ Self(unsafe { vld1q_u16(mem.as_ptr().cast()) }, d)
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: We know neon is available from the safety invariant on `d`.
+ Self(unsafe { vdupq_n_u16(v) }, d)
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vst1q_u16 supports unaligned stores.
+ unsafe { vst1q_u16(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst2q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst2q_u16(dest_ptr, uint16x8x2_t(a.0, b.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 3 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst3q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst3q_u16(dest_ptr, uint16x8x3_t(a.0, b.0, c.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst4q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst4q_u16(dest_ptr, uint16x8x4_t(a.0, b.0, c.0, d.0));
+ }
+ }
+}
+
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
pub struct MaskNeon(uint32x4_t, NeonDescriptor);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
index 4f06dbddcd7b8..0129db2a1fa96 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
@@ -44,6 +44,10 @@ pub trait SimdDescriptor: Sized + Copy + Debug + Send + Sync {
type U32Vec: U32SimdVec<Descriptor = Self>;
+ type U16Vec: U16SimdVec<Descriptor = Self>;
+
+ type U8Vec: U8SimdVec<Descriptor = Self>;
+
type Mask: SimdMask<Descriptor = Self>;
/// Prepared 8-entry BF16 lookup table for fast approximate lookups.
@@ -124,7 +128,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_2_uninit(a, b, dest);
}
@@ -136,7 +140,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_3_uninit(a, b, c, dest);
}
@@ -148,7 +152,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_4_uninit(a, b, c, d, dest);
}
@@ -344,6 +348,10 @@ pub trait I32SimdVec:
/// Stores the lower 16 bits of each i32 lane as u16 values.
/// Requires `dest.len() >= Self::LEN` or it will panic.
fn store_u16(self, dest: &mut [u16]);
+
+ /// Stores the lower 8 bits of each i32 lane as u8 values.
+ /// Requires `dest.len() >= Self::LEN` or it will panic.
+ fn store_u8(self, dest: &mut [u8]);
}
pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync {
@@ -357,6 +365,150 @@ pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync {
fn shr<const AMOUNT_U: u32, const AMOUNT_I: i32>(self) -> Self;
}
+/// # Safety
+///
+/// Implementors are required to respect the safety promises of the methods in this trait.
+/// Specifically, this applies to the store_*_uninit methods.
+pub unsafe trait U8SimdVec: Sized + Copy + Debug + Send + Sync {
+ type Descriptor: SimdDescriptor;
+
+ const LEN: usize;
+
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self;
+ fn splat(d: Self::Descriptor, v: u8) -> Self;
+ fn store(&self, mem: &mut [u8]);
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_2(a: Self, b: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_2_uninit(a, b, dest);
+ }
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_3_uninit(a, b, c, dest);
+ }
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_4_uninit(a, b, c, d, dest);
+ }
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ ///
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]);
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]);
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_4_uninit(a: Self, b: Self, c: Self, d: Self, dest: &mut [MaybeUninit<u8>]);
+}
+
+/// # Safety
+///
+/// Implementors are required to respect the safety promises of the methods in this trait.
+/// Specifically, this applies to the store_*_uninit methods.
+pub unsafe trait U16SimdVec: Sized + Copy + Debug + Send + Sync {
+ type Descriptor: SimdDescriptor;
+
+ const LEN: usize;
+
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self;
+ fn splat(d: Self::Descriptor, v: u16) -> Self;
+ fn store(&self, mem: &mut [u16]);
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_2(a: Self, b: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_2_uninit(a, b, dest);
+ }
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_3_uninit(a, b, c, dest);
+ }
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_4_uninit(a, b, c, d, dest);
+ }
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ ///
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]);
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]);
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ );
+}
+
#[macro_export]
macro_rules! shl {
($val: expr, $amount: literal) => {
@@ -436,7 +588,8 @@ mod test {
use arbtest::arbitrary::Unstructured;
use crate::{
- F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, test_all_instruction_sets,
+ F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, U8SimdVec, U16SimdVec,
+ test_all_instruction_sets,
};
enum Distribution {
@@ -1215,4 +1368,160 @@ mod test {
}
}
test_all_instruction_sets!(test_store_u16);
+
+ fn test_store_interleaved_2_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let mut output = vec![0u8; 2 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ D::U8Vec::store_interleaved_2(a_vec, b_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[2 * i], a[i]);
+ assert_eq!(output[2 * i + 1], b[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_2_u8);
+
+ fn test_store_interleaved_3_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect();
+ let mut output = vec![0u8; 3 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ let c_vec = D::U8Vec::load(d, &c);
+ D::U8Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[3 * i], a[i]);
+ assert_eq!(output[3 * i + 1], b[i]);
+ assert_eq!(output[3 * i + 2], c[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_3_u8);
+
+ fn test_store_interleaved_4_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect();
+ let e: Vec<u8> = (0..len).map(|i| (i + 200) as u8).collect();
+ let mut output = vec![0u8; 4 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ let c_vec = D::U8Vec::load(d, &c);
+ let d_vec = D::U8Vec::load(d, &e);
+ D::U8Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[4 * i], a[i]);
+ assert_eq!(output[4 * i + 1], b[i]);
+ assert_eq!(output[4 * i + 2], c[i]);
+ assert_eq!(output[4 * i + 3], e[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_4_u8);
+
+ fn test_store_interleaved_2_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let mut output = vec![0u16; 2 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ D::U16Vec::store_interleaved_2(a_vec, b_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[2 * i], a[i]);
+ assert_eq!(output[2 * i + 1], b[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_2_u16);
+
+ fn test_store_interleaved_3_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect();
+ let mut output = vec![0u16; 3 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ let c_vec = D::U16Vec::load(d, &c);
+ D::U16Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[3 * i], a[i]);
+ assert_eq!(output[3 * i + 1], b[i]);
+ assert_eq!(output[3 * i + 2], c[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_3_u16);
+
+ fn test_store_interleaved_4_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect();
+ let e: Vec<u16> = (0..len).map(|i| (i + 3000) as u16).collect();
+ let mut output = vec![0u16; 4 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ let c_vec = D::U16Vec::load(d, &c);
+ let d_vec = D::U16Vec::load(d, &e);
+ D::U16Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[4 * i], a[i]);
+ assert_eq!(output[4 * i + 1], b[i]);
+ assert_eq!(output[4 * i + 2], c[i]);
+ assert_eq!(output[4 * i + 3], e[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_4_u16);
+
+ fn test_store_u8<D: SimdDescriptor>(d: D) {
+ let data = [
+ 0xba_i32,
+ 0x12345678_i32,
+ 0xdeadbabeu32 as i32,
+ 0x76543210_i32,
+ 0x11111111_i32,
+ 0x00000000_i32,
+ 0xffffffffu32 as i32,
+ 0x12345678_i32,
+ 0x87654321u32 as i32,
+ 0xabcdef01u32 as i32,
+ 0x10203040_i32,
+ 0x50607080_i32,
+ 0x01020304_i32,
+ 0x05060708_i32,
+ 0x090a0b0c_i32,
+ 0x0d0e0f00_i32,
+ ];
+ let mut output = [0u8; 16];
+ for i in (0..16).step_by(D::I32Vec::LEN) {
+ let vec = D::I32Vec::load(d, &data[i..]);
+ vec.store_u8(&mut output[i..]);
+ }
+
+ for i in 0..16 {
+ let expected = (data[i] & 0xff) as u8;
+ assert_eq!(
+ output[i], expected,
+ "store_u8 failed at index {}: expected 0x{:02x}, got 0x{:02x}",
+ i, expected, output[i]
+ );
+ }
+ }
+ test_all_instruction_sets!(test_store_u8);
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
index f0444c34bf4f8..a423db2f1d56b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
@@ -8,7 +8,7 @@ use std::num::Wrapping;
use crate::{U32SimdVec, f16, impl_f32_array_interface};
-use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
#[derive(Clone, Copy, Debug)]
pub struct ScalarDescriptor;
@@ -17,6 +17,8 @@ impl SimdDescriptor for ScalarDescriptor {
type F32Vec = f32;
type I32Vec = Wrapping<i32>;
type U32Vec = Wrapping<u32>;
+ type U8Vec = u8;
+ type U16Vec = u16;
type Mask = bool;
type Bf16Table8 = [f32; 8];
@@ -310,6 +312,11 @@ impl I32SimdVec for Wrapping<i32> {
fn store_u16(self, dest: &mut [u16]) {
dest[0] = self.0 as u16;
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ dest[0] = self.0 as u8;
+ }
}
impl U32SimdVec for Wrapping<u32> {
@@ -328,6 +335,104 @@ impl U32SimdVec for Wrapping<u32> {
}
}
+// SAFETY: This implementation only writes initialized data into the
+// `&mut [MaybeUninit<u8>]` arguments to *_uninit methods.
+unsafe impl U8SimdVec for u8 {
+ type Descriptor = ScalarDescriptor;
+ const LEN: usize = 1;
+
+ #[inline(always)]
+ fn load(_d: Self::Descriptor, mem: &[u8]) -> Self {
+ mem[0]
+ }
+
+ #[inline(always)]
+ fn splat(_d: Self::Descriptor, v: u8) -> Self {
+ v
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ mem[0] = *self;
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ dest[3].write(d);
+ }
+}
+
+// SAFETY: This implementation only writes initialized data into the
+// `&mut [MaybeUninit<u16>]` arguments to *_uninit methods.
+unsafe impl U16SimdVec for u16 {
+ type Descriptor = ScalarDescriptor;
+ const LEN: usize = 1;
+
+ #[inline(always)]
+ fn load(_d: Self::Descriptor, mem: &[u16]) -> Self {
+ mem[0]
+ }
+
+ #[inline(always)]
+ fn splat(_d: Self::Descriptor, v: u16) -> Self {
+ v
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ mem[0] = *self;
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ dest[3].write(d);
+ }
+}
+
impl SimdMask for bool {
type Descriptor = ScalarDescriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
index 0da8ec9f0da4d..0ab752a656478 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
@@ -5,7 +5,7 @@
use crate::{U32SimdVec, impl_f32_array_interface, x86_64::sse42::Sse42Descriptor};
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
use std::{
arch::x86_64::*,
mem::MaybeUninit,
@@ -124,6 +124,8 @@ impl SimdDescriptor for AvxDescriptor {
type F32Vec = F32VecAvx;
type I32Vec = I32VecAvx;
type U32Vec = U32VecAvx;
+ type U8Vec = U8VecAvx;
+ type U16Vec = U16VecAvx;
type Mask = MaskAvx;
type Bf16Table8 = Bf16Table8Avx;
@@ -198,16 +200,16 @@ unsafe impl F32SimdVec for F32VecAvx {
fn load(d: Self::Descriptor, mem: &[f32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `d`.
- Self(unsafe { _mm256_loadu_ps(mem.as_ptr()) }, d)
+ // from the safety invariant on `d`. _mm256_loadu_ps supports unaligned loads.
+ Self(unsafe { _mm256_loadu_ps(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
fn store(&self, mem: &mut [f32]) {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `self.1`.
- unsafe { _mm256_storeu_ps(mem.as_mut_ptr(), self.0) }
+ // from the safety invariant on `self.1`. _mm256_storeu_ps supports unaligned stores.
+ unsafe { _mm256_storeu_ps(mem.as_mut_ptr().cast(), self.0) }
}
#[inline(always)]
@@ -223,9 +225,9 @@ unsafe impl F32SimdVec for F32VecAvx {
// Need to permute to get correct order
let out0 = _mm256_permute2f128_ps::<0x20>(lo, hi); // lower halves: [a0,b0,a1,b1, a2,b2,a3,b3]
let out1 = _mm256_permute2f128_ps::<0x31>(lo, hi); // upper halves: [a4,b4,a5,b5, a6,b6,a7,b7]
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
}
@@ -274,9 +276,9 @@ unsafe impl F32SimdVec for F32VecAvx {
let out2 = _mm256_blend_ps::<0b01001001>(a2, b2);
let out2 = _mm256_blend_ps::<0b10010010>(out2, c2);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
_mm256_storeu_ps(dest_ptr.add(16), out2);
@@ -335,9 +337,9 @@ unsafe impl F32SimdVec for F32VecAvx {
let out2 = _mm256_permute2f128_ps::<0x31>(abcd_0, abcd_1);
let out3 = _mm256_permute2f128_ps::<0x31>(abcd_2, abcd_3);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
_mm256_storeu_ps(dest_ptr.add(16), out2);
@@ -636,9 +638,15 @@ unsafe impl F32SimdVec for F32VecAvx {
// Pack 8 u16s to 8 u8s (use same vector twice, take lower half)
let u8s = _mm_packus_epi16(u16s, u16s);
// Store lower 8 bytes
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si64(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u8s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: avx2 is available from the safety invariant on the descriptor.
@@ -661,9 +669,9 @@ unsafe impl F32SimdVec for F32VecAvx {
// Pack 4+4 i32s to 8 u16s
let u16s = _mm_packus_epi32(lo, hi);
// Store 8 u16s (16 bytes)
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. _mm_storeu_si128 supports unaligned stores.
unsafe {
- _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u16s);
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), u16s);
}
}
// SAFETY: avx2 is available from the safety invariant on the descriptor.
@@ -678,8 +686,8 @@ unsafe impl F32SimdVec for F32VecAvx {
#[inline]
fn load_f16_impl(d: AvxDescriptor, mem: &[u16]) -> F32VecAvx {
assert!(mem.len() >= F32VecAvx::LEN);
- // SAFETY: mem.len() >= 8 is checked above
- let bits = unsafe { _mm_loadu_si128(mem.as_ptr() as *const __m128i) };
+ // SAFETY: mem.len() >= 8 is checked above. _mm_loadu_si128 supports unaligned loads.
+ let bits = unsafe { _mm_loadu_si128(mem.as_ptr().cast()) };
F32VecAvx(_mm256_cvtph_ps(bits), d)
}
// SAFETY: avx2 and f16c are available from the safety invariant on the descriptor
@@ -693,8 +701,8 @@ unsafe impl F32SimdVec for F32VecAvx {
fn store_f16_bits_impl(v: __m256, dest: &mut [u16]) {
assert!(dest.len() >= F32VecAvx::LEN);
let bits = _mm256_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v);
- // SAFETY: dest.len() >= 8 is checked above
- unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, bits) };
+ // SAFETY: dest.len() >= 8 is checked above. _mm_storeu_si128 supports unaligned stores.
+ unsafe { _mm_storeu_si128(dest.as_mut_ptr().cast(), bits) };
}
// SAFETY: avx2 and f16c are available from the safety invariant on the descriptor
unsafe { store_f16_bits_impl(self.0, dest) }
@@ -800,8 +808,8 @@ impl I32SimdVec for I32VecAvx {
fn load(d: Self::Descriptor, mem: &[i32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `d`.
- Self(unsafe { _mm256_loadu_si256(mem.as_ptr() as *const _) }, d)
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ Self(unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
@@ -893,7 +901,7 @@ impl I32SimdVec for I32VecAvx {
),
);
let tmp = _mm256_permute4x64_epi64(tmp, 0xD8);
- // SAFETY: we just checked that `dest` has enough space.
+ // SAFETY: we just checked that `dest` has enough space. _mm_storeu_si128 supports unaligned stores.
unsafe {
_mm_storeu_si128(dest.as_mut_ptr().cast(), _mm256_extracti128_si256::<0>(tmp))
};
@@ -901,6 +909,38 @@ impl I32SimdVec for I32VecAvx {
// SAFETY: avx2 is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_u8_impl(v: __m256i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecAvx::LEN);
+ let tmp = _mm256_shuffle_epi8(
+ v,
+ _mm256_setr_epi8(
+ 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, //
+ 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ ),
+ );
+ let lo = _mm256_castsi256_si128(tmp);
+ let hi = _mm256_extracti128_si256::<1>(tmp);
+ let packed = _mm_unpacklo_epi32(lo, hi);
+ let val = _mm_cvtsi128_si64(packed);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. we just checked that `dest` has enough space (dest.len() >= 8).
+ // 2. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 3. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8.
+ // 4. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 5. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecAvx> for I32VecAvx {
@@ -1035,6 +1075,414 @@ impl U32SimdVec for U32VecAvx {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecAvx(__m256i, AvxDescriptor);
+
+// SAFETY: The `store_interleaved_*_uninit` methods of this implementation
+// only write initialized data into the `&mut [MaybeUninit<u8>]` output slices.
+unsafe impl U8SimdVec for U8VecAvx {
+ type Descriptor = AvxDescriptor;
+ const LEN: usize = 32;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= U8VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know avx2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm256_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= U8VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `self.1`. _mm256_storeu_si256 supports unaligned stores.
+ unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecAvx::LEN);
+ // a = [A0..A15 | A16..A31]
+ // b = [B0..B15 | B16..B31]
+ let lo = _mm256_unpacklo_epi8(a, b); // [A0 B0..A7 B7 | A16 B16..A23 B23]
+ let hi = _mm256_unpackhi_epi8(a, b); // [A8 B8..A15 B15 | A24 B24..A31 B31]
+
+ // R0 = [A0 B0..A7 B7 | A8 B8..A15 B15]
+ let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi);
+ // R1 = [A16 B16..A23 B23 | A24 B24..A31 B31]
+ let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 3 * U8VecAvx::LEN);
+
+ // U8 Masks
+ let mask_a0 = _mm256_setr_epi8(
+ 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1,
+ -1, 8, -1, -1, 9, -1, -1, 10, -1,
+ );
+ let mask_a1 = _mm256_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1,
+ -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5,
+ );
+ let mask_a2 = _mm256_setr_epi8(
+ -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1,
+ -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ );
+ let mask_b0 = _mm256_setr_epi8(
+ -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7,
+ -1, -1, 8, -1, -1, 9, -1, -1, 10,
+ );
+ let mask_b1 = _mm256_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1,
+ -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1,
+ );
+ let mask_b2 = _mm256_setr_epi8(
+ 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12,
+ -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ );
+ let mask_c0 = _mm256_setr_epi8(
+ -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1,
+ -1, 7, -1, -1, 8, -1, -1, 9, -1, -1,
+ );
+ let mask_c1 = _mm256_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1,
+ 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1,
+ );
+ let mask_c2 = _mm256_setr_epi8(
+ -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1,
+ 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ );
+
+ // Create duplicated vectors for lane swizzling
+ let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a);
+ let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b);
+ let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c);
+
+ let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a);
+ let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b);
+ let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c);
+
+ let out0 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_lo, mask_a0),
+ _mm256_shuffle_epi8(b_dup_lo, mask_b0),
+ ),
+ _mm256_shuffle_epi8(c_dup_lo, mask_c0),
+ );
+
+ let out1 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a, mask_a1),
+ _mm256_shuffle_epi8(b, mask_b1),
+ ),
+ _mm256_shuffle_epi8(c, mask_c1),
+ );
+
+ let out2 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_hi, mask_a2),
+ _mm256_shuffle_epi8(b_dup_hi, mask_b2),
+ ),
+ _mm256_shuffle_epi8(c_dup_hi, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ d: __m256i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * U8VecAvx::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm256_unpacklo_epi8(a, b);
+ let ab_hi = _mm256_unpackhi_epi8(a, b);
+ let cd_lo = _mm256_unpacklo_epi8(c, d);
+ let cd_hi = _mm256_unpackhi_epi8(c, d);
+
+ // Then interleave the pairs to get 4-byte chunks
+ let out0_p = _mm256_unpacklo_epi16(ab_lo, cd_lo);
+ let out1_p = _mm256_unpackhi_epi16(ab_lo, cd_lo);
+ let out2_p = _mm256_unpacklo_epi16(ab_hi, cd_hi);
+ let out3_p = _mm256_unpackhi_epi16(ab_hi, cd_hi);
+
+ // Reorder lanes
+ let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p);
+ let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p);
+ let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p);
+ let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ _mm256_storeu_si256(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecAvx(__m256i, AvxDescriptor);
+
+// SAFETY: The `store_interleaved_*_uninit` methods of this implementation
+// only write initialized data into the `&mut [MaybeUninit<u16>]` output slices.
+unsafe impl U16SimdVec for U16VecAvx {
+ type Descriptor = AvxDescriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= U16VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { Self(_mm256_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= U16VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `self.1`. _mm256_storeu_si256 supports unaligned stores.
+ unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecAvx::LEN);
+ // a = [A0..A7 | A8..A15]
+ // b = [B0..B7 | B8..B15]
+ let lo = _mm256_unpacklo_epi16(a, b); // [A0 B0..A3 B3 | A8 B8..A11 B11]
+ let hi = _mm256_unpackhi_epi16(a, b); // [A4 B4..A7 B7 | A12 B12..A15 B15]
+
+ // R0 = [A0 B0..A7 B7]
+ let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi);
+ // R1 = [A8 B8..A15 B15]
+ let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 3 * U16VecAvx::LEN);
+
+ // U16 Masks
+ let mask_a0 = _mm256_setr_epi8(
+ 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1,
+ -1, 8, 9, -1, -1, -1, -1, 10, 11,
+ );
+ let mask_a1 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1, -1, -1,
+ -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1,
+ );
+ let mask_a2 = _mm256_setr_epi8(
+ -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1, 12, 13,
+ -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ );
+ let mask_b0 = _mm256_setr_epi8(
+ -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1,
+ -1, -1, -1, 8, 9, -1, -1, -1, -1,
+ );
+ let mask_b1 = _mm256_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1,
+ -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5,
+ );
+ let mask_b2 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1,
+ 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ );
+ let mask_c0 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1,
+ 6, 7, -1, -1, -1, -1, 8, 9, -1, -1,
+ );
+ let mask_c1 = _mm256_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0,
+ 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1,
+ );
+ let mask_c2 = _mm256_setr_epi8(
+ 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1,
+ -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ );
+
+ // Create duplicated vectors for lane swizzling
+ let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a);
+ let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b);
+ let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c);
+
+ let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a);
+ let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b);
+ let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c);
+
+ let out0 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_lo, mask_a0),
+ _mm256_shuffle_epi8(b_dup_lo, mask_b0),
+ ),
+ _mm256_shuffle_epi8(c_dup_lo, mask_c0),
+ );
+
+ let out1 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a, mask_a1),
+ _mm256_shuffle_epi8(b, mask_b1),
+ ),
+ _mm256_shuffle_epi8(c, mask_c1),
+ );
+
+ let out2 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_hi, mask_a2),
+ _mm256_shuffle_epi8(b_dup_hi, mask_b2),
+ ),
+ _mm256_shuffle_epi8(c_dup_hi, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ d: __m256i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecAvx::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm256_unpacklo_epi16(a, b);
+ let ab_hi = _mm256_unpackhi_epi16(a, b);
+ let cd_lo = _mm256_unpacklo_epi16(c, d);
+ let cd_hi = _mm256_unpackhi_epi16(c, d);
+
+ // Then interleave the pairs to get 4-u16 chunks (8 bytes)
+ let out0_p = _mm256_unpacklo_epi32(ab_lo, cd_lo);
+ let out1_p = _mm256_unpackhi_epi32(ab_lo, cd_lo);
+ let out2_p = _mm256_unpacklo_epi32(ab_hi, cd_hi);
+ let out3_p = _mm256_unpackhi_epi32(ab_hi, cd_hi);
+
+ // Reorder lanes
+ let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p);
+ let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p);
+ let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p);
+ let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ _mm256_storeu_si256(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskAvx {
type Descriptor = AvxDescriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
index 89086c50c9715..48bc32a61032b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
@@ -3,7 +3,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use super::super::{AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{
+ AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec,
+};
use crate::{Sse42Descriptor, U32SimdVec, impl_f32_array_interface};
use std::{
arch::x86_64::*,
@@ -43,6 +45,8 @@ impl SimdDescriptor for Avx512Descriptor {
type F32Vec = F32VecAvx512;
type I32Vec = I32VecAvx512;
type U32Vec = U32VecAvx512;
+ type U8Vec = U8VecAvx512;
+ type U16Vec = U16VecAvx512;
type Mask = MaskAvx512;
type Bf16Table8 = Bf16Table8Avx512;
@@ -149,9 +153,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out0 = _mm512_permutex2var_ps(lo, idx_lo, hi);
let out1 = _mm512_permutex2var_ps(lo, idx_hi, hi);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
}
@@ -192,9 +196,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out2 = _mm512_permutex2var_ps(a, idx_ab2, b);
let out2 = _mm512_mask_permutexvar_ps(out2, 0b1001001001001001, idx_c2, c);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
_mm512_storeu_ps(dest_ptr.add(32), out2);
@@ -291,9 +295,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out1 = _mm512_permutex2var_ps(pair01_13, idx_0, pair23_13);
let out3 = _mm512_permutex2var_ps(pair01_13, idx_1, pair23_13);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
_mm512_storeu_ps(dest_ptr.add(32), out2);
@@ -428,9 +432,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out6 = _mm512_permutex2var_ps(full_0_13, idx_hi, full_1_13);
let out7 = _mm512_permutex2var_ps(full_2_13, idx_hi, full_3_13);
- // SAFETY: we just checked that dest has enough space.
+ // SAFETY: we just checked that dest has enough space. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let ptr = dest.as_mut_ptr();
+ let ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(ptr, out0);
_mm512_storeu_ps(ptr.add(16), out1);
_mm512_storeu_ps(ptr.add(32), out2);
@@ -454,7 +458,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
assert!(src.len() >= 2 * F32VecAvx512::LEN);
// Input: [a0,b0,a1,b1,...,a15,b15]
// Output: a = [a0..a15], b = [b0..b15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -491,7 +495,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// in2: [c10,a11,b11,c11,a12,b12,c12,a13,b13,c13,a14,b14,c14,a15,b15,c15]
// Output: a = [a0..a15], b = [b0..b15], c = [c0..c15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1, in2) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -544,7 +548,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
assert!(src.len() >= 4 * F32VecAvx512::LEN);
// Input: [a0,b0,c0,d0,a1,b1,c1,d1,...] (64 floats)
// Output: a = [a0..a15], b = [b0..b15], c = [c0..c15], d = [d0..d15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1, in2, in3) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -700,7 +704,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// Store 16 bytes
// SAFETY: we checked dest has enough space
unsafe {
- _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u8s);
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), u8s);
}
}
// SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor.
@@ -722,7 +726,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// Store 16 u16s (32 bytes)
// SAFETY: we checked dest has enough space
unsafe {
- _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, u16s);
+ _mm256_storeu_si256(dest.as_mut_ptr().cast(), u16s);
}
}
// SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor.
@@ -738,8 +742,8 @@ unsafe impl F32SimdVec for F32VecAvx512 {
#[inline]
fn load_f16_impl(d: Avx512Descriptor, mem: &[u16]) -> F32VecAvx512 {
assert!(mem.len() >= F32VecAvx512::LEN);
- // SAFETY: mem.len() >= 16 is checked above
- let bits = unsafe { _mm256_loadu_si256(mem.as_ptr() as *const __m256i) };
+ // SAFETY: mem.len() >= 16 is checked above.
+ let bits = unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) };
F32VecAvx512(_mm512_cvtph_ps(bits), d)
}
// SAFETY: avx512f is available from the safety invariant on the descriptor
@@ -754,8 +758,8 @@ unsafe impl F32SimdVec for F32VecAvx512 {
fn store_f16_bits_impl(v: __m512, dest: &mut [u16]) {
assert!(dest.len() >= F32VecAvx512::LEN);
let bits = _mm512_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v);
- // SAFETY: dest.len() >= 16 is checked above
- unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, bits) };
+ // SAFETY: dest.len() >= 16 is checked above.
+ unsafe { _mm256_storeu_si256(dest.as_mut_ptr().cast(), bits) };
}
// SAFETY: avx512f is available from the safety invariant on the descriptor
unsafe { store_f16_bits_impl(self.0, dest) }
@@ -1070,6 +1074,22 @@ impl I32SimdVec for I32VecAvx512 {
// SAFETY: avx512f is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "avx512f")]
+ #[inline]
+ fn store_u8_impl(v: __m512i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecAvx512::LEN);
+ let tmp_vec = _mm512_cvtepi32_epi8(v);
+            // SAFETY: We just checked `dest` has enough space.
+ unsafe {
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), tmp_vec);
+ }
+ }
+ // SAFETY: avx512f is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecAvx512> for I32VecAvx512 {
@@ -1204,6 +1224,398 @@ impl U32SimdVec for U32VecAvx512 {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecAvx512(__m512i, Avx512Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecAvx512 {
+ type Descriptor = Avx512Descriptor;
+ const LEN: usize = 64;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+ // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads.
+ unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know avx512f is available from the safety invariant on `d`.
+ unsafe { Self(_mm512_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+        // from the safety invariant on `self.1`. _mm512_storeu_si512 supports unaligned stores.
+ unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecAvx512::LEN);
+ let lo = _mm512_unpacklo_epi8(a, b);
+ let hi = _mm512_unpackhi_epi8(a, b);
+ let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+ let out0 = _mm512_permutex2var_epi64(lo, idx0, hi);
+ let out1 = _mm512_permutex2var_epi64(lo, idx1, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_2(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 3 * U8VecAvx512::LEN);
+
+ let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5,
+ ));
+ let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1,
+ ));
+ let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1,
+ ));
+
+ let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1,
+ ));
+ let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10,
+ ));
+ let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1,
+ ));
+
+ let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ ));
+ let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ ));
+ let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ ));
+
+ let res0 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a0),
+ _mm512_shuffle_epi8(b, mask_b0),
+ ),
+ _mm512_shuffle_epi8(c, mask_c0),
+ );
+ let res1 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a1),
+ _mm512_shuffle_epi8(b, mask_b1),
+ ),
+ _mm512_shuffle_epi8(c, mask_c1),
+ );
+ let res2 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a2),
+ _mm512_shuffle_epi8(b, mask_b2),
+ ),
+ _mm512_shuffle_epi8(c, mask_c2),
+ );
+ let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1);
+ let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1);
+ let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5);
+ let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2);
+ let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1);
+ let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2);
+ let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5);
+ let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0);
+ let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1);
+ let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0);
+ let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5);
+ let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, final0);
+ _mm512_storeu_si512(ptr.add(1), final1);
+ _mm512_storeu_si512(ptr.add(2), final2);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_3(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_4(a: __m512i, b: __m512i, c: __m512i, d: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 4 * U8VecAvx512::LEN);
+ let ab_lo = _mm512_unpacklo_epi8(a, b);
+ let ab_hi = _mm512_unpackhi_epi8(a, b);
+ let cd_lo = _mm512_unpacklo_epi8(c, d);
+ let cd_hi = _mm512_unpackhi_epi8(c, d);
+
+ let abcd_0 = _mm512_unpacklo_epi16(ab_lo, cd_lo);
+ let abcd_1 = _mm512_unpackhi_epi16(ab_lo, cd_lo);
+ let abcd_2 = _mm512_unpacklo_epi16(ab_hi, cd_hi);
+ let abcd_3 = _mm512_unpackhi_epi16(ab_hi, cd_hi);
+
+ let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+
+ let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1);
+ let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1);
+ let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3);
+ let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3);
+
+ let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11);
+ let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15);
+
+ let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02);
+ let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02);
+ let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13);
+ let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ _mm512_storeu_si512(ptr.add(2), out2);
+ _mm512_storeu_si512(ptr.add(3), out3);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_4(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecAvx512(__m512i, Avx512Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecAvx512 {
+ type Descriptor = Avx512Descriptor;
+ const LEN: usize = 32;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+ // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads.
+ unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+        // SAFETY: We know avx512f is available from the safety invariant on `d`.
+ unsafe { Self(_mm512_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+        // from the safety invariant on `self.1`. _mm512_storeu_si512 supports unaligned stores.
+ unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecAvx512::LEN);
+ let lo = _mm512_unpacklo_epi16(a, b);
+ let hi = _mm512_unpackhi_epi16(a, b);
+ let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+ let out0 = _mm512_permutex2var_epi64(lo, idx0, hi);
+ let out1 = _mm512_permutex2var_epi64(lo, idx1, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_2(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 3 * U16VecAvx512::LEN);
+
+ let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1,
+ ));
+ let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5,
+ ));
+ let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1,
+ ));
+
+ let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11,
+ ));
+ let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1,
+ ));
+ let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1,
+ ));
+
+ let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ ));
+ let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ ));
+ let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ ));
+
+ let res0 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a0),
+ _mm512_shuffle_epi8(b, mask_b0),
+ ),
+ _mm512_shuffle_epi8(c, mask_c0),
+ );
+ let res1 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a1),
+ _mm512_shuffle_epi8(b, mask_b1),
+ ),
+ _mm512_shuffle_epi8(c, mask_c1),
+ );
+ let res2 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a2),
+ _mm512_shuffle_epi8(b, mask_b2),
+ ),
+ _mm512_shuffle_epi8(c, mask_c2),
+ );
+
+ let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1);
+ let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1);
+ let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5);
+ let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2);
+
+ let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1);
+ let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2);
+ let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5);
+ let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0);
+
+ let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1);
+ let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0);
+ let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5);
+ let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, final0);
+ _mm512_storeu_si512(ptr.add(1), final1);
+ _mm512_storeu_si512(ptr.add(2), final2);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_3(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_4(
+ a: __m512i,
+ b: __m512i,
+ c: __m512i,
+ d: __m512i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecAvx512::LEN);
+ let ab_lo = _mm512_unpacklo_epi16(a, b);
+ let ab_hi = _mm512_unpackhi_epi16(a, b);
+ let cd_lo = _mm512_unpacklo_epi16(c, d);
+ let cd_hi = _mm512_unpackhi_epi16(c, d);
+
+ let abcd_0 = _mm512_unpacklo_epi32(ab_lo, cd_lo);
+ let abcd_1 = _mm512_unpackhi_epi32(ab_lo, cd_lo);
+ let abcd_2 = _mm512_unpacklo_epi32(ab_hi, cd_hi);
+ let abcd_3 = _mm512_unpackhi_epi32(ab_hi, cd_hi);
+
+ // Transpose 4x4 of 128-bit lanes (same as u8)
+ let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+
+ let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1);
+ let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1);
+ let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3);
+ let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3);
+
+ let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11);
+ let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15);
+
+ let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02);
+ let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02);
+ let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13);
+ let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ _mm512_storeu_si512(ptr.add(2), out2);
+ _mm512_storeu_si512(ptr.add(3), out3);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_4(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskAvx512 {
type Descriptor = Avx512Descriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
index b4021570c3f6d..5a4f52f4c30f9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
@@ -5,7 +5,7 @@
use crate::{U32SimdVec, impl_f32_array_interface};
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
use std::{
arch::x86_64::*,
mem::MaybeUninit,
@@ -31,6 +31,8 @@ impl SimdDescriptor for Sse42Descriptor {
type F32Vec = F32VecSse42;
type I32Vec = I32VecSse42;
type U32Vec = U32VecSse42;
+ type U16Vec = U16VecSse42;
+ type U8Vec = U8VecSse42;
type Mask = MaskSse42;
type Bf16Table8 = Bf16Table8Sse42;
@@ -131,7 +133,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
let hi = _mm_unpackhi_ps(a, b);
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, lo);
_mm_storeu_ps(dest_ptr.add(4), hi);
}
@@ -184,7 +186,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
// Store the results
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, out0);
_mm_storeu_ps(dest_ptr.add(4), out1);
_mm_storeu_ps(dest_ptr.add(8), out2);
@@ -227,7 +229,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, out0);
_mm_storeu_ps(dest_ptr.add(4), out1);
_mm_storeu_ps(dest_ptr.add(8), out2);
@@ -575,10 +577,15 @@ unsafe impl F32SimdVec for F32VecSse42 {
let u16s = _mm_packus_epi32(i32s, i32s);
let u8s = _mm_packus_epi16(u16s, u16s);
// Store lower 4 bytes
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si32(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- let ptr = dest.as_mut_ptr() as *mut i32;
- *ptr = _mm_cvtsi128_si32(u8s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
@@ -598,9 +605,15 @@ unsafe impl F32SimdVec for F32VecSse42 {
// Pack i32 -> u16 (use same vector twice, take lower half)
let u16s = _mm_packus_epi32(i32s, i32s);
// Store lower 8 bytes (4 u16s)
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si64(u16s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u16s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
@@ -732,7 +745,7 @@ impl I32SimdVec for I32VecSse42 {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
// from the safety invariant on `d`.
- Self(unsafe { _mm_loadu_si128(mem.as_ptr() as *const _) }, d)
+ Self(unsafe { _mm_loadu_si128(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
@@ -820,17 +833,50 @@ impl I32SimdVec for I32VecSse42 {
#[inline]
fn store_u16_impl(v: __m128i, dest: &mut [u16]) {
assert!(dest.len() >= I32VecSse42::LEN);
- // Use scalar loop since _mm_packs_epi32 would saturate incorrectly for unsigned values
- let mut tmp = [0i32; 4];
- // SAFETY: tmp has 4 elements, matching LEN
- unsafe { _mm_storeu_si128(tmp.as_mut_ptr() as *mut __m128i, v) };
- for i in 0..4 {
- dest[i] = tmp[i] as u16;
+ // Truncate i32 -> u16 using shuffle
+ let shuffle_mask =
+ _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1);
+ let u16s = _mm_shuffle_epi8(v, shuffle_mask);
+ let val = _mm_cvtsi128_si64(u16s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_u8_impl(v: __m128i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecSse42::LEN);
+ // Truncate i32 -> u8 using shuffle
+ let shuffle_mask =
+ _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+ let u8s = _mm_shuffle_epi8(v, shuffle_mask);
+ // Store lower 4 bytes
+ let val = _mm_cvtsi128_si32(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecSse42> for I32VecSse42 {
@@ -939,6 +985,312 @@ impl U32SimdVec for U32VecSse42 {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecSse42(__m128i, Sse42Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecSse42 {
+ type Descriptor = Sse42Descriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `d`.
+ unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know sse4.2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `self.1`.
+ unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecSse42::LEN);
+ let lo = _mm_unpacklo_epi8(a, b);
+ let hi = _mm_unpackhi_epi8(a, b);
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, lo);
+ _mm_storeu_si128(dest_ptr.add(1), hi);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 3 * U8VecSse42::LEN);
+
+ // Masks for out0
+ let mask_a0 = _mm_setr_epi8(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5);
+ let mask_b0 = _mm_setr_epi8(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1);
+ let mask_c0 = _mm_setr_epi8(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1);
+
+ // Masks for out1
+ let mask_a1 = _mm_setr_epi8(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1);
+ let mask_b1 = _mm_setr_epi8(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10);
+ let mask_c1 = _mm_setr_epi8(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1);
+
+ // Masks for out2
+ let mask_a2 = _mm_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ );
+ let mask_b2 = _mm_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ );
+ let mask_c2 = _mm_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ );
+
+ let out0 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)),
+ _mm_shuffle_epi8(c, mask_c0),
+ );
+ let out1 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)),
+ _mm_shuffle_epi8(c, mask_c1),
+ );
+ let out2 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)),
+ _mm_shuffle_epi8(c, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(ptr, out0);
+ _mm_storeu_si128(ptr.add(1), out1);
+ _mm_storeu_si128(ptr.add(2), out2);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ d: __m128i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * U8VecSse42::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm_unpacklo_epi8(a, b);
+ let ab_hi = _mm_unpackhi_epi8(a, b);
+ let cd_lo = _mm_unpacklo_epi8(c, d);
+ let cd_hi = _mm_unpackhi_epi8(c, d);
+
+ // Then interleave the pairs to get final layout
+ let out0 = _mm_unpacklo_epi16(ab_lo, cd_lo);
+ let out1 = _mm_unpackhi_epi16(ab_lo, cd_lo);
+ let out2 = _mm_unpacklo_epi16(ab_hi, cd_hi);
+ let out3 = _mm_unpackhi_epi16(ab_hi, cd_hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, out0);
+ _mm_storeu_si128(dest_ptr.add(1), out1);
+ _mm_storeu_si128(dest_ptr.add(2), out2);
+ _mm_storeu_si128(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecSse42(__m128i, Sse42Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecSse42 {
+ type Descriptor = Sse42Descriptor;
+ const LEN: usize = 8;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `d`.
+ unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: We know sse4.2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `self.1`.
+ unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecSse42::LEN);
+ let lo = _mm_unpacklo_epi16(a, b);
+ let hi = _mm_unpackhi_epi16(a, b);
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, lo);
+ _mm_storeu_si128(dest_ptr.add(1), hi);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 3 * U16VecSse42::LEN);
+
+ // Masks for out0
+ let mask_a0 = _mm_setr_epi8(0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1);
+ let mask_b0 = _mm_setr_epi8(-1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5);
+ let mask_c0 = _mm_setr_epi8(-1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1);
+
+ // Masks for out1
+ let mask_a1 = _mm_setr_epi8(-1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11);
+ let mask_b1 = _mm_setr_epi8(-1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1);
+ let mask_c1 = _mm_setr_epi8(4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1);
+
+ // Masks for out2
+ let mask_a2 = _mm_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ );
+ let mask_b2 = _mm_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ );
+ let mask_c2 = _mm_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ );
+
+ let out0 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)),
+ _mm_shuffle_epi8(c, mask_c0),
+ );
+ let out1 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)),
+ _mm_shuffle_epi8(c, mask_c1),
+ );
+ let out2 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)),
+ _mm_shuffle_epi8(c, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(ptr, out0);
+ _mm_storeu_si128(ptr.add(1), out1);
+ _mm_storeu_si128(ptr.add(2), out2);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ d: __m128i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecSse42::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm_unpacklo_epi16(a, b);
+ let ab_hi = _mm_unpackhi_epi16(a, b);
+ let cd_lo = _mm_unpacklo_epi16(c, d);
+ let cd_hi = _mm_unpackhi_epi16(c, d);
+
+ // Then interleave the pairs to get final layout
+ let out0 = _mm_unpacklo_epi32(ab_lo, cd_lo);
+ let out1 = _mm_unpackhi_epi32(ab_lo, cd_lo);
+ let out2 = _mm_unpacklo_epi32(ab_hi, cd_hi);
+ let out3 = _mm_unpackhi_epi32(ab_hi, cd_hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, out0);
+ _mm_storeu_si128(dest_ptr.add(1), out1);
+ _mm_storeu_si128(dest_ptr.add(2), out2);
+ _mm_storeu_si128(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskSse42 {
type Descriptor = Sse42Descriptor;
diff --git a/third_party/rust/jxl/v0_3/BUILD.gn b/third_party/rust/jxl/v0_3/BUILD.gn
index 4407346d077db..87612e3a76cfa 100644
--- a/third_party/rust/jxl/v0_3/BUILD.gn
+++ b/third_party/rust/jxl/v0_3/BUILD.gn
@@ -28,11 +28,13 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/signature.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/bit_reader.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/tf.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/box_header.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/parse.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/context_map.rs",
@@ -53,6 +55,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/color_correlation_map.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs",
@@ -101,6 +104,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/channels.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs",
@@ -148,6 +152,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/float16.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/linalg.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/log2.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/ndarray.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/rational_poly.rs",
diff --git a/third_party/rust/jxl/v0_3/wrapper/lib.rs b/third_party/rust/jxl/v0_3/wrapper/lib.rs
index 0e7e83dc6f1d5..66696c1235d80 100644
--- a/third_party/rust/jxl/v0_3/wrapper/lib.rs
+++ b/third_party/rust/jxl/v0_3/wrapper/lib.rs
@@ -4,8 +4,12 @@
//! Minimal C++ wrapper for jxl-rs decoder.
//!
-//! This thin wrapper provides C++-compatible types for the jxl-rs decoder.
-//! State tracking is handled by the C++ caller (JXLImageDecoder).
+//! Two decoder types are exposed:
+//!
+//! - `JxlRsFrameScanner`: lightweight frame-header-only scanner that discovers
+//! frame count, durations, and seek offsets without decoding any pixels.
+//! - `JxlRsDecoder`: full pixel decoder with the original state-machine API,
+//! plus new seeking and progressive flush support.
use jxl::api::{
check_signature, Endianness, JxlBasicInfo, JxlColorEncoding, JxlColorProfile, JxlColorType,
@@ -62,15 +66,60 @@ mod ffi {
bytes_consumed: usize,
}
+ /// Information about a single visible frame discovered by the scanner.
+ #[derive(Debug, Clone)]
+ struct JxlRsVisibleFrameInfo {
+ /// Duration in milliseconds.
+ duration_ms: f64,
+ /// Whether this frame can be decoded independently (no dependencies).
+ is_keyframe: bool,
+ /// Whether this is the last frame in the codestream.
+ is_last: bool,
+ /// File byte offset to start feeding input from when seeking.
+ decode_start_file_offset: usize,
+ /// Box parser state at seek point (for container-wrapped files).
+ remaining_in_box: u64,
+ /// Number of visible frames to skip after seeking before decoding
+ /// the target.
+ visible_frames_to_skip: usize,
+ }
+
extern "Rust" {
+ // ---- Frame scanner (lightweight, no pixel decoding) ----
+ type JxlRsFrameScanner;
+
+ fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner>;
+
+ /// Feed data to the scanner. Returns Success when all frames have been
+ /// scanned (is_last seen), NeedMoreInput if more data is needed, or
+ /// Error on failure.
+ fn feed(
+ self: &mut JxlRsFrameScanner,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult;
+
+ /// Get basic info (valid after first successful feed).
+ fn get_basic_info(self: &JxlRsFrameScanner) -> JxlRsBasicInfo;
+
+ /// Get ICC profile data.
+ fn get_icc_profile(self: &JxlRsFrameScanner) -> &[u8];
+
+ /// Number of visible frames discovered so far.
+ fn frame_count(self: &JxlRsFrameScanner) -> usize;
+
+ /// Get info for a specific frame index.
+ fn get_frame_info(self: &JxlRsFrameScanner, index: usize) -> JxlRsVisibleFrameInfo;
+
+ /// Whether basic info has been parsed.
+ fn has_basic_info(self: &JxlRsFrameScanner) -> bool;
+
+ // ---- Full pixel decoder ----
type JxlRsDecoder;
fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder>;
fn jxl_rs_signature_check(data: &[u8]) -> bool;
- /// Rewind decoder for animation loop replay.
- fn rewind(self: &mut JxlRsDecoder);
-
/// Set the output pixel format. Must be called after getting basic info.
fn set_pixel_format(
self: &mut JxlRsDecoder,
@@ -85,53 +134,198 @@ mod ffi {
all_input: bool,
) -> JxlRsProcessResult;
- /// Parse until next frame header is available. Returns Success if no more frames.
+ /// Parse until next frame header is available.
fn parse_frame_header(
self: &mut JxlRsDecoder,
data: &[u8],
all_input: bool,
) -> JxlRsProcessResult;
- /// Decode frame pixels into the provided buffer.
- fn decode_frame(
+ /// Decode frame pixels with custom stride (for direct frame buffer
+ /// decoding).
+ fn decode_frame_with_stride(
self: &mut JxlRsDecoder,
data: &[u8],
all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
+ row_stride: usize,
) -> JxlRsProcessResult;
- /// Decode frame pixels with custom stride (for direct frame buffer decoding).
- fn decode_frame_with_stride(
+ /// Flush whatever pixels have been decoded so far into the buffer.
+ /// Use for progressive rendering.
+ fn flush_pixels(
self: &mut JxlRsDecoder,
- data: &[u8],
- all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
row_stride: usize,
) -> JxlRsProcessResult;
- /// Get basic info (valid after parse_basic_info succeeds, or a decode
- /// call that yields BasicInfo).
+ /// Get basic info (valid after parse_basic_info succeeds).
fn get_basic_info(self: &JxlRsDecoder) -> JxlRsBasicInfo;
/// Get frame header (valid after parse_frame_header succeeds).
fn get_frame_header(self: &JxlRsDecoder) -> JxlRsFrameHeader;
/// Get ICC profile data (valid after parse_basic_info succeeds).
- /// Returns an empty slice if no embedded ICC profile exists.
fn get_icc_profile(self: &JxlRsDecoder) -> &[u8];
/// Check if more frames are available.
fn has_more_frames(self: &JxlRsDecoder) -> bool;
+
+ /// Seek the decoder to a specific frame using offsets from the scanner.
+ /// After calling this, provide input starting from
+ /// decode_start_file_offset. The decoder must have basic info parsed.
+ fn seek_to_frame(
+ self: &mut JxlRsDecoder,
+ remaining_in_box: u64,
+ );
+
+    /// Skip a single visible frame without decoding pixels (call repeatedly to skip N).
+ /// Use after seek_to_frame when visible_frames_to_skip > 0.
+ /// Returns Success when one frame has been skipped, NeedMoreInput or
+ /// Error otherwise.
+ fn skip_visible_frame(
+ self: &mut JxlRsDecoder,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult;
}
}
use ffi::*;
-/// Thin wrapper around JxlDecoderInner.
+// ---------------------------------------------------------------------------
+// Frame Scanner
+// ---------------------------------------------------------------------------
+
+/// Lightweight scanner that discovers frame info without decoding pixels.
+pub struct JxlRsFrameScanner {
+ decoder: JxlDecoderInner,
+ icc_profile: Vec<u8>,
+ has_basic_info: bool,
+}
+
+fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner> {
+ let mut opts = JxlDecoderOptions::default();
+ opts.scan_frames_only = true;
+ if pixel_limit > 0 {
+ opts.pixel_limit = Some(pixel_limit as usize);
+ }
+
+ Box::new(JxlRsFrameScanner {
+ decoder: JxlDecoderInner::new(opts),
+ icc_profile: Vec::new(),
+ has_basic_info: false,
+ })
+}
+
+impl JxlRsFrameScanner {
+ fn feed(&mut self, data: &[u8], all_input: bool) -> JxlRsProcessResult {
+ let mut input = data;
+ let len_before = input.len();
+
+ loop {
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => {
+ if !self.has_basic_info && self.decoder.basic_info().is_some() {
+ self.has_basic_info = true;
+ if let Some(profile) = self.decoder.output_color_profile() {
+ if let Some(icc) = profile.try_as_icc() {
+ if !icc.is_empty() {
+ self.icc_profile = icc.into_owned();
+ }
+ }
+ }
+ }
+
+ if !self.decoder.has_more_frames() {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Success,
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ }
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => {
+ return JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ Err(_) => {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ };
+ }
+ }
+ }
+ }
+
+ fn get_basic_info(&self) -> JxlRsBasicInfo {
+ let mut info = self
+ .decoder
+ .basic_info()
+ .map(JxlRsBasicInfo::from)
+ .unwrap_or_default();
+
+ if let Some(profile) = self.decoder.embedded_color_profile() {
+ info.is_grayscale = matches!(
+ profile,
+ JxlColorProfile::Simple(JxlColorEncoding::GrayscaleColorSpace { .. })
+ );
+ }
+
+ info
+ }
+
+ fn get_icc_profile(&self) -> &[u8] {
+ &self.icc_profile
+ }
+
+ fn frame_count(&self) -> usize {
+ self.decoder.scanned_frames().len()
+ }
+
+ fn get_frame_info(&self, index: usize) -> JxlRsVisibleFrameInfo {
+ let frames = self.decoder.scanned_frames();
+ if index >= frames.len() {
+ return JxlRsVisibleFrameInfo {
+ duration_ms: 0.0,
+ is_keyframe: false,
+ is_last: false,
+ decode_start_file_offset: 0,
+ remaining_in_box: 0,
+ visible_frames_to_skip: 0,
+ };
+ }
+ let f = &frames[index];
+ JxlRsVisibleFrameInfo {
+ duration_ms: f.duration_ms,
+ is_keyframe: f.is_keyframe,
+ is_last: f.is_last,
+ decode_start_file_offset: f.seek_target.decode_start_file_offset,
+ remaining_in_box: f.seek_target.remaining_in_box,
+ visible_frames_to_skip: f.seek_target.visible_frames_to_skip,
+ }
+ }
+
+ fn has_basic_info(&self) -> bool {
+ self.has_basic_info
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Full Pixel Decoder
+// ---------------------------------------------------------------------------
+
+/// Full pixel decoder with seeking and progressive flush support.
pub struct JxlRsDecoder {
decoder: JxlDecoderInner,
pixel_format: Option<JxlPixelFormat>,
@@ -140,7 +334,7 @@ pub struct JxlRsDecoder {
fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder> {
let mut opts = JxlDecoderOptions::default();
- opts.progressive_mode = JxlProgressiveMode::FullFrame;
+ opts.progressive_mode = JxlProgressiveMode::Pass;
opts.premultiply_output = premultiply_alpha;
if pixel_limit > 0 {
opts.pixel_limit = Some(pixel_limit as usize);
@@ -162,10 +356,6 @@ fn jxl_rs_signature_check(data: &[u8]) -> bool {
}
impl JxlRsDecoder {
- fn rewind(&mut self) {
- let _ = self.decoder.rewind();
- }
-
fn set_pixel_format(&mut self, format: JxlRsPixelFormat, num_extra_channels: u32) {
let pixel_format = match format {
JxlRsPixelFormat::Rgba8 => JxlPixelFormat {
@@ -216,9 +406,6 @@ impl JxlRsDecoder {
match self.decoder.process(&mut input, None) {
Ok(ProcessingResult::Complete { .. }) => {
- // Extract ICC profile on first successful parse.
- // Use try_as_icc() which returns None on error instead of
- // as_icc() which panics on malformed color profiles.
if self.icc_profile.is_empty() {
if let Some(profile) = self.decoder.output_color_profile() {
if let Some(icc) = profile.try_as_icc() {
@@ -282,22 +469,17 @@ impl JxlRsDecoder {
}
}
- fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> {
- let fh = self.decoder.frame_header()?;
- Some(JxlRsFrameHeader {
- duration_ms: fh.duration.unwrap_or(0.0),
- name_length: fh.name.len() as u32,
- })
- }
-
- fn decode_frame(
+ fn decode_frame_with_stride(
&mut self,
data: &[u8],
all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
+ row_stride: usize,
) -> JxlRsProcessResult {
+ use std::mem::MaybeUninit;
+
let mut input = data;
let len_before = input.len();
@@ -308,8 +490,8 @@ impl JxlRsDecoder {
.map(|d| d.bytes_per_sample() * 4)
.unwrap_or(4);
let bytes_per_row = width as usize * bytes_per_pixel;
- let expected_size = bytes_per_row * height as usize;
+ let expected_size = row_stride * (height as usize - 1) + bytes_per_row;
if buffer.len() < expected_size {
return JxlRsProcessResult {
status: JxlRsStatus::Error,
@@ -317,7 +499,16 @@ impl JxlRsDecoder {
};
}
- let output = JxlOutputBuffer::new(buffer, height as usize, bytes_per_row);
+ // SAFETY: The buffer is valid for writes, and we've verified it has
+ // enough space.
+ let output = unsafe {
+ JxlOutputBuffer::new_from_ptr(
+ buffer.as_mut_ptr() as *mut MaybeUninit<u8>,
+ height as usize,
+ bytes_per_row,
+ row_stride,
+ )
+ };
match self.decoder.process(&mut input, Some(&mut [output])) {
Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
@@ -344,10 +535,8 @@ impl JxlRsDecoder {
}
}
- fn decode_frame_with_stride(
+ fn flush_pixels(
&mut self,
- data: &[u8],
- all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
@@ -355,9 +544,6 @@ impl JxlRsDecoder {
) -> JxlRsProcessResult {
use std::mem::MaybeUninit;
- let mut input = data;
- let len_before = input.len();
-
let bytes_per_pixel = self
.pixel_format
.as_ref()
@@ -366,7 +552,6 @@ impl JxlRsDecoder {
.unwrap_or(4);
let bytes_per_row = width as usize * bytes_per_pixel;
- // Validate buffer size with custom stride
let expected_size = row_stride * (height as usize - 1) + bytes_per_row;
if buffer.len() < expected_size {
return JxlRsProcessResult {
@@ -375,8 +560,6 @@ impl JxlRsDecoder {
};
}
- // SAFETY: The buffer is valid for writes, and we've verified it has enough space.
- // new_from_ptr allows custom stride (bytes_between_rows).
let output = unsafe {
JxlOutputBuffer::new_from_ptr(
buffer.as_mut_ptr() as *mut MaybeUninit<u8>,
@@ -386,24 +569,11 @@ impl JxlRsDecoder {
)
};
- match self.decoder.process(&mut input, Some(&mut [output])) {
- Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
+ match self.decoder.flush_pixels(&mut [output]) {
+ Ok(()) => JxlRsProcessResult {
status: JxlRsStatus::Success,
- bytes_consumed: len_before - input.len(),
+ bytes_consumed: 0,
},
- Ok(ProcessingResult::NeedsMoreInput { .. }) => {
- if all_input {
- JxlRsProcessResult {
- status: JxlRsStatus::Error,
- bytes_consumed: 0,
- }
- } else {
- JxlRsProcessResult {
- status: JxlRsStatus::NeedMoreInput,
- bytes_consumed: len_before - input.len(),
- }
- }
- }
Err(_) => JxlRsProcessResult {
status: JxlRsStatus::Error,
bytes_consumed: 0,
@@ -418,7 +588,6 @@ impl JxlRsDecoder {
.map(JxlRsBasicInfo::from)
.unwrap_or_default();
- // Check if the image is grayscale based on the embedded color profile.
if let Some(profile) = self.decoder.embedded_color_profile() {
info.is_grayscale = matches!(
profile,
@@ -429,6 +598,14 @@ impl JxlRsDecoder {
info
}
+ fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> {
+ let fh = self.decoder.frame_header()?;
+ Some(JxlRsFrameHeader {
+ duration_ms: fh.duration.unwrap_or(0.0),
+ name_length: fh.name.len() as u32,
+ })
+ }
+
fn get_frame_header(&self) -> JxlRsFrameHeader {
self.extract_frame_header().unwrap_or_default()
}
@@ -440,6 +617,60 @@ impl JxlRsDecoder {
fn has_more_frames(&self) -> bool {
self.decoder.has_more_frames()
}
+
+ fn seek_to_frame(&mut self, remaining_in_box: u64) {
+ self.decoder.start_new_frame(remaining_in_box);
+ }
+
+ fn skip_visible_frame(
+ &mut self,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult {
+ let mut input = data;
+ let len_before = input.len();
+
+ // Phase 1: process to get frame header (WithImageInfo -> WithFrameInfo)
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => {}
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => {
+ return JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ Err(_) => {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ };
+ }
+ }
+
+ // Phase 2: skip frame (WithFrameInfo -> WithImageInfo)
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
+ status: JxlRsStatus::Success,
+ bytes_consumed: len_before - input.len(),
+ },
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ },
+ Err(_) => JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ },
+ }
+ }
}
impl Default for JxlRsBasicInfo {
@@ -486,8 +717,6 @@ impl From<&JxlBasicInfo> for JxlRsBasicInfo {
animation_tps_denominator: tps_den,
uses_original_profile: info.uses_original_profile,
orientation: info.orientation as u32,
- // Note: is_grayscale is set by get_basic_info() after checking the
- // color profile, since JxlBasicInfo doesn't contain color info.
is_grayscale: false,
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment