Skip to content

Instantly share code, notes, and snippets.

@hjanuschka
Created March 12, 2026 07:51
Show Gist options
  • Select an option

  • Save hjanuschka/76c4bd303876ac22675c1f3c58b4f6bd to your computer and use it in GitHub Desktop.

Select an option

Save hjanuschka/76c4bd303876ac22675c1f3c58b4f6bd to your computer and use it in GitHub Desktop.
JXL decoder: use frame scanner + seeking for animations, progressive flush
diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
index 5cadd5f76b2b7..995f304d6fffa 100644
--- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
+++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc
@@ -16,20 +16,21 @@
namespace blink {
using jxl_rs::jxl_rs_decoder_create;
+using jxl_rs::jxl_rs_frame_scanner_create;
using jxl_rs::jxl_rs_signature_check;
using jxl_rs::JxlRsBasicInfo;
using jxl_rs::JxlRsDecoder;
using jxl_rs::JxlRsFrameHeader;
+using jxl_rs::JxlRsFrameScanner;
using jxl_rs::JxlRsPixelFormat;
using jxl_rs::JxlRsProcessResult;
using jxl_rs::JxlRsStatus;
+using jxl_rs::JxlRsVisibleFrameInfo;
namespace {
-// The maximum number of decoded samples we allow. This helps prevent resource
-// exhaustion from malicious files. The jxl-rs API counts pixels * channels,
-// so an RGBA image counts 4 samples per pixel. JPEG XL codestream level 5
-// limits specify ~268M pixels, so we allow ~1B samples to support that.
+// The maximum number of decoded samples (pixels * channels) we allow. JPEG XL
+// codestream level 5 limits specify ~268M pixels; ~1B samples covers RGBA.
constexpr uint64_t kMaxDecodedPixels = 1024ULL * 1024 * 1024;
} // namespace
@@ -72,6 +73,118 @@ bool JXLImageDecoder::MatchesJXLSignature(
rust::Slice<const uint8_t>(data.data(), data.size()));
}
+// ---------------------------------------------------------------------------
+// Frame scanning (no pixel decoding)
+// ---------------------------------------------------------------------------
+
+void JXLImageDecoder::ScanFrames() {
+ if (scanner_done_) {
+ return;
+ }
+
+ if (!scanner_.has_value()) {
+ scanner_ = jxl_rs_frame_scanner_create(kMaxDecodedPixels);
+ }
+
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+ size_t remaining = data_size - scanner_input_offset_;
+
+ if (remaining == 0 && !IsAllDataReceived()) {
+ return;
+ }
+
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span = remaining > 0
+ ? reader.GetConsecutiveData(scanner_input_offset_,
+ remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+
+ bool all_input =
+ IsAllDataReceived() && (scanner_input_offset_ + remaining >= data_size);
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result = (*scanner_)->feed(input_slice, all_input);
+
+ if (result.status == JxlRsStatus::Error) {
+ SetFailed();
+ return;
+ }
+
+ scanner_input_offset_ += result.bytes_consumed;
+
+ if (result.status == JxlRsStatus::Success) {
+ scanner_done_ = true;
+ }
+
+ // Extract basic info from scanner if not yet available.
+ if (!have_basic_info_ && (*scanner_)->has_basic_info()) {
+ basic_info_ = (*scanner_)->get_basic_info();
+
+ if (!SetSize(basic_info_.width, basic_info_.height)) {
+ return;
+ }
+
+ if (basic_info_.bits_per_sample > 8) {
+ is_high_bit_depth_ = true;
+ }
+
+ decode_to_half_float_ =
+ ImageIsHighBitDepth() &&
+ high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat;
+
+ if (!IgnoresColorSpace()) {
+ auto icc_data = (*scanner_)->get_icc_profile();
+ if (!icc_data.empty()) {
+ auto profile = ColorProfile::Create(icc_data);
+ if (profile) {
+ SetEmbeddedColorProfile(std::move(profile));
+ }
+ }
+ }
+
+ if (basic_info_.bits_per_sample == 8 && !basic_info_.is_grayscale &&
+ !basic_info_.have_animation && !basic_info_.has_alpha) {
+ static constexpr char kType[] = "Jxl";
+ update_bpp_histogram_callback_ =
+ CrossThreadBindOnce(&UpdateBppHistogram<kType>);
+ }
+
+ have_basic_info_ = true;
+ }
+
+ // Update frame_seek_info_ from the scanner's discovered frames.
+ size_t scanned_count = (*scanner_)->frame_count();
+ base::TimeDelta cumulative_time;
+
+ if (!frame_seek_info_.empty()) {
+ const auto& last = frame_seek_info_.back();
+ cumulative_time = last.timestamp + last.duration;
+ }
+
+ for (size_t i = frame_seek_info_.size(); i < scanned_count; i++) {
+ JxlRsVisibleFrameInfo info = (*scanner_)->get_frame_info(i);
+ FrameSeekInfo seek;
+ seek.duration = base::Milliseconds(info.duration_ms);
+ seek.timestamp = cumulative_time;
+ seek.is_keyframe = info.is_keyframe;
+ seek.decode_start_file_offset = info.decode_start_file_offset;
+ seek.remaining_in_box = info.remaining_in_box;
+ seek.visible_frames_to_skip = info.visible_frames_to_skip;
+
+ cumulative_time += seek.duration;
+ frame_seek_info_.push_back(seek);
+ }
+}
+
+// ---------------------------------------------------------------------------
+// ImageDecoder overrides
+// ---------------------------------------------------------------------------
+
void JXLImageDecoder::DecodeSize() {
Decode(0, /*only_size=*/true);
}
@@ -86,21 +199,12 @@ wtf_size_t JXLImageDecoder::DecodeFrameCount() {
return 1;
}
- // If we have received all the data, we must produce the correct
- // frame count. Thus, we always decode all the data we have.
- // TODO(veluca): for long animations, this will currently decode
- // the entire file, using a large amount of memory and CPU time.
- // Avoid doing that once jxl-rs supports seeking and/or frame
- // skipping.
- while (decoder_state_ != DecoderState::kDone) {
- size_t offset_pre = input_offset_;
- size_t decoded_frames_pre = num_decoded_frames_;
- Decode(num_decoded_frames_, /*only_size=*/false);
- // Exit the loop if the image is corrupted or we didn't make any progress.
- if (Failed() || (offset_pre == input_offset_ &&
- num_decoded_frames_ == decoded_frames_pre)) {
- break;
- }
+ // Use the lightweight scanner to discover frames without decoding pixels.
+ ScanFrames();
+
+ // Resize the frame buffer cache to match discovered frames.
+ if (frame_seek_info_.size() > frame_buffer_cache_.size()) {
+ frame_buffer_cache_.resize(frame_seek_info_.size());
}
return frame_buffer_cache_.size();
@@ -120,10 +224,8 @@ void JXLImageDecoder::InitializeNewFrame(wtf_size_t index) {
buffer.SetOriginalFrameRect(gfx::Rect(Size()));
buffer.SetRequiredPreviousFrameIndex(kNotFound);
- // Set duration/timestamp if the frame header has been parsed.
- // This is available before the frame is fully decoded.
- if (index < frame_info_.size()) {
- const FrameInfo& info = frame_info_[index];
+ if (index < frame_seek_info_.size()) {
+ const FrameSeekInfo& info = frame_seek_info_[index];
buffer.SetDuration(info.duration);
buffer.SetTimestamp(info.timestamp);
}
@@ -150,40 +252,36 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
}
}
- FastSharedBufferReader reader(data_.get());
- size_t data_size = reader.size();
-
- // Handle animation loop rewind.
- if (decoder_.has_value() && !only_size && basic_info_.have_animation) {
- bool frame_already_cached =
- index < frame_buffer_cache_.size() &&
- frame_buffer_cache_[index].GetStatus() == ImageFrame::kFrameComplete;
-
- if (!frame_already_cached && index < num_decoded_frames_) {
- (*decoder_)->rewind();
- decoder_state_ = DecoderState::kInitial;
- num_decoded_frames_ = 0;
- input_offset_ = 0;
- // Keep basic_info_ and have_basic_info_ since the stream hasn't changed.
+ // For animation frames that need seeking (not the next sequential frame),
+ // use the seek path.
+ if (!only_size && have_basic_info_ && basic_info_.have_animation &&
+ index != num_decoded_frames_) {
+ // Ensure we have seek info for this frame.
+ if (index >= frame_seek_info_.size()) {
+ ScanFrames();
+ if (Failed() || index >= frame_seek_info_.size()) {
+ return;
+ }
}
+ SeekAndDecodeFrame(index);
+ return;
}
- // Create decoder if needed. Pass premultiply_alpha_ so jxl-rs handles
- // premultiplication natively (faster and handles alpha_associated correctly).
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+
+ // Create decoder if needed.
if (!decoder_.has_value()) {
decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_);
}
// Process until we get what we need.
for (;;) {
- size_t remaining_size = data_size - input_offset_;
- // When all data is received, process it all at once for efficiency.
- // Only use smaller chunks for true progressive loading (streaming data).
+ size_t remaining_size = data_size - decoder_input_offset_;
size_t chunk_size;
if (IsAllDataReceived()) {
- chunk_size = remaining_size; // Process all available data
+ chunk_size = remaining_size;
} else {
- // Progressive streaming: use smaller chunks to allow partial rendering
constexpr size_t kMaxChunkSize = 64 * 1024;
chunk_size = std::min(remaining_size, kMaxChunkSize);
}
@@ -192,12 +290,13 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
Vector<uint8_t> chunk_buffer;
if (chunk_size > 0) {
chunk_buffer.resize(chunk_size);
- data_span = reader.GetConsecutiveData(input_offset_, chunk_size,
+ data_span = reader.GetConsecutiveData(decoder_input_offset_, chunk_size,
base::span(chunk_buffer));
}
bool all_input =
- IsAllDataReceived() && (input_offset_ + chunk_size >= data_size);
+ IsAllDataReceived() &&
+ (decoder_input_offset_ + chunk_size >= data_size);
rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
switch (decoder_state_) {
@@ -210,16 +309,16 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
if (all_input) {
SetFailed();
}
return;
}
- // Success - got basic info
+ // Success - got basic info.
basic_info_ = (*decoder_)->get_basic_info();
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
if (!SetSize(basic_info_.width, basic_info_.height)) {
return;
@@ -233,12 +332,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
ImageIsHighBitDepth() &&
high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat;
- // Set pixel format on decoder.
- // Use native 8-bit ordering for kN32, and RGBA F16 for half float.
#if SK_PMCOLOR_BYTE_ORDER(B, G, R, A)
- constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8;
+ constexpr JxlRsPixelFormat kNativePixelFormat =
+ JxlRsPixelFormat::Bgra8;
#elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A)
- constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8;
+ constexpr JxlRsPixelFormat kNativePixelFormat =
+ JxlRsPixelFormat::Rgba8;
#else
#error "Unsupported Skia pixel order"
#endif
@@ -248,7 +347,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
(*decoder_)->set_pixel_format(pixel_format,
basic_info_.num_extra_channels);
- // Extract ICC color profile.
if (!IgnoresColorSpace()) {
auto icc_data = (*decoder_)->get_icc_profile();
if (!icc_data.empty()) {
@@ -259,8 +357,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
}
}
- // Record bpp information only for 8-bit, color, still images without
- // alpha.
if (!have_basic_info_ && basic_info_.bits_per_sample == 8 &&
!basic_info_.is_grayscale && !basic_info_.have_animation &&
!basic_info_.has_alpha) {
@@ -287,31 +383,27 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
return;
}
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
- // Successfully parsed a frame header - increment discovered count.
JxlRsFrameHeader header = (*decoder_)->get_frame_header();
if (basic_info_.have_animation) {
wtf_size_t frame_idx = num_decoded_frames_;
- FrameInfo info;
- info.duration = base::Milliseconds(header.duration_ms);
- info.timestamp = base::TimeDelta();
-
- if (frame_idx > 0 && frame_idx - 1 < frame_info_.size()) {
- const FrameInfo& prev = frame_info_[frame_idx - 1];
- info.timestamp = prev.timestamp + prev.duration;
- }
- if (frame_idx < frame_info_.size()) {
- frame_info_[frame_idx] = info;
- } else {
- CHECK_EQ(frame_idx, frame_info_.size());
- frame_info_.push_back(info);
+ // Update frame_seek_info_ if we don't have it yet from the scanner.
+ if (frame_idx >= frame_seek_info_.size()) {
+ FrameSeekInfo info;
+ info.duration = base::Milliseconds(header.duration_ms);
+ info.timestamp = base::TimeDelta();
+ if (frame_idx > 0 && frame_idx - 1 < frame_seek_info_.size()) {
+ const FrameSeekInfo& prev = frame_seek_info_[frame_idx - 1];
+ info.timestamp = prev.timestamp + prev.duration;
+ }
+ frame_seek_info_.push_back(info);
}
}
@@ -322,23 +414,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
case DecoderState::kHaveFrameHeader: {
wtf_size_t frame_index = num_decoded_frames_;
- // Ensure frame buffer cache is large enough.
if (frame_buffer_cache_.size() <= frame_index) {
frame_buffer_cache_.resize(frame_index + 1);
}
ImageFrame& frame = frame_buffer_cache_[frame_index];
if (frame.GetStatus() == ImageFrame::kFrameEmpty) {
- // We call InitializeNewFrame manually here because JXLImageDecoder,
- // unlike other image decoder classes, handles the frame buffer cache
- // in the decode loop. This happens because decoding the frame count
- // also fully renders the frames - when we switch to lightweight
- // decoding for frame count + decoding individual frames via seeking,
- // we will likely be able to remove this call.
- //
- // IMPORTANT: InitializeNewFrame() must run before InitFrameBuffer(),
- // so the base class allocates the correct backing store (e.g.
- // RGBA_F16 for high bit depth + half float).
InitializeNewFrame(frame_index);
if (!InitFrameBuffer(frame_index)) {
SetFailed();
@@ -351,7 +432,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
const uint32_t width = basic_info_.width;
const uint32_t height = basic_info_.height;
- // Get direct access to the frame buffer's backing store.
const SkBitmap& bitmap = frame.Bitmap();
uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels());
size_t row_stride = bitmap.rowBytes();
@@ -361,12 +441,9 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
- // Calculate buffer size for the decoder.
size_t buffer_size = row_stride * height;
rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size);
- // Decode directly into the frame buffer.
- // Premultiplication is handled by jxl-rs based on premultiply_alpha_.
JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride(
input_slice, all_input, output_slice, width, height, row_stride);
@@ -375,56 +452,244 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) {
return;
}
if (result.status == JxlRsStatus::NeedMoreInput) {
- // Update offset with consumed bytes for progressive decoding.
- input_offset_ += result.bytes_consumed;
-
- // Signal that pixels may have changed for progressive rendering.
- // TODO(veluca): set the frame status to kFramePartial if and only
- // if jxl-rs signals that some data has been painted (jxl-rs
- // does not yet expose this functionality, nor does it do
- // progressive rendering properly).
- frame.SetStatus(ImageFrame::kFramePartial);
- frame.SetPixelsChanged(true);
+ decoder_input_offset_ += result.bytes_consumed;
+
+ // Progressive flush: render whatever pixels are available.
+ JxlRsProcessResult flush_result = (*decoder_)->flush_pixels(
+ output_slice, width, height, row_stride);
+ if (flush_result.status == JxlRsStatus::Success) {
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFramePartial);
+ }
+
if (all_input) {
SetFailed();
}
return;
}
- input_offset_ += result.bytes_consumed;
+ decoder_input_offset_ += result.bytes_consumed;
frame.SetPixelsChanged(true);
frame.SetStatus(ImageFrame::kFrameComplete);
- if (frame_index < frame_info_.size()) {
- const FrameInfo& info = frame_info_[frame_index];
+ if (frame_index < frame_seek_info_.size()) {
+ const FrameSeekInfo& info = frame_seek_info_[frame_index];
frame.SetDuration(info.duration);
frame.SetTimestamp(info.timestamp);
}
num_decoded_frames_++;
- // Record bpp histogram for still images when fully decoded.
if (IsAllDataReceived() && update_bpp_histogram_callback_) {
std::move(update_bpp_histogram_callback_).Run(Size(), data_->size());
}
if ((*decoder_)->has_more_frames()) {
- // Go back to waiting for next frame header.
decoder_state_ = DecoderState::kHaveBasicInfo;
} else {
decoder_state_ = DecoderState::kDone;
}
- // Check if we've decoded the requested frame.
if (frame_index >= index) {
return;
}
break;
}
case DecoderState::kDone:
- break;
+ return;
+ }
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Seek-based animation frame decode
+// ---------------------------------------------------------------------------
+
+void JXLImageDecoder::SeekAndDecodeFrame(wtf_size_t index) {
+ CHECK_LT(index, frame_seek_info_.size());
+ const FrameSeekInfo& seek = frame_seek_info_[index];
+
+ // Create a fresh decoder for seeking. The decoder needs to have basic info
+ // parsed before we can seek.
+ if (!decoder_.has_value()) {
+ decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_);
+ decoder_state_ = DecoderState::kInitial;
+ decoder_input_offset_ = 0;
+ }
+
+ FastSharedBufferReader reader(data_.get());
+ size_t data_size = reader.size();
+ bool all_input = IsAllDataReceived();
+
+ // Ensure decoder has basic info.
+ if (decoder_state_ == DecoderState::kInitial) {
+ size_t remaining = data_size - decoder_input_offset_;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(decoder_input_offset_, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->parse_basic_info(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ decoder_input_offset_ += result.bytes_consumed;
+
+ // Configure pixel format.
+#if SK_PMCOLOR_BYTE_ORDER(B, G, R, A)
+ constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8;
+#elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A)
+ constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8;
+#else
+#error "Unsupported Skia pixel order"
+#endif
+ JxlRsPixelFormat pixel_format =
+ decode_to_half_float_ ? JxlRsPixelFormat::RgbaF16
+ : kNativePixelFormat;
+ (*decoder_)->set_pixel_format(pixel_format, basic_info_.num_extra_channels);
+ decoder_state_ = DecoderState::kHaveBasicInfo;
+ }
+
+ // Seek to the frame's decode start position.
+ (*decoder_)->seek_to_frame(seek.remaining_in_box);
+ size_t input_offset = seek.decode_start_file_offset;
+
+ // Skip preceding visible frames if needed.
+ for (size_t i = 0; i < seek.visible_frames_to_skip; i++) {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->skip_visible_frame(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
+ }
+
+ // Ensure frame buffer cache is large enough.
+ if (frame_buffer_cache_.size() <= index) {
+ frame_buffer_cache_.resize(index + 1);
+ }
+
+ ImageFrame& frame = frame_buffer_cache_[index];
+ if (frame.GetStatus() == ImageFrame::kFrameEmpty) {
+ InitializeNewFrame(index);
+ if (!InitFrameBuffer(index)) {
+ SetFailed();
+ return;
+ }
+ }
+
+ frame.SetHasAlpha(basic_info_.has_alpha);
+
+ const uint32_t width = basic_info_.width;
+ const uint32_t height = basic_info_.height;
+
+ const SkBitmap& bitmap = frame.Bitmap();
+ uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels());
+ size_t row_stride = bitmap.rowBytes();
+
+ if (!frame_pixels) {
+ SetFailed();
+ return;
+ }
+
+ size_t buffer_size = row_stride * height;
+ rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size);
+
+ // Parse frame header.
+ {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
+ }
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result =
+ (*decoder_)->parse_frame_header(input_slice, all_input);
+ if (result.status != JxlRsStatus::Success) {
+ if (result.status == JxlRsStatus::Error || all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
+ }
+
+ // Decode pixels.
+ {
+ size_t remaining = data_size - input_offset;
+ Vector<uint8_t> chunk_buffer;
+ if (remaining > 0) {
+ chunk_buffer.resize(remaining);
}
+ auto data_span =
+ remaining > 0
+ ? reader.GetConsecutiveData(input_offset, remaining,
+ base::span(chunk_buffer))
+ : base::span<const uint8_t>();
+ rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size());
+
+ JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride(
+ input_slice, all_input, output_slice, width, height, row_stride);
+
+ if (result.status == JxlRsStatus::Error) {
+ SetFailed();
+ return;
+ }
+ if (result.status == JxlRsStatus::NeedMoreInput) {
+ JxlRsProcessResult flush_result =
+ (*decoder_)->flush_pixels(output_slice, width, height, row_stride);
+ if (flush_result.status == JxlRsStatus::Success) {
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFramePartial);
+ }
+ if (all_input) {
+ SetFailed();
+ }
+ return;
+ }
+ input_offset += result.bytes_consumed;
}
+
+ frame.SetPixelsChanged(true);
+ frame.SetStatus(ImageFrame::kFrameComplete);
+ frame.SetDuration(seek.duration);
+ frame.SetTimestamp(seek.timestamp);
+
+  // After seeking, the decoder's sequential-decode position is indeterminate.
+  // NOTE(review): decoder_input_offset_ is not updated here -- confirm that a
+  // later sequential Decode() re-seeks rather than resuming at a stale offset.
+ decoder_state_ = DecoderState::kHaveBasicInfo;
}
bool JXLImageDecoder::CanReusePreviousFrameBuffer(
@@ -441,20 +706,15 @@ bool JXLImageDecoder::FrameIsReceivedAtIndex(wtf_size_t index) const {
std::optional<base::TimeDelta> JXLImageDecoder::FrameTimestampAtIndex(
wtf_size_t index) const {
- // Use frame_info_ which is populated at header parsing time,
- // not frame_buffer_cache_ which is only set after decoding.
- if (index < frame_info_.size()) {
- return frame_info_[index].timestamp;
+ if (index < frame_seek_info_.size()) {
+ return frame_seek_info_[index].timestamp;
}
return std::nullopt;
}
base::TimeDelta JXLImageDecoder::FrameDurationAtIndex(wtf_size_t index) const {
- // Durations are available in frame_info_ for all discovered frames.
- // Frame discovery happens in DecodeFrameCount() which is called by
- // FrameCount() whenever new data arrives.
- if (index < frame_info_.size()) {
- return frame_info_[index].duration;
+ if (index < frame_seek_info_.size()) {
+ return frame_seek_info_[index].duration;
}
return base::TimeDelta();
}
@@ -472,13 +732,8 @@ int JXLImageDecoder::RepetitionCount() const {
wtf_size_t JXLImageDecoder::ClearCacheExceptFrame(
wtf_size_t clear_except_frame) {
- if (basic_info_.have_animation) {
- // TODO(veluca): jxl-rs does not (yet) support seeking to specific frames.
- // For now, deal with this by disallowing clearing the cache.
-
- return 0;
- }
-
+ // With frame seeking support, we can clear cached frames and re-decode
+ // them on demand by seeking to the appropriate offset.
return ImageDecoder::ClearCacheExceptFrame(clear_except_frame);
}
diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
index 1a3f502fdba83..b09bfda992e2f 100644
--- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
+++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h
@@ -45,10 +45,11 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
static bool MatchesJXLSignature(const FastSharedBufferReader& fast_reader);
private:
- // C++-managed Rust Box for JxlRsDecoder.
+ // C++-managed Rust Box types.
using JxlRsDecoderPtr = rust::Box<jxl_rs::JxlRsDecoder>;
+ using JxlRsScannerPtr = rust::Box<jxl_rs::JxlRsFrameScanner>;
- // Decoder state machine.
+ // Decoder state machine for the pixel decoder.
enum class DecoderState {
kInitial, // Waiting for basic info
kHaveBasicInfo, // Have basic info, waiting for frame header
@@ -56,10 +57,14 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
kDone // Decoding is done
};
- // Frame information tracked during decoding.
- struct FrameInfo {
+ // Seek info for a visible frame, cached from the scanner.
+ struct FrameSeekInfo {
base::TimeDelta duration;
base::TimeDelta timestamp;
+ bool is_keyframe = false;
+ size_t decode_start_file_offset = 0;
+ uint64_t remaining_in_box = 0;
+ size_t visible_frames_to_skip = 0;
};
// ImageDecoder:
@@ -72,29 +77,39 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder {
// Internal decode function that optionally stops after metadata.
void Decode(wtf_size_t index, bool only_size);
- // Eagerly decode all animation frames upfront.
- void DecodeAllFrames();
+ // Run the frame scanner to discover frame metadata without decoding pixels.
+ void ScanFrames();
+
+ // Seek the pixel decoder to the target frame and decode it.
+ void SeekAndDecodeFrame(wtf_size_t index);
// Converts JXL pixel format to Skia color type.
SkColorType GetSkColorType() const;
- // Decoder state.
+ // Lightweight frame scanner -- discovers frame count, durations, and seek
+ // offsets without decoding any pixels.
+ std::optional<JxlRsScannerPtr> scanner_;
+ size_t scanner_input_offset_ = 0;
+ bool scanner_done_ = false;
+
+ // Full pixel decoder with state machine.
std::optional<JxlRsDecoderPtr> decoder_;
DecoderState decoder_state_ = DecoderState::kInitial;
+ size_t decoder_input_offset_ = 0;
+ wtf_size_t num_decoded_frames_ = 0;
+
+ // Cached metadata.
jxl_rs::JxlRsBasicInfo basic_info_{};
bool have_basic_info_ = false;
- wtf_size_t num_decoded_frames_ = 0; // Frames whose pixels we've decoded.
- size_t input_offset_ = 0; // Current position in input stream.
- // Animation frame tracking.
- Vector<FrameInfo> frame_info_;
+ // Per-frame seek info populated by the scanner.
+ Vector<FrameSeekInfo> frame_seek_info_;
// Color management.
bool is_high_bit_depth_ = false;
bool decode_to_half_float_ = false;
- // Used to call UpdateBppHistogram<"Jxl">() at most once to record the
- // bits-per-pixel value of the image when the image is successfully decoded.
+ // Used to call UpdateBppHistogram<"Jxl">() at most once.
CrossThreadOnceFunction<void(gfx::Size, size_t)>
update_bpp_histogram_callback_;
};
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
index 2c3ce9855618b..ba55581559ced 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs
@@ -937,7 +937,29 @@ impl JxlColorEncoding {
}
} else {
match self {
- JxlColorEncoding::XYB { .. } => todo!("implement A2B0 and B2A0 tags"),
+ JxlColorEncoding::XYB { .. } => {
+ // Create A2B0 tag for XYB color space
+ let a2b0_start = tags_data.len() as u32;
+ create_icc_lut_atob_tag_for_xyb(&mut tags_data)?;
+ pad_to_4_byte_boundary(&mut tags_data);
+ let a2b0_size = (tags_data.len() as u32) - a2b0_start;
+ collected_tags.push(TagInfo {
+ signature: *b"A2B0",
+ offset_in_tags_blob: a2b0_start,
+ size_unpadded: a2b0_size,
+ });
+
+ // Create B2A0 tag (no-op, required by Apple software)
+ let b2a0_start = tags_data.len() as u32;
+ create_icc_noop_btoa_tag(&mut tags_data)?;
+ pad_to_4_byte_boundary(&mut tags_data);
+ let b2a0_size = (tags_data.len() as u32) - b2a0_start;
+ collected_tags.push(TagInfo {
+ signature: *b"B2A0",
+ offset_in_tags_blob: b2a0_start,
+ size_unpadded: b2a0_size,
+ });
+ }
JxlColorEncoding::RgbColorSpace {
transfer_function, ..
}
@@ -2047,6 +2069,108 @@ fn tone_map_pixel(
])
}
+/// Create mAB A2B0 tag for XYB color space.
+fn create_icc_lut_atob_tag_for_xyb(tags: &mut Vec<u8>) -> Result<(), Error> {
+ use super::xyb_constants::*;
+ use byteorder::{BigEndian, WriteBytesExt};
+
+ // Tag signature: 'mAB '
+ tags.extend_from_slice(b"mAB ");
+ // 4 reserved bytes set to 0
+ tags.write_u32::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // Number of input channels
+ tags.push(3);
+ // Number of output channels
+ tags.push(3);
+ // 2 reserved bytes for padding
+ tags.write_u16::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // Offsets (calculated based on structure size)
+ // offset to first B curve: 32
+ tags.write_u32::<BigEndian>(32)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to matrix: 244
+ tags.write_u32::<BigEndian>(244)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to first M curve: 148
+ tags.write_u32::<BigEndian>(148)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to CLUT: 80
+ tags.write_u32::<BigEndian>(80)
+ .map_err(|_| Error::InvalidIccStream)?;
+ // offset to first A curve (reuse linear B curves): 32
+ tags.write_u32::<BigEndian>(32)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // offset = 32: B curves (3 identity/linear curves)
+  // Each curve is 16 bytes: 'para' (4) + reserved (4) + function type (2) +
+  // reserved (2) + one s15Fixed16 parameter. Type 0: Y = X^gamma, gamma = 1.0
+  // (identity), so 3 curves span offsets 32..80 (matching the CLUT offset).
+ for _ in 0..3 {
+ create_icc_curv_para_tag(tags, &[1.0], 0)?;
+ }
+
+ // offset = 80: CLUT
+ // 16 bytes for grid points (only first 3 used, rest 0)
+ for i in 0..16 {
+ tags.push(if i < 3 { 2 } else { 0 });
+ }
+ // precision = 2 (16-bit)
+ tags.push(2);
+ // 3 bytes padding
+ tags.push(0);
+ tags.write_u16::<BigEndian>(0)
+ .map_err(|_| Error::InvalidIccStream)?;
+
+ // 2x2x2x3 entries of 2 bytes each = 48 bytes
+ let cube = unscaled_a2b_cube_full();
+ for row_x in &cube {
+ for row_y in row_x {
+ for out_f in row_y {
+ for &val_f in out_f {
+ let val = (65535.0 * val_f).round().clamp(0.0, 65535.0) as u16;
+ tags.write_u16::<BigEndian>(val)
+ .map_err(|_| Error::InvalidIccStream)?;
+ }
+ }
+ }
+ }
+
+ // offset = 148: M curves (3 parametric curves)
+ // Type 3 parametric curve: Y = (aX + b)^gamma + c for X >= d, else Y = cX
+ // Each curve: 12 + 5*4 = 32 bytes
+ let scale = xyb_scale();
+ for i in 0..3 {
+ let b = -XYB_OFFSET[i] - NEG_OPSIN_ABSORBANCE_BIAS_RGB[i].cbrt();
+ let params = [
+ 3.0, // gamma
+ 1.0 / scale[i], // a
+ b, // b
+ 0.0, // c (unused)
+ (-b * scale[i]).max(0.0), // d (make skcms happy)
+ ];
+ create_icc_curv_para_tag(tags, &params, 3)?;
+ }
+
+ // offset = 244: Matrix (12 values as s15Fixed16)
+ // 9 matrix values + 3 intercepts = 12 * 4 = 48 bytes
+ for v in XYB_ICC_MATRIX {
+ append_s15_fixed_16(tags, v as f32)?;
+ }
+
+ // Intercepts
+ for i in 0..3 {
+ let mut intercept: f64 = 0.0;
+ for j in 0..3 {
+ intercept += XYB_ICC_MATRIX[i * 3 + j] * (NEG_OPSIN_ABSORBANCE_BIAS_RGB[j] as f64);
+ }
+ append_s15_fixed_16(tags, intercept as f32)?;
+ }
+
+ Ok(())
+}
+
/// Create mft1 (8-bit LUT) A2B0 tag for HDR tone mapping.
fn create_icc_lut_atob_tag_for_hdr(
transfer_function: &JxlTransferFunction,
@@ -2642,4 +2766,17 @@ mod test {
assert!(!rgb.same_color_encoding(&gray));
assert!(!gray.same_color_encoding(&rgb));
}
+
+ /// Verify XYB color profiles generate valid ICC profiles with A2B0/B2A0 tags.
+ #[test]
+ fn test_xyb_icc_profile_generation() {
+ let xyb = JxlColorProfile::Simple(JxlColorEncoding::XYB {
+ rendering_intent: RenderingIntent::Perceptual,
+ });
+
+ let icc = xyb.try_as_icc().expect("XYB should generate ICC profile");
+ assert!(!icc.is_empty());
+ assert!(icc.windows(4).any(|w| w == b"mAB "));
+ assert!(icc.windows(4).any(|w| w == b"mBA "));
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
index 5debb0c1df1d6..966033c5a5d50 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs
@@ -40,6 +40,13 @@ impl JxlColorType {
Self::Rgba | Self::Bgra => false,
}
}
+ pub fn add_alpha(&self) -> Self {
+ match self {
+ Self::Grayscale | Self::GrayscaleAlpha => Self::GrayscaleAlpha,
+ Self::Rgb | Self::Rgba => Self::Rgba,
+ Self::Bgr | Self::Bgra => Self::Bgra,
+ }
+ }
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
index d74b0ffa18e06..848adce67153f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs
@@ -9,7 +9,7 @@ use super::{
};
#[cfg(test)]
use crate::frame::Frame;
-use crate::{api::JxlFrameHeader, error::Result};
+use crate::{api::JxlFrameHeader, container::frame_index::FrameIndexBox, error::Result};
use states::*;
use std::marker::PhantomData;
@@ -35,6 +35,42 @@ pub struct JxlDecoder<State: JxlState> {
#[cfg(test)]
pub type FrameCallback = dyn FnMut(&Frame, usize) -> Result<()>;
+/// Information about a single visible frame discovered while decoding.
+#[derive(Debug, Clone, PartialEq)]
+pub struct VisibleFrameInfo {
+ /// Zero-based index among visible frames.
+ pub index: usize,
+ /// Duration in milliseconds (0 for still images or the last frame).
+ pub duration_ms: f64,
+ /// Duration in raw ticks from the animation header.
+ pub duration_ticks: u32,
+ /// Byte offset of this frame's header in the input file.
+ pub(crate) file_offset: usize,
+ /// Whether this is the last frame in the codestream.
+ pub is_last: bool,
+ /// Whether this frame is a seek-keyframe for visible-frame playback.
+ ///
+ /// This is equivalent to `seek_target.visible_frames_to_skip == 0`.
+ pub is_keyframe: bool,
+ /// Precomputed seek inputs for this visible frame.
+ pub seek_target: VisibleFrameSeekTarget,
+ /// Frame name, if any.
+ pub name: String,
+}
+
+/// Computed seek inputs for a target visible frame.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct VisibleFrameSeekTarget {
+ /// File byte offset to start feeding input from.
+ pub decode_start_file_offset: usize,
+ /// Remaining codestream bytes in the current container box at the seek
+ /// point. Pass this to [`JxlDecoder::start_new_frame`].
+ pub remaining_in_box: u64,
+ /// Number of visible frames to skip after seek-start before decoding the
+ /// requested target frame.
+ pub visible_frames_to_skip: usize,
+}
+
impl<S: JxlState> JxlDecoder<S> {
fn wrap_inner(inner: Box<JxlDecoderInner>) -> Self {
Self {
@@ -54,6 +90,26 @@ impl<S: JxlState> JxlDecoder<S> {
self.inner.decoded_frames()
}
+ /// Returns the parsed frame index box, if the file contained one.
+ ///
+ /// The frame index box (`jxli`) is an optional part of the JXL container
+ /// format that provides a seek table for animated files, listing keyframe
+ /// byte offsets, timestamps, and frame counts.
+ ///
+ /// TODO(veluca): Provide a higher-level frame-index API aligned with
+ /// `scanned_frames()` / `VisibleFrameInfo` seek metadata.
+ pub fn frame_index(&self) -> Option<&FrameIndexBox> {
+ self.inner.frame_index()
+ }
+
+ /// Returns visible frame info entries collected so far.
+ ///
+ /// When `JxlDecoderOptions::scan_frames_only` is enabled this is the
+ /// primary output of decoding.
+ pub fn scanned_frames(&self) -> &[VisibleFrameInfo] {
+ self.inner.scanned_frames()
+ }
+
/// Rewinds a decoder to the start of the file, allowing past frames to be displayed again.
pub fn rewind(mut self) -> JxlDecoder<Initialized> {
self.inner.rewind();
@@ -93,8 +149,6 @@ impl JxlDecoder<Initialized> {
}
impl JxlDecoder<WithImageInfo> {
- // TODO(veluca): once frame skipping is implemented properly, expose that in the API.
-
/// Obtains the image's basic information.
pub fn basic_info(&self) -> &JxlBasicInfo {
self.inner.basic_info().unwrap()
@@ -116,10 +170,15 @@ impl JxlDecoder<WithImageInfo> {
self.inner.set_output_color_profile(profile)
}
+ /// Retrieves the current pixel format for output buffers.
pub fn current_pixel_format(&self) -> &JxlPixelFormat {
self.inner.current_pixel_format().unwrap()
}
+ /// Specifies pixel format for output buffers.
+ ///
+ /// Setting this may also change the output color profile in some cases, if the profile was not set
+ /// manually before.
pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) {
self.inner.set_pixel_format(pixel_format);
}
@@ -132,10 +191,54 @@ impl JxlDecoder<WithImageInfo> {
Ok(self.map_inner_processing_result(inner_result))
}
+ /// Draws all the pixels we have data for. This is useful for, e.g., previewing LF frames.
+ ///
+ /// Note: see `process` for alignment requirements for the buffer data.
+ pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer<'_>]) -> Result<()> {
+ self.inner.flush_pixels(buffers)
+ }
+
pub fn has_more_frames(&self) -> bool {
self.inner.has_more_frames()
}
+ /// Resets frame-level decoder state to prepare for decoding a new frame.
+ ///
+ /// This clears intermediate buffers (frame header, TOC, section data) while
+ /// preserving image-level state (file header, color profiles, pixel format,
+ /// reference frames). The box parser is restored to the correct
+ /// mid-codestream state using `remaining_in_box`, so the next `process()`
+ /// call correctly parses a new frame header from the input.
+ ///
+ /// # Arguments
+ ///
+ /// * `seek_target` -- from `VisibleFrameInfo::seek_target`.
+ /// Includes both the box-parser state (`remaining_in_box`) and the input
+ /// resume offset (`decode_start_file_offset`).
+ ///
+ /// After calling this, provide raw file input starting from
+ /// `seek_target.decode_start_file_offset`.
+ ///
+ /// # Example
+ ///
+ /// ```rust,ignore
+ /// // 1. Scan frame info using the regular decoder API.
+ /// let options = JxlDecoderOptions {
+ /// scan_frames_only: true,
+ /// ..Default::default()
+ /// };
+ /// let decoder = JxlDecoder::<states::Initialized>::new(options);
+ /// // ...advance decoder and call `scanned_frames()`...
+ ///
+ /// // 2. Seek to frame N (bare codestream).
+ /// let target = &frames[n];
+ /// decoder.start_new_frame(target.seek_target);
+ /// // 3. Provide input from target.seek_target.decode_start_file_offset and process().
+ /// ```
+ pub fn start_new_frame(&mut self, seek_target: VisibleFrameSeekTarget) {
+ self.inner.start_new_frame(seek_target.remaining_in_box);
+ }
+
#[cfg(test)]
pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) {
self.inner.set_use_simple_pipeline(u);
@@ -143,7 +246,17 @@ impl JxlDecoder<WithImageInfo> {
}
impl JxlDecoder<WithFrameInfo> {
- /// Skip the current frame.
+ /// Skip the current frame without decoding pixels.
+ ///
+ /// This reads section data from the input to advance past the frame, but
+ /// does not render pixels. Reference frames that may be needed by later
+ /// frames are still decoded internally.
+ ///
+ /// For efficient frame seeking in animations, enable
+ /// `JxlDecoderOptions::scan_frames_only` and use
+ /// [`scanned_frames`](JxlDecoder::scanned_frames), then
+ /// [`start_new_frame`](JxlDecoder::start_new_frame) to jump directly to a
+ /// target frame.
pub fn skip_frame(
mut self,
input: &mut impl JxlBitstreamInput,
@@ -191,7 +304,6 @@ pub(crate) mod tests {
use crate::api::{JxlDataFormat, JxlDecoderOptions};
use crate::error::Error;
use crate::image::{Image, Rect};
- use crate::util::test::assert_almost_abs_eq_coords;
use jxl_macros::for_each_test_file;
use std::path::Path;
@@ -202,6 +314,7 @@ pub(crate) mod tests {
&std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap(),
u.arbitrary::<u8>().unwrap() as usize + 1,
false,
+ false,
None,
)
.unwrap();
@@ -214,6 +327,7 @@ pub(crate) mod tests {
mut input: &[u8],
chunk_size: usize,
use_simple_pipeline: bool,
+ do_flush: bool,
callback: Option<Box<dyn FnMut(&Frame, usize) -> Result<(), Error>>>,
) -> Result<(usize, Vec<Vec<Image<f32>>>), Error> {
let options = JxlDecoderOptions::default();
@@ -226,7 +340,7 @@ pub(crate) mod tests {
let mut chunk_input = &input[0..0];
macro_rules! advance_decoder {
- ($decoder: ident $(, $extra_arg: expr)?) => {
+ ($decoder: ident $(, $extra_arg: expr)? $(; $flush_arg: expr)?) => {
loop {
chunk_input =
&input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
@@ -236,6 +350,12 @@ pub(crate) mod tests {
match process_result.unwrap() {
ProcessingResult::Complete { result } => break result,
ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ $(
+ let mut fallback = fallback;
+ if do_flush && !input.is_empty() {
+ fallback.flush_pixels($flush_arg)?;
+ }
+ )?
if input.is_empty() {
panic!("Unexpected end of input");
}
@@ -281,9 +401,6 @@ pub(crate) mod tests {
let mut frames = vec![];
loop {
- // Process until we have frame info
- let mut decoder_with_frame_info = advance_decoder!(decoder_with_image_info);
-
// First channel is interleaved.
let mut buffers = vec![Image::new_with_value(
(buffer_width * num_channels, buffer_height),
@@ -313,7 +430,11 @@ pub(crate) mod tests {
})
.collect();
- decoder_with_image_info = advance_decoder!(decoder_with_frame_info, &mut api_buffers);
+ // Process until we have frame info
+ let mut decoder_with_frame_info =
+ advance_decoder!(decoder_with_image_info; &mut api_buffers);
+ decoder_with_image_info =
+ advance_decoder!(decoder_with_frame_info, &mut api_buffers; &mut api_buffers);
// All pixels should have been overwritten, so they should no longer be NaNs.
for buf in buffers.iter() {
@@ -341,76 +462,108 @@ pub(crate) mod tests {
}
fn decode_test_file(path: &Path) -> Result<(), Error> {
- decode(&std::fs::read(path)?, usize::MAX, false, None)?;
+ decode(&std::fs::read(path)?, usize::MAX, false, false, None)?;
Ok(())
}
for_each_test_file!(decode_test_file);
fn decode_test_file_chunks(path: &Path) -> Result<(), Error> {
- decode(&std::fs::read(path)?, 1, false, None)?;
+ decode(&std::fs::read(path)?, 1, false, false, None)?;
Ok(())
}
for_each_test_file!(decode_test_file_chunks);
+ fn compare_frames(
+ path: &Path,
+ fc: usize,
+ f: &[Image<f32>],
+ sf: &[Image<f32>],
+ ) -> Result<(), Error> {
+ assert_eq!(
+ f.len(),
+ sf.len(),
+ "Frame {fc} has different channels counts",
+ );
+ for (c, (b, sb)) in f.iter().zip(sf.iter()).enumerate() {
+ assert_eq!(
+ b.size(),
+ sb.size(),
+ "Channel {c} in frame {fc} has different sizes",
+ );
+ let sz = b.size();
+ if false {
+ let f = std::fs::File::create(Path::new("/tmp/").join(format!(
+ "{}_diff_chan{c}.pbm",
+ path.as_os_str().to_string_lossy().replace("/", "_")
+ )))?;
+ use std::io::Write;
+ let mut f = std::io::BufWriter::new(f);
+ writeln!(f, "P1\n{} {}", sz.0, sz.1)?;
+ for y in 0..sz.1 {
+ for x in 0..sz.0 {
+ if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 {
+ write!(f, "1")?;
+ } else {
+ write!(f, "0")?;
+ }
+ }
+ }
+ drop(f);
+ }
+ for y in 0..sz.1 {
+ for x in 0..sz.0 {
+ assert_eq!(
+ b.row(y)[x],
+ sb.row(y)[x],
+ "Pixels differ at position ({x}, {y}), channel {c}"
+ );
+ }
+ }
+ }
+ Ok(())
+ }
+
fn compare_pipelines(path: &Path) -> Result<(), Error> {
let file = std::fs::read(path)?;
- let simple_frames = decode(&file, usize::MAX, true, None)?.1;
- let frames = decode(&file, usize::MAX, false, None)?.1;
+ let simple_frames = decode(&file, usize::MAX, true, false, None)?.1;
+ let frames = decode(&file, usize::MAX, false, false, None)?.1;
assert_eq!(frames.len(), simple_frames.len());
for (fc, (f, sf)) in frames
.into_iter()
.zip(simple_frames.into_iter())
.enumerate()
{
- assert_eq!(
- f.len(),
- sf.len(),
- "Frame {fc} has different channels counts",
- );
- for (c, (b, sb)) in f.into_iter().zip(sf.into_iter()).enumerate() {
- assert_eq!(
- b.size(),
- sb.size(),
- "Channel {c} in frame {fc} has different sizes",
- );
- // TODO(veluca): This check actually succeeds if we disable SIMD.
- // With SIMD, the exact output of computations in epf.rs appear to depend on the
- // lane that the computation was done in (???). We should investigate this.
- // b.as_rect().check_equal(sb.as_rect());
- let sz = b.size();
- if false {
- let f = std::fs::File::create(Path::new("/tmp/").join(format!(
- "{}_diff_chan{c}.pbm",
- path.as_os_str().to_string_lossy().replace("/", "_")
- )))?;
- use std::io::Write;
- let mut f = std::io::BufWriter::new(f);
- writeln!(f, "P1\n{} {}", sz.0, sz.1)?;
- for y in 0..sz.1 {
- for x in 0..sz.0 {
- if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 {
- write!(f, "1")?;
- } else {
- write!(f, "0")?;
- }
- }
- }
- drop(f);
- }
- for y in 0..sz.1 {
- for x in 0..sz.0 {
- assert_almost_abs_eq_coords(b.row(y)[x], sb.row(y)[x], 1e-5, (x, y), c);
- }
- }
- }
+ compare_frames(path, fc, &f, &sf)?;
}
Ok(())
}
for_each_test_file!(compare_pipelines);
+ fn compare_incremental(path: &Path) -> Result<(), Error> {
+ let file = std::fs::read(path).unwrap();
+ // One-shot decode
+ let (_, one_shot_frames) = decode(&file, usize::MAX, false, false, None)?;
+ // Incremental decode with arbitrary flushes.
+ let (_, frames) = decode(&file, 123, false, true, None)?;
+
+ // Compare one_shot_frames and frames
+ assert_eq!(one_shot_frames.len(), frames.len());
+ for (fc, (f, sf)) in frames
+ .into_iter()
+ .zip(one_shot_frames.into_iter())
+ .enumerate()
+ {
+ compare_frames(path, fc, &f, &sf)?;
+ }
+
+ Ok(())
+ }
+
+ for_each_test_file!(compare_incremental);
+
#[test]
fn test_preview_size_none_for_regular_files() {
let file = std::fs::read("resources/test/basic.jxl").unwrap();
@@ -539,6 +692,55 @@ pub(crate) mod tests {
assert!(result.is_err());
}
+ #[test]
+ fn test_default_output_tf_by_pixel_format() {
+ use crate::api::{JxlColorEncoding, JxlTransferFunction};
+
+ // Using test image with ICC profile to trigger default transfer function path
+ let file = std::fs::read("resources/test/lossy_with_icc.jxl").unwrap();
+ let options = JxlDecoderOptions::default();
+ let mut decoder = JxlDecoder::<states::Initialized>::new(options);
+ let mut input = file.as_slice();
+ let mut decoder = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => decoder = fallback,
+ }
+ };
+
+ // Output data format will default to F32, so output color profile will be linear sRGB
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::Linear,
+ );
+
+ // Integer data format will set output color profile to sRGB
+ decoder.set_pixel_format(JxlPixelFormat::rgba8(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::SRGB,
+ );
+
+ decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::Linear,
+ );
+
+ decoder.set_pixel_format(JxlPixelFormat::rgba16(0));
+ assert_eq!(
+ *decoder.output_color_profile().transfer_function().unwrap(),
+ JxlTransferFunction::SRGB,
+ );
+
+ // Once output color profile is set by user, it will remain as is regardless of what pixel
+ // format is set
+ let profile = JxlColorProfile::Simple(JxlColorEncoding::srgb(false));
+ decoder.set_output_color_profile(profile.clone()).unwrap();
+ decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0));
+ assert!(decoder.output_color_profile() == &profile);
+ }
+
#[test]
fn test_fill_opaque_alpha_both_pipelines() {
use crate::api::{JxlColorType, JxlDataFormat, JxlPixelFormat};
@@ -1230,7 +1432,7 @@ pub(crate) mod tests {
// The test passes if it doesn't panic with "attempt to add with overflow"
// It's OK if it returns an error or panics with "Unexpected end of input"
let result = panic::catch_unwind(|| {
- let _ = decode(data, 1024, false, None);
+ let _ = decode(data, 1024, false, false, None);
});
// If it panicked, make sure it wasn't an overflow panic
@@ -1247,4 +1449,538 @@ pub(crate) mod tests {
);
}
}
+
+ fn make_box(ty: &[u8; 4], content: &[u8]) -> Vec<u8> {
+ let len = (8 + content.len()) as u32;
+ let mut buf = Vec::new();
+ buf.extend(len.to_be_bytes());
+ buf.extend(ty);
+ buf.extend(content);
+ buf
+ }
+
+ fn add_container_header(container: &mut Vec<u8>) {
+ // JXL signature box
+ let sig = [
+ 0x00, 0x00, 0x00, 0x0c, 0x4a, 0x58, 0x4c, 0x20, 0x0d, 0x0a, 0x87, 0x0a,
+ ];
+ // ftyp box
+ let ftyp = make_box(b"ftyp", b"jxl \x00\x00\x00\x00jxl ");
+ container.extend(&sig);
+ container.extend(&ftyp);
+ }
+
+ /// Helper to wrap a bare codestream in a JXL container with a jxli frame index box.
+ fn wrap_with_frame_index(
+ codestream: &[u8],
+ tnum: u32,
+ tden: u32,
+ entries: &[(u64, u64, u64)], // (OFF_delta, T, F)
+ ) -> Vec<u8> {
+ use crate::util::test::build_frame_index_content;
+
+ let jxli_content = build_frame_index_content(tnum, tden, entries);
+
+ let jxli = make_box(b"jxli", &jxli_content);
+ let jxlc = make_box(b"jxlc", codestream);
+
+ let mut container = Vec::new();
+ add_container_header(&mut container);
+ container.extend(&jxli);
+ container.extend(&jxlc);
+ container
+ }
+
+ /// Helper to wrap a bare codestream in a container split across jxlp boxes.
+ ///
+ /// `chunk_starts` are codestream offsets where each new jxlp chunk begins.
+ fn wrap_with_jxlp_chunks(codestream: &[u8], chunk_starts: &[usize]) -> Vec<u8> {
+ let mut starts = chunk_starts.to_vec();
+ starts.sort_unstable();
+ starts.dedup();
+ if starts.first().copied() != Some(0) {
+ starts.insert(0, 0);
+ }
+ if starts.last().copied() != Some(codestream.len()) {
+ starts.push(codestream.len());
+ }
+ assert!(starts.len() >= 2);
+
+ let mut container = Vec::new();
+ add_container_header(&mut container);
+
+ let num_chunks = starts.len() - 1;
+ for i in 0..num_chunks {
+ let begin = starts[i];
+ let end = starts[i + 1];
+ assert!(begin <= end && end <= codestream.len());
+
+ let mut payload = Vec::with_capacity(4 + (end - begin));
+ let mut index = i as u32;
+ if i + 1 == num_chunks {
+ index |= 0x8000_0000;
+ }
+ payload.extend(index.to_be_bytes());
+ payload.extend(&codestream[begin..end]);
+ container.extend(make_box(b"jxlp", &payload));
+ }
+
+ container
+ }
+
+ #[test]
+ fn test_frame_index_parsed_from_container() {
+ // Read a bare animation codestream and wrap it in a container with a jxli box.
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ // Create synthetic frame index entries (delta offsets).
+ // These are synthetic -- we don't know real frame offsets, but we can verify parsing.
+ let entries = vec![
+ (0u64, 100u64, 1u64), // Frame 0 at offset 0
+ (500, 100, 1), // Frame 1 at offset 500
+ (600, 100, 1), // Frame 2 at offset 1100
+ ];
+
+ let container = wrap_with_frame_index(&codestream, 1, 1000, &entries);
+
+ // Decode with a large chunk size so the jxli box is fully consumed.
+ let options = JxlDecoderOptions::default();
+ let mut dec = JxlDecoder::<states::Initialized>::new(options);
+ let mut input: &[u8] = &container;
+ let dec = loop {
+ match dec.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ dec = fallback;
+ }
+ }
+ };
+
+ // Check that frame index was parsed.
+ let fi = dec.frame_index().expect("frame_index should be Some");
+ assert_eq!(fi.num_frames(), 3);
+ assert_eq!(fi.tnum, 1);
+ assert_eq!(fi.tden.get(), 1000);
+ // Verify absolute offsets (accumulated from deltas)
+ assert_eq!(fi.entries[0].codestream_offset, 0);
+ assert_eq!(fi.entries[1].codestream_offset, 500);
+ assert_eq!(fi.entries[2].codestream_offset, 1100);
+ assert_eq!(fi.entries[0].duration_ticks, 100);
+ assert_eq!(fi.entries[2].frame_count, 1);
+ }
+
+ #[test]
+ fn test_frame_index_none_for_bare_codestream() {
+ // A bare codestream has no container, so no frame index.
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let options = JxlDecoderOptions::default();
+ let mut dec = JxlDecoder::<states::Initialized>::new(options);
+ let mut input: &[u8] = &data;
+ let dec = loop {
+ match dec.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ dec = fallback;
+ }
+ }
+ };
+ assert!(dec.frame_index().is_none());
+ }
+
+ fn scan_frames_with_decoder(mut input: &[u8], chunk_size: usize) -> Vec<VisibleFrameInfo> {
+ let mut chunk_input = &input[0..0];
+ let options = JxlDecoderOptions {
+ scan_frames_only: true,
+ skip_preview: false,
+ ..Default::default()
+ };
+ let mut initialized_decoder = JxlDecoder::<states::Initialized>::new(options);
+
+ macro_rules! advance_process {
+ ($decoder: ident) => {
+ loop {
+ chunk_input =
+ &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
+ let available_before = chunk_input.len();
+ let process_result = $decoder.process(&mut chunk_input);
+ input = &input[(available_before - chunk_input.len())..];
+ match process_result.unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ $decoder = fallback;
+ }
+ }
+ }
+ };
+ }
+
+ macro_rules! advance_skip {
+ ($decoder: ident) => {
+ loop {
+ chunk_input =
+ &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())];
+ let available_before = chunk_input.len();
+ let process_result = $decoder.skip_frame(&mut chunk_input);
+ input = &input[(available_before - chunk_input.len())..];
+ match process_result.unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ if input.is_empty() {
+ panic!("Unexpected end of input");
+ }
+ $decoder = fallback;
+ }
+ }
+ }
+ };
+ }
+
+ let mut decoder_with_image_info = advance_process!(initialized_decoder);
+
+ if !decoder_with_image_info.has_more_frames() {
+ return decoder_with_image_info.scanned_frames().to_vec();
+ }
+
+ loop {
+ let mut decoder_with_frame_info = advance_process!(decoder_with_image_info);
+ decoder_with_image_info = advance_skip!(decoder_with_frame_info);
+ if !decoder_with_image_info.has_more_frames() {
+ break;
+ }
+ }
+
+ decoder_with_image_info.scanned_frames().to_vec()
+ }
+
+ fn assert_start_new_frame_matches_sequential(data: &[u8], expect_bare_codestream: bool) {
+ use crate::api::{JxlDataFormat, JxlPixelFormat};
+ use crate::image::{Image, Rect};
+
+ // 1. Scan frame info to get seek offsets.
+ let scanned_frames = scan_frames_with_decoder(data, usize::MAX);
+ assert!(scanned_frames.len() > 1, "need multiple frames");
+
+ // Compare against second visible frame from regular sequential decode.
+ let target_visible_index = 1;
+ let seek_target = scanned_frames[target_visible_index].seek_target;
+
+ if expect_bare_codestream {
+ assert_eq!(seek_target.remaining_in_box, u64::MAX);
+ } else {
+ assert_ne!(seek_target.remaining_in_box, u64::MAX);
+ }
+
+ // 2. Decode all frames sequentially and keep the reference frame.
+ let (_n, sequential_frames) = decode(data, usize::MAX, false, false, None).unwrap();
+ let expected = &sequential_frames[target_visible_index];
+
+ // 3. Create decoder and parse image info.
+ let options = JxlDecoderOptions::default();
+ let decoder = JxlDecoder::<states::Initialized>::new(options);
+ let mut input = data;
+
+ let ProcessingResult::Complete {
+ result: mut decoder,
+ } = decoder.process(&mut input).unwrap()
+ else {
+ panic!("expected Complete with full data");
+ };
+
+ let basic_info = decoder.basic_info().clone();
+ let (width, height) = basic_info.size;
+
+ // Match the same requested output format as the sequential helper.
+ let default_format = decoder.current_pixel_format().clone();
+ let requested_format = JxlPixelFormat {
+ color_type: default_format.color_type,
+ color_data_format: Some(JxlDataFormat::f32()),
+ extra_channel_format: default_format
+ .extra_channel_format
+ .iter()
+ .map(|_| Some(JxlDataFormat::f32()))
+ .collect(),
+ };
+ decoder.set_pixel_format(requested_format.clone());
+
+ let channels = requested_format.color_type.samples_per_pixel();
+ let num_ec = requested_format.extra_channel_format.len();
+
+ // 4. Seek to decode-start and advance to the target visible frame.
+ decoder.start_new_frame(seek_target);
+ let mut input = &data[seek_target.decode_start_file_offset..];
+
+ for _ in 0..seek_target.visible_frames_to_skip {
+ let mut decoder_frame = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder = fallback;
+ }
+ }
+ };
+
+ decoder = loop {
+ match decoder_frame.skip_frame(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder_frame = fallback;
+ }
+ }
+ };
+ }
+
+ let mut decoder_frame = loop {
+ match decoder.process(&mut input).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder = fallback;
+ }
+ }
+ };
+
+ let mut color_buffer = Image::<f32>::new((width * channels, height)).unwrap();
+ let mut ec_buffers: Vec<Image<f32>> = (0..num_ec)
+ .map(|_| Image::<f32>::new((width, height)).unwrap())
+ .collect();
+ let mut buffers: Vec<JxlOutputBuffer> = vec![JxlOutputBuffer::from_image_rect_mut(
+ color_buffer
+ .get_rect_mut(Rect {
+ origin: (0, 0),
+ size: (width * channels, height),
+ })
+ .into_raw(),
+ )];
+ for ec in ec_buffers.iter_mut() {
+ buffers.push(JxlOutputBuffer::from_image_rect_mut(
+ ec.get_rect_mut(Rect {
+ origin: (0, 0),
+ size: (width, height),
+ })
+ .into_raw(),
+ ));
+ }
+
+ let _decoder = loop {
+ match decoder_frame.process(&mut input, &mut buffers).unwrap() {
+ ProcessingResult::Complete { result } => break result,
+ ProcessingResult::NeedsMoreInput { fallback, .. } => {
+ decoder_frame = fallback;
+ }
+ }
+ };
+
+ // 5. Compare seek-decoded frame against sequential decode reference.
+ let mut seek_decoded = Vec::with_capacity(1 + num_ec);
+ seek_decoded.push(color_buffer);
+ seek_decoded.extend(ec_buffers);
+ compare_frames(
+ Path::new("start_new_frame_seek"),
+ target_visible_index,
+ expected,
+ &seek_decoded,
+ )
+ .unwrap();
+ }
+
+ /// Test that `start_new_frame()` + scanner seek info decodes the same
+ /// frame as regular sequential decode for bare codestream input.
+ #[test]
+ fn test_start_new_frame_bare_codestream() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ assert_start_new_frame_matches_sequential(&data, true);
+ }
+
+ /// Test that `start_new_frame()` + scanner seek info also works for boxed input.
+ #[test]
+ fn test_start_new_frame_boxed_codestream() {
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let entries = vec![(0u64, 100u64, 1u64), (500, 100, 1), (600, 100, 1)];
+ let container = wrap_with_frame_index(&codestream, 1, 1000, &entries);
+ assert_start_new_frame_matches_sequential(&container, false);
+ }
+
+ /// Test seek/scanner behavior when codestream data is split across jxlp boxes,
+ /// with each visible frame starting in its own chunk.
+ #[test]
+ fn test_start_new_frame_boxed_jxlp_per_visible_frame() {
+ let codestream =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let scanned_frames = scan_frames_with_decoder(&codestream, usize::MAX);
+ assert!(scanned_frames.len() > 1, "need multiple frames");
+
+ let (decoded_frames, _) = decode(&codestream, usize::MAX, false, false, None).unwrap();
+ assert_eq!(
+ decoded_frames,
+ scanned_frames.len(),
+ "test file should have one codestream frame per visible frame",
+ );
+
+ let mut chunk_starts: Vec<usize> = scanned_frames.iter().map(|f| f.file_offset).collect();
+ chunk_starts.sort_unstable();
+ chunk_starts.dedup();
+ assert_eq!(chunk_starts.len(), scanned_frames.len());
+
+ let container = wrap_with_jxlp_chunks(&codestream, &chunk_starts);
+ assert_start_new_frame_matches_sequential(&container, false);
+ }
+
+ #[test]
+ fn test_scan_still_image() {
+ let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert_eq!(frames.len(), 1);
+ assert!(frames[0].is_last);
+ assert!(frames[0].is_keyframe);
+ let total_duration_ms: f64 = frames.iter().map(|f| f.duration_ms).sum();
+ assert_eq!(total_duration_ms, 0.0);
+ }
+
+ #[test]
+ fn test_scan_bare_animation() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(frames.len() > 1, "expected multiple frames");
+
+ for (i, frame) in frames.iter().enumerate() {
+ assert_eq!(frame.index, i);
+ }
+
+ assert!(frames.last().unwrap().is_last);
+
+ assert!(frames[0].is_keyframe);
+ assert_eq!(
+ frames[0].seek_target.decode_start_file_offset,
+ frames[0].file_offset
+ );
+ }
+
+ #[test]
+ fn test_scan_animation_offsets_increase() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ for i in 1..frames.len() {
+ assert!(
+ frames[i].file_offset > frames[i - 1].file_offset,
+ "frame {} offset {} should be > frame {} offset {}",
+ i,
+ frames[i].file_offset,
+ i - 1,
+ frames[i - 1].file_offset,
+ );
+ }
+ }
+
+ #[test]
+ fn test_scan_incremental() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let frames = scan_frames_with_decoder(&data, 128);
+ assert!(frames.len() > 1);
+ assert!(frames.last().unwrap().is_last);
+ }
+
+ #[test]
+ fn test_scan_keyframe_detection_still() {
+ let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert_eq!(frames.len(), 1);
+ let f = &frames[0];
+ assert!(f.is_keyframe);
+ assert_eq!(f.seek_target.decode_start_file_offset, f.file_offset);
+ assert_eq!(f.seek_target.visible_frames_to_skip, 0);
+ }
+
+ #[test]
+ fn test_scan_decode_start_file_offset_consistency() {
+ let data =
+ std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap();
+
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ for frame in &frames {
+ assert!(
+ frame.seek_target.decode_start_file_offset <= frame.file_offset,
+ "frame {}: decode_start_file_offset {} > file_offset {}",
+ frame.index,
+ frame.seek_target.decode_start_file_offset,
+ frame.file_offset,
+ );
+ assert_eq!(
+ frame.is_keyframe,
+ frame.seek_target.visible_frames_to_skip == 0,
+ "frame {}: keyframe flag should match visible_frames_to_skip",
+ frame.index,
+ );
+ }
+ }
+
+ #[test]
+ fn test_scan_with_preview() {
+ let data = std::fs::read("resources/test/with_preview.jxl");
+ if data.is_err() {
+ return;
+ }
+ let data = data.unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(frames.len() <= 1);
+ }
+
+ #[test]
+ fn test_scan_patches_not_keyframe() {
+ let data = std::fs::read("resources/test/grayscale_patches_var_dct.jxl");
+ if data.is_err() {
+ return;
+ }
+ let data = data.unwrap();
+ let frames = scan_frames_with_decoder(&data, usize::MAX);
+
+ assert!(!frames.is_empty());
+ }
+
+ /// Regression test for Chromium ClusterFuzz issue 474401148.
+ #[test]
+ fn test_fuzzer_xyb_icc_no_panic() {
+ use crate::api::ProcessingResult;
+
+ #[rustfmt::skip]
+ let data: &[u8] = &[
+ 0xff, 0x0a, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x25, 0x00,
+ ];
+
+ let opts = JxlDecoderOptions {
+ pixel_limit: Some(1024 * 1024 * 1024),
+ ..Default::default()
+ };
+ let mut decoder = JxlDecoderInner::new(opts);
+ let mut input = data;
+
+ if let Ok(ProcessingResult::Complete { .. }) = decoder.process(&mut input, None)
+ && let Some(profile) = decoder.output_color_profile()
+ {
+ let _ = profile.try_as_icc();
+ }
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
index eb66cb3b1f4cf..e2b452cbc81c9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs
@@ -3,6 +3,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::io::IoSliceMut;
+
+use crate::container::frame_index::FrameIndexBox;
use crate::error::{Error, Result};
use crate::api::{
@@ -15,6 +18,8 @@ enum ParseState {
BoxNeeded,
CodestreamBox(u64),
SkippableBox(u64),
+ /// Buffering a jxli box: (remaining bytes, accumulated content).
+ BufferingFrameIndex(u64, Vec<u8>),
}
enum CodestreamBoxType {
@@ -28,6 +33,10 @@ pub(super) struct BoxParser {
pub(super) box_buffer: SmallBuffer,
state: ParseState,
box_type: CodestreamBoxType,
+ /// Parsed frame index box, if present in the file.
+ pub(super) frame_index: Option<FrameIndexBox>,
+ /// Total file bytes consumed from the underlying input.
+ pub(super) total_file_consumed: u64,
}
impl BoxParser {
@@ -36,6 +45,8 @@ impl BoxParser {
box_buffer: SmallBuffer::new(128),
state: ParseState::SignatureNeeded,
box_type: CodestreamBoxType::None,
+ frame_index: None,
+ total_file_consumed: 0,
}
}
@@ -49,7 +60,8 @@ impl BoxParser {
loop {
match self.state.clone() {
ParseState::SignatureNeeded => {
- self.box_buffer.refill(|b| input.read(b), None)?;
+ let read = self.box_buffer.refill(|b| input.read(b), None)?;
+ self.total_file_consumed += read as u64;
match check_signature_internal(&self.box_buffer)? {
None => return Err(Error::InvalidSignature),
Some(JxlSignatureType::Codestream) => {
@@ -71,7 +83,9 @@ impl BoxParser {
let skipped = if !self.box_buffer.is_empty() {
self.box_buffer.consume(num)
} else {
- input.skip(num)?
+ let skipped = input.skip(num)?;
+ self.total_file_consumed += skipped as u64;
+ skipped
};
if skipped == 0 {
return Err(Error::OutOfBounds(num));
@@ -83,8 +97,35 @@ impl BoxParser {
self.state = ParseState::SkippableBox(s);
}
}
+ ParseState::BufferingFrameIndex(mut remaining, mut buf) => {
+ let num = remaining.min(usize::MAX as u64) as usize;
+ if !self.box_buffer.is_empty() {
+ let take = num.min(self.box_buffer.len());
+ buf.extend_from_slice(&self.box_buffer[..take]);
+ self.box_buffer.consume(take);
+ remaining -= take as u64;
+ } else {
+ let old_len = buf.len();
+ buf.resize(old_len + num, 0);
+ let read = input.read(&mut [IoSliceMut::new(&mut buf[old_len..])])?;
+ self.total_file_consumed += read as u64;
+ if read == 0 {
+ return Err(Error::OutOfBounds(num));
+ }
+ buf.truncate(old_len + read);
+ remaining -= read as u64;
+ }
+ if remaining == 0 {
+ // Parse the buffered frame index box.
+ self.frame_index = Some(FrameIndexBox::parse(&buf)?);
+ self.state = ParseState::BoxNeeded;
+ } else {
+ self.state = ParseState::BufferingFrameIndex(remaining, buf);
+ }
+ }
ParseState::BoxNeeded => {
- self.box_buffer.refill(|b| input.read(b), None)?;
+ let read = self.box_buffer.refill(|b| input.read(b), None)?;
+ self.total_file_consumed += read as u64;
let min_len = match &self.box_buffer[..] {
[0, 0, 0, 1, ..] => 16,
_ => 8,
@@ -148,6 +189,20 @@ impl BoxParser {
};
self.state = ParseState::CodestreamBox(content_len);
}
+ b"jxli" => {
+ if content_len == u64::MAX {
+ return Err(Error::InvalidBox);
+ }
+                            // Size limit for a frame index box (16 MB); larger jxli boxes are skipped rather than buffered.
+ if content_len > 16 * 1024 * 1024 {
+ self.state = ParseState::SkippableBox(content_len);
+ } else {
+ self.state = ParseState::BufferingFrameIndex(
+ content_len,
+ Vec::with_capacity(content_len as usize),
+ );
+ }
+ }
_ => {
self.state = ParseState::SkippableBox(content_len);
}
@@ -158,6 +213,26 @@ impl BoxParser {
}
}
+    /// Accounts for file bytes consumed directly by codestream parser reads/skips.
+ pub(super) fn mark_file_consumed(&mut self, amount: usize) {
+ self.total_file_consumed += amount as u64;
+ }
+
+ /// Resets the box parser for seeking to a specific codestream position.
+ ///
+ /// Sets the parser to `CodestreamBox(remaining)` state with cleared
+ /// buffers. The caller must provide raw input starting from the file
+ /// position that corresponds to the target codestream offset.
+ ///
+ /// `remaining` is the number of codestream bytes left in the current
+ /// box from the target file position. For bare-codestream files this
+ /// is `u64::MAX`.
+ pub(super) fn reset_for_codestream_seek(&mut self, remaining: u64) {
+ self.box_buffer = SmallBuffer::new(128);
+ self.state = ParseState::CodestreamBox(remaining);
+ // Keep frame_index unchanged.
+ }
+
pub(super) fn consume_codestream(&mut self, amount: u64) {
if let ParseState::CodestreamBox(cb) = &mut self.state {
*cb = cb.checked_sub(amount).unwrap();
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
index a5b650eacd226..ab645281dfffd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs
@@ -14,8 +14,9 @@ use sections::SectionState;
use crate::api::FrameCallback;
use crate::{
api::{
- JxlBasicInfo, JxlBitstreamInput, JxlColorProfile, JxlDecoderOptions, JxlOutputBuffer,
- JxlPixelFormat,
+ JxlBasicInfo, JxlBitstreamInput, JxlColorEncoding, JxlColorProfile, JxlDataFormat,
+ JxlDecoderOptions, JxlOutputBuffer, JxlPixelFormat, VisibleFrameInfo,
+ VisibleFrameSeekTarget,
inner::{box_parser::BoxParser, process::SmallBuffer},
},
error::{Error, Result},
@@ -33,6 +34,13 @@ struct SectionBuffer {
section: Section,
}
+#[derive(Clone, Copy)]
+struct FrameStartInfo {
+ file_offset: usize,
+ remaining_in_box: u64,
+ visible_count_before: usize,
+}
+
pub(super) struct CodestreamParser {
// TODO(veluca): this would probably be cleaner with some kind of state enum.
pub(super) file_header: Option<FileHeader>,
@@ -44,6 +52,9 @@ pub(super) struct CodestreamParser {
pub(super) embedded_color_profile: Option<JxlColorProfile>,
pub(super) output_color_profile: Option<JxlColorProfile>,
pub(super) pixel_format: Option<JxlPixelFormat>,
+ xyb_encoded: bool,
+ is_gray: bool,
+ pub(super) output_color_profile_set_by_user: bool,
// These fields are populated when starting to decode a frame, and cleared once
// the frame is done.
@@ -79,6 +90,27 @@ pub(super) struct CodestreamParser {
header_needed_bytes: Option<u64>,
+ // --- Frame info tracking (for frame scanning) ---
+ /// Collected visible frame info entries.
+ pub(super) scanned_frames: Vec<VisibleFrameInfo>,
+ /// Zero-based visible frame index counter.
+ visible_frame_index: usize,
+ /// File offsets and visibility info for every non-preview frame (visible
+ /// and non-visible), in parse order.
+ frame_starts: Vec<FrameStartInfo>,
+ /// For each reference slot, earliest frame index required to reconstruct
+ /// the current contents of that slot.
+ reference_slot_decode_start: [Option<usize>; DecoderState::MAX_STORED_FRAMES],
+ /// For each LF slot, earliest frame index required to reconstruct the
+ /// current contents of that slot.
+ lf_slot_decode_start: [Option<usize>; DecoderState::NUM_LF_FRAMES],
+ /// File byte offset where the current frame header parse started.
+ /// Set when we begin parsing a frame header.
+ current_frame_file_offset: usize,
+ /// Remaining codestream bytes in the current box at frame start.
+ /// Captured alongside `current_frame_file_offset`.
+ current_frame_remaining_in_box: u64,
+
#[cfg(test)]
pub frame_callback: Option<Box<FrameCallback>>,
#[cfg(test)]
@@ -96,6 +128,9 @@ impl CodestreamParser {
embedded_color_profile: None,
output_color_profile: None,
pixel_format: None,
+ xyb_encoded: false,
+ is_gray: false,
+ output_color_profile_set_by_user: false,
frame_header: None,
toc_parser: None,
frame: None,
@@ -115,6 +150,13 @@ impl CodestreamParser {
candidate_hf_sections: HashSet::new(),
has_more_frames: true,
header_needed_bytes: None,
+ scanned_frames: Vec::new(),
+ visible_frame_index: 0,
+ frame_starts: Vec::new(),
+ reference_slot_decode_start: [None; DecoderState::MAX_STORED_FRAMES],
+ lf_slot_decode_start: [None; DecoderState::NUM_LF_FRAMES],
+ current_frame_file_offset: 0,
+ current_frame_remaining_in_box: u64::MAX,
#[cfg(test)]
frame_callback: None,
#[cfg(test)]
@@ -130,6 +172,125 @@ impl CodestreamParser {
}
}
+ /// Record frame info for the just-parsed frame.
+ /// Called after process_non_section() creates a Frame, for frame scanning.
+ fn record_frame_info(&mut self) {
+ let frame = match self.frame.as_ref() {
+ Some(f) => f,
+ None => return,
+ };
+ let header = frame.header();
+
+ let current_frame_index = self.frame_starts.len();
+ let is_visible = header.is_visible();
+ self.frame_starts.push(FrameStartInfo {
+ file_offset: self.current_frame_file_offset,
+ remaining_in_box: self.current_frame_remaining_in_box,
+ visible_count_before: self.visible_frame_index,
+ });
+
+ let mut decode_start_frame_index = current_frame_index;
+
+ // Track frame dependencies through reference slots. For blending we know
+ // exactly which slots are used. For patches we conservatively assume any
+ // reference slot may be used.
+ let mut used_reference_slots = [false; DecoderState::MAX_STORED_FRAMES];
+ if header.needs_blending() {
+ for blending_info in header
+ .ec_blending_info
+ .iter()
+ .chain(std::iter::once(&header.blending_info))
+ {
+ let source = blending_info.source as usize;
+ assert!(
+ source < DecoderState::MAX_STORED_FRAMES,
+ "invalid blending source slot {source}, max {}",
+ DecoderState::MAX_STORED_FRAMES - 1
+ );
+ used_reference_slots[source] = true;
+ }
+ }
+ if header.has_patches() {
+ used_reference_slots.fill(true);
+ }
+
+ for (slot, used) in used_reference_slots.iter().enumerate() {
+ if *used && let Some(dep_start) = self.reference_slot_decode_start[slot] {
+ decode_start_frame_index = decode_start_frame_index.min(dep_start);
+ }
+ }
+
+ if header.has_lf_frame() {
+ let lf_slot = header.lf_level as usize;
+ assert!(
+ lf_slot < DecoderState::NUM_LF_FRAMES,
+ "invalid lf slot {lf_slot}, max {}",
+ DecoderState::NUM_LF_FRAMES - 1
+ );
+ if let Some(dep_start) = self.lf_slot_decode_start[lf_slot] {
+ decode_start_frame_index = decode_start_frame_index.min(dep_start);
+ }
+ }
+
+ if is_visible {
+ let duration_ticks = header.duration;
+ let duration_ms = if let Some(ref anim) = self.animation {
+ if anim.tps_numerator > 0 {
+ (duration_ticks as f64) * 1000.0 * (anim.tps_denominator as f64)
+ / (anim.tps_numerator as f64)
+ } else {
+ 0.0
+ }
+ } else {
+ 0.0
+ };
+
+ let decode_start = self.frame_starts[decode_start_frame_index];
+ let seek_target = VisibleFrameSeekTarget {
+ decode_start_file_offset: decode_start.file_offset,
+ remaining_in_box: decode_start.remaining_in_box,
+ visible_frames_to_skip: self
+ .visible_frame_index
+ .saturating_sub(decode_start.visible_count_before),
+ };
+ let is_keyframe = seek_target.visible_frames_to_skip == 0;
+
+ self.scanned_frames.push(VisibleFrameInfo {
+ index: self.visible_frame_index,
+ duration_ms,
+ duration_ticks,
+ file_offset: self.current_frame_file_offset,
+ is_last: header.is_last,
+ is_keyframe,
+ seek_target,
+ name: header.name.clone(),
+ });
+
+ self.visible_frame_index += 1;
+ }
+
+ // Update slot dependency origins after processing this frame.
+ if header.can_be_referenced {
+ let slot = header.save_as_reference as usize;
+ assert!(
+ slot < DecoderState::MAX_STORED_FRAMES,
+ "invalid save_as_reference slot {slot}, max {}",
+ DecoderState::MAX_STORED_FRAMES - 1
+ );
+ self.reference_slot_decode_start[slot] = Some(decode_start_frame_index);
+ }
+
+ if header.lf_level != 0 {
+ let slot = (header.lf_level - 1) as usize;
+ assert!(
+ slot < DecoderState::NUM_LF_FRAMES,
+ "invalid lf save slot {slot}, max {}",
+ DecoderState::NUM_LF_FRAMES - 1
+ );
+ self.lf_slot_decode_start[slot] = Some(decode_start_frame_index);
+ }
+ }
+
/// Returns the number of passes that are fully completed across all groups.
pub(super) fn num_completed_passes(&self) -> usize {
self.section_state.num_completed_passes()
@@ -151,12 +312,41 @@ impl CodestreamParser {
pixel_format
}
+ /// Resets frame-level state for seeking to a new frame.
+ ///
+ /// Preserves: file_header, decoder_state (including reference frames),
+ /// basic_info, animation, color profiles, pixel_format, xyb_encoded,
+ /// is_gray, output_color_profile_set_by_user, preview_done.
+ ///
+ /// Clears: frame_header, toc_parser, frame, all section buffers,
+ /// non_section_buf, and processing flags.
+ pub(super) fn start_new_frame(&mut self) {
+ self.frame_header = None;
+ self.toc_parser = None;
+ self.frame = None;
+ self.non_section_buf = SmallBuffer::new(4096);
+ self.non_section_bit_offset = 0;
+ self.sections.clear();
+ self.ready_section_data = 0;
+ self.skip_sections = false;
+ self.process_without_output = false;
+ self.section_state = SectionState::new(0, 0);
+ self.lf_global_section = None;
+ self.lf_sections.clear();
+ self.hf_global_section = None;
+ self.hf_sections.clear();
+ self.candidate_hf_sections.clear();
+ self.has_more_frames = true;
+ self.header_needed_bytes = None;
+ }
+
pub(super) fn process(
&mut self,
box_parser: &mut BoxParser,
input: &mut dyn JxlBitstreamInput,
decode_options: &JxlDecoderOptions,
mut output_buffers: Option<&mut [JxlOutputBuffer]>,
+ do_flush: bool,
) -> Result<()> {
if let Some(output_buffers) = &output_buffers {
let px = self.pixel_format.as_ref().unwrap();
@@ -179,7 +369,11 @@ impl CodestreamParser {
.frame
.as_ref()
.is_some_and(|f| f.header().can_be_referenced);
- if !self.process_without_output && output_buffers.is_none() && !can_be_referenced {
+ if decode_options.scan_frames_only
+ || (!self.process_without_output
+ && output_buffers.is_none()
+ && !can_be_referenced)
+ {
self.skip_sections = true;
}
@@ -229,7 +423,9 @@ impl CodestreamParser {
let num = if !box_parser.box_buffer.is_empty() {
box_parser.box_buffer.take(buffers)
} else {
- input.read(buffers)?
+ let num = input.read(buffers)?;
+ box_parser.mark_file_consumed(num);
+ num
};
self.ready_section_data += num;
box_parser.consume_codestream(num as u64);
@@ -238,7 +434,7 @@ impl CodestreamParser {
break;
}
}
- match self.process_sections(decode_options, &mut output_buffers) {
+ match self.process_sections(decode_options, &mut output_buffers, do_flush) {
Ok(None) => Ok(()),
Ok(Some(missing)) => Err(Error::OutOfBounds(missing)),
Err(Error::OutOfBounds(_)) => Err(Error::SectionTooShort),
@@ -256,7 +452,9 @@ impl CodestreamParser {
let skipped = if !box_parser.box_buffer.is_empty() {
box_parser.box_buffer.consume(to_skip)
} else {
- input.skip(to_skip)?
+ let skipped = input.skip(to_skip)?;
+ box_parser.mark_file_consumed(skipped);
+ skipped
};
box_parser.consume_codestream(skipped as u64);
self.ready_section_data += skipped;
@@ -295,25 +493,59 @@ impl CodestreamParser {
} else {
// Trying to read a frame or a file header.
assert!(self.frame.is_none());
- assert!(self.has_more_frames);
+ if !self.has_more_frames {
+ // If this is a flush request and the file is complete, we are done.
+ // Otherwise, this is an API usage error.
+ assert!(do_flush);
+ return Ok(());
+ }
+
+ // Capture frame-start metadata once before parsing the next
+ // frame header. We do this after `get_more_codestream()` so we
+ // are robust to the previous frame ending exactly at a box
+ // boundary (BoxNeeded -> CodestreamBox transition).
+ let mut capture_frame_start =
+ self.decoder_state.is_some() && self.frame_header.is_none();
// Loop to handle incremental parsing (e.g. large ICC profiles) that may need
// multiple buffer refills to complete.
loop {
let available_codestream = match box_parser.get_more_codestream(input) {
Err(Error::OutOfBounds(_)) => 0,
- Ok(c) => c as usize,
+ Ok(c) => c,
Err(e) => return Err(e),
};
+
+ if capture_frame_start {
+ // total_file_consumed counts bytes read/skipped from
+ // raw input. non_section_buf and box_buffer contain
+ // unread bytes already accounted there.
+ self.current_frame_file_offset = (box_parser.total_file_consumed as usize)
+ .saturating_sub(self.non_section_buf.len())
+ .saturating_sub(box_parser.box_buffer.len());
+
+ // `available_codestream` includes bytes still in
+ // box_buffer and not yet in non_section_buf.
+ self.current_frame_remaining_in_box = if available_codestream > u64::MAX / 2
+ {
+ u64::MAX
+ } else {
+ available_codestream.saturating_add(self.non_section_buf.len() as u64)
+ };
+ capture_frame_start = false;
+ }
+
let c = self.non_section_buf.refill(
|buf| {
if !box_parser.box_buffer.is_empty() {
Ok(box_parser.box_buffer.take(buf))
} else {
- input.read(buf)
+ let read = input.read(buf)?;
+ box_parser.mark_file_consumed(read);
+ Ok(read)
}
},
- Some(available_codestream),
+ Some(available_codestream as usize),
)? as u64;
box_parser.consume_codestream(c);
@@ -379,6 +611,11 @@ impl CodestreamParser {
}
}
+ // Record frame info for scanning (after preview check).
+ if !is_preview_frame {
+ self.record_frame_info();
+ }
+
if self.has_visible_frame() {
// Return to caller if we found visible frame info.
return Ok(());
@@ -390,4 +627,49 @@ impl CodestreamParser {
}
}
}
+
+ pub(super) fn update_default_output_color_profile(&mut self) {
+ // Only set default output_color_profile if not already configured by user
+ if self.output_color_profile_set_by_user {
+ return;
+ }
+
+ let embedded_color_profile = self.embedded_color_profile.as_ref().unwrap();
+ let pixel_format = self.pixel_format.as_ref().unwrap();
+
+ // Determine default output color profile following libjxl logic:
+ // - For XYB: use embedded if can_output_to(), else:
+ // - if float samples are requested: linear sRGB,
+ // - else: sRGB
+ // - For non-XYB: use embedded color profile
+ let output_color_profile = if self.xyb_encoded {
+ // Use embedded if we can output to it, otherwise fall back to sRGB
+ let base_encoding = if embedded_color_profile.can_output_to() {
+ match &embedded_color_profile {
+ JxlColorProfile::Simple(enc) => enc.clone(),
+ JxlColorProfile::Icc(_) => {
+ unreachable!("can_output_to returns false for ICC")
+ }
+ }
+ } else {
+ let data_format = pixel_format
+ .color_data_format
+ .unwrap_or(JxlDataFormat::U8 { bit_depth: 8 });
+ let is_float = matches!(
+ data_format,
+ JxlDataFormat::F32 { .. } | JxlDataFormat::F16 { .. }
+ );
+ if is_float {
+ JxlColorEncoding::linear_srgb(self.is_gray)
+ } else {
+ JxlColorEncoding::srgb(self.is_gray)
+ }
+ };
+
+ JxlColorProfile::Simple(base_encoding)
+ } else {
+ embedded_color_profile.clone()
+ };
+ self.output_color_profile = Some(output_color_profile);
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
index 342fd6729f7a5..48ec9e6b9480f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs
@@ -117,6 +117,7 @@ impl CodestreamParser {
if self.decoder_state.is_none() && self.embedded_color_profile.is_none() {
let file_header = self.file_header.as_ref().unwrap();
+
// Parse (or extract from file header) the ICC profile.
let mut br = BitReader::new(&self.non_section_buf);
br.skip_bits(self.non_section_bit_offset as usize)?;
@@ -147,50 +148,17 @@ impl CodestreamParser {
&file_header.image_metadata.color_encoding,
)?)
};
- // Determine default output color profile following libjxl logic:
- // - For XYB: use embedded if can_output_to(), else linear sRGB fallback
- // - For non-XYB: use embedded color profile
- let output_color_profile = if file_header.image_metadata.xyb_encoded {
- let is_gray =
- file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray;
-
- // Use embedded if we can output to it, otherwise fall back to linear sRGB
- let base_encoding = if embedded_color_profile.can_output_to() {
- match &embedded_color_profile {
- JxlColorProfile::Simple(enc) => enc.clone(),
- JxlColorProfile::Icc(_) => {
- unreachable!("can_output_to returns false for ICC")
- }
- }
- } else {
- JxlColorEncoding::linear_srgb(is_gray)
- };
-
- JxlColorProfile::Simple(base_encoding)
- } else {
- embedded_color_profile.clone()
- };
self.embedded_color_profile = Some(embedded_color_profile.clone());
- // Only set default output_color_profile if not already configured by user
- if self.output_color_profile.is_none() {
- self.output_color_profile = Some(output_color_profile);
- } else {
- // Validate user's output color profile choice (libjxl compatibility)
- // For non-XYB without CMS: only same encoding as embedded is allowed
- let user_profile = self.output_color_profile.as_ref().unwrap();
- if !file_header.image_metadata.xyb_encoded
- && decode_options.cms.is_none()
- && *user_profile != embedded_color_profile
- {
- return Err(Error::NonXybOutputNoCMS);
- }
- }
+
+ let xyb_encoded = file_header.image_metadata.xyb_encoded;
+ let is_gray = file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray;
+ self.xyb_encoded = xyb_encoded;
+ self.is_gray = is_gray;
+
// Only set default pixel_format if not already configured (e.g. via rewind)
if self.pixel_format.is_none() {
self.pixel_format = Some(JxlPixelFormat {
- color_type: if file_header.image_metadata.color_encoding.color_space
- == ColorSpace::Gray
- {
+ color_type: if is_gray {
JxlColorType::Grayscale
} else {
JxlColorType::Rgb
@@ -207,6 +175,19 @@ impl CodestreamParser {
});
}
+ if let Some(user_profile) = &self.output_color_profile {
+ // Validate user's output color profile choice (libjxl compatibility)
+ // For non-XYB without CMS: only same encoding as embedded is allowed
+ if !xyb_encoded
+ && decode_options.cms.is_none()
+ && *user_profile != embedded_color_profile
+ {
+ return Err(Error::NonXybOutputNoCMS);
+ }
+ } else {
+ self.update_default_output_color_profile();
+ }
+
let mut br = BitReader::new(&self.non_section_buf);
br.skip_bits(self.non_section_bit_offset as usize)?;
br.jump_to_byte_boundary()?;
@@ -298,7 +279,7 @@ impl CodestreamParser {
// Save file_header before creating frame (for preview frame recovery)
self.saved_file_header = self.decoder_state.as_ref().map(|ds| ds.file_header.clone());
- let frame = Frame::from_header_and_toc(
+ let mut frame = Frame::from_header_and_toc(
self.frame_header.take().unwrap(),
toc,
self.decoder_state.take().unwrap(),
@@ -360,6 +341,17 @@ impl CodestreamParser {
self.section_state =
SectionState::new(frame.header().num_lf_groups(), frame.header().num_groups());
+ frame.prepare_render_pipeline(
+ self.pixel_format.as_ref().unwrap(),
+ decode_options.cms.as_deref(),
+ self.embedded_color_profile
+ .as_ref()
+ .expect("embedded_color_profile should be set before pipeline preparation"),
+ self.output_color_profile
+ .as_ref()
+ .expect("output_color_profile should be set before pipeline preparation"),
+ )?;
+
self.frame = Some(frame);
Ok(())
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
index ffdb3588b7334..4fc5278c5fc15 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs
@@ -8,15 +8,18 @@ use crate::{
bit_reader::BitReader,
error::Result,
frame::Section,
+ headers::frame_header::{Encoding, FrameType},
};
use super::CodestreamParser;
+#[derive(Debug)]
pub(super) struct SectionState {
lf_global_done: bool,
remaining_lf: usize,
hf_global_done: bool,
completed_passes: Vec<u8>,
+ lf_global_flush_len: usize,
}
impl SectionState {
@@ -26,6 +29,7 @@ impl SectionState {
remaining_lf: num_lf_groups,
hf_global_done: false,
completed_passes: vec![0; num_groups],
+ lf_global_flush_len: 0,
}
}
@@ -41,8 +45,15 @@ impl CodestreamParser {
&mut self,
decode_options: &JxlDecoderOptions,
output_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>,
+ do_flush: bool,
) -> Result<Option<usize>> {
let frame = self.frame.as_mut().unwrap();
+
+ let output_profile = self
+ .output_color_profile
+ .as_ref()
+ .expect("output_color_profile should be set before pipeline preparation");
+
let frame_header = frame.header();
// Dequeue ready sections.
@@ -72,40 +83,81 @@ impl CodestreamParser {
}
let mut processed_section = false;
+ let mut called_render_hf = false;
let pixel_format = self.pixel_format.as_ref().unwrap();
+
+ let complete_lf_global;
+ let (lf_global, lf_global_is_complete) = if let Some(d) = self.lf_global_section.take() {
+ complete_lf_global = d;
+ (
+ Some(&complete_lf_global.data[..complete_lf_global.len]),
+ true,
+ )
+ } else if do_flush
+ && self
+ .sections
+ .front()
+ .is_some_and(|s| s.section == Section::LfGlobal)
+ && 2 * self.ready_section_data > 3 * self.section_state.lf_global_flush_len
+ && frame_header.encoding == Encoding::Modular
+ && matches!(
+ frame_header.frame_type,
+ FrameType::RegularFrame | FrameType::LFFrame
+ )
+ {
+ self.section_state.lf_global_flush_len = self.ready_section_data;
+ (
+ Some(&self.sections[0].data[..self.ready_section_data]),
+ false,
+ )
+ } else {
+ (None, false)
+ };
+
'process: {
if frame_header.num_groups() == 1 && frame_header.passes.num_passes == 1 {
// Single-group special case.
- let Some(sec) = self.lf_global_section.take() else {
+ let Some(buf) = lf_global else {
break 'process;
};
- assert!(self.sections.is_empty());
- let mut br = BitReader::new(&sec.data);
- frame.decode_lf_global(&mut br)?;
- frame.decode_lf_group(0, &mut br)?;
- frame.decode_hf_global(&mut br)?;
- frame.prepare_render_pipeline(
- self.pixel_format.as_ref().unwrap(),
- decode_options.cms.as_deref(),
- self.embedded_color_profile
- .as_ref()
- .expect("embedded_color_profile should be set before pipeline preparation"),
- self.output_color_profile
- .as_ref()
- .expect("output_color_profile should be set before pipeline preparation"),
- )?;
- frame.finalize_lf()?;
- frame.decode_and_render_hf_groups(
- output_buffers,
- pixel_format,
- vec![(0, vec![(0, br)])],
- )?;
- processed_section = true;
+ assert!(self.sections.is_empty() || !lf_global_is_complete);
+ let mut br = BitReader::new(buf);
+ let res = (|| -> Result<()> {
+ frame.decode_lf_global(&mut br, !lf_global_is_complete)?;
+ frame.decode_lf_group(0, &mut br)?;
+ frame.decode_hf_global(&mut br)?;
+ frame.finalize_lf()?;
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ vec![(0, vec![(0, br)])],
+ do_flush,
+ output_profile,
+ )?;
+ called_render_hf = true;
+ Ok(())
+ })();
+ match res {
+ Ok(_) => {
+ processed_section = true;
+ }
+ Err(_) if !lf_global_is_complete => {
+ // Ignore errors if we are doing partial parsing.
+ }
+ Err(e) => return Err(e),
+ }
} else {
- if let Some(lf_global) = self.lf_global_section.take() {
- frame.decode_lf_global(&mut BitReader::new(&lf_global.data))?;
- self.section_state.lf_global_done = true;
- processed_section = true;
+ if let Some(buf) = lf_global {
+ match frame.decode_lf_global(&mut BitReader::new(buf), !lf_global_is_complete) {
+ Ok(_) => {
+ self.section_state.lf_global_done = true;
+ processed_section = true;
+ }
+ Err(_) if !lf_global_is_complete => {
+ // Ignore errors if we are doing partial parsing.
+ }
+ Err(e) => return Err(e),
+ }
}
if !self.section_state.lf_global_done {
@@ -127,16 +179,6 @@ impl CodestreamParser {
if let Some(hf_global) = self.hf_global_section.take() {
frame.decode_hf_global(&mut BitReader::new(&hf_global.data))?;
- frame.prepare_render_pipeline(
- self.pixel_format.as_ref().unwrap(),
- decode_options.cms.as_deref(),
- self.embedded_color_profile.as_ref().expect(
- "embedded_color_profile should be set before pipeline preparation",
- ),
- self.output_color_profile.as_ref().expect(
- "output_color_profile should be set before pipeline preparation",
- ),
- )?;
frame.finalize_lf()?;
self.section_state.hf_global_done = true;
processed_section = true;
@@ -184,7 +226,14 @@ impl CodestreamParser {
self.candidate_hf_sections.clear();
}
- frame.decode_and_render_hf_groups(output_buffers, pixel_format, group_readers)?;
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ group_readers,
+ do_flush,
+ output_profile,
+ )?;
+ called_render_hf = true;
for g in processed_groups.into_iter() {
for i in 0..self.section_state.completed_passes[g] {
@@ -195,6 +244,16 @@ impl CodestreamParser {
}
}
+ if do_flush && !called_render_hf && frame.can_do_early_rendering() {
+ frame.decode_and_render_hf_groups(
+ output_buffers,
+ pixel_format,
+ vec![],
+ do_flush,
+ output_profile,
+ )?;
+ }
+
if !processed_section {
let data_for_next_section =
self.sections.front().unwrap().len - self.ready_section_data;
@@ -230,7 +289,6 @@ impl CodestreamParser {
if let Some(fh) = self.saved_file_header.take() {
let mut new_state = crate::frame::DecoderState::new(fh);
new_state.render_spotcolors = decode_options.render_spot_colors;
- new_state.enable_output = decode_options.enable_output;
self.decoder_state = Some(new_state);
}
} else {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
index 44aa57ce701d8..ac8dd81cb6600 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs
@@ -6,11 +6,12 @@
#[cfg(test)]
use crate::api::FrameCallback;
use crate::{
- api::JxlFrameHeader,
+ api::{JxlFrameHeader, VisibleFrameInfo},
error::{Error, Result},
};
use super::{JxlBasicInfo, JxlColorProfile, JxlDecoderOptions, JxlPixelFormat};
+use crate::container::frame_index::FrameIndexBox;
use box_parser::BoxParser;
use codestream_parser::CodestreamParser;
@@ -67,6 +68,7 @@ impl JxlDecoderInner {
return Err(Error::ICCOutputNoCMS);
}
self.codestream_parser.output_color_profile = Some(profile);
+ self.codestream_parser.output_color_profile_set_by_user = true;
Ok(())
}
@@ -75,7 +77,10 @@ impl JxlDecoderInner {
}
pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) {
+ // TODO(veluca): return an error if we are asking for both planar and
+ // interleaved-in-color alpha.
self.codestream_parser.pixel_format = Some(pixel_format);
+ self.codestream_parser.update_default_output_color_profile();
}
pub fn frame_header(&self) -> Option<JxlFrameHeader> {
@@ -131,6 +136,35 @@ impl JxlDecoderInner {
self.codestream_parser.has_more_frames
}
+ /// Returns the parsed frame index box, if the file contained one.
+ pub fn frame_index(&self) -> Option<&FrameIndexBox> {
+ self.box_parser.frame_index.as_ref()
+ }
+
+ /// Returns visible frame info entries collected during parsing.
+ pub fn scanned_frames(&self) -> &[VisibleFrameInfo] {
+ &self.codestream_parser.scanned_frames
+ }
+
+ /// Resets frame-level state to prepare for decoding a new frame.
+ ///
+ /// Preserves image-level state (file header, decoder state including
+ /// reference frames, color profiles, pixel format). Clears frame header,
+ /// TOC, section buffers, and restores the box parser to the correct
+ /// state so the next `process()` call parses a new frame header.
+ ///
+ /// `remaining_in_box` comes from
+ /// `VisibleFrameInfo::seek_target.remaining_in_box` and tells the box
+ /// parser how many codestream bytes remain in the current container box at
+ /// the target position. For bare-codestream files this is `u64::MAX`.
+ ///
+ /// The caller must provide raw file input starting from the target
+ /// frame's `seek_target.decode_start_file_offset`.
+ pub fn start_new_frame(&mut self, remaining_in_box: u64) {
+ self.box_parser.reset_for_codestream_seek(remaining_in_box);
+ self.codestream_parser.start_new_frame();
+ }
+
#[cfg(test)]
pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) {
self.codestream_parser.set_use_simple_pipeline(u);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
index 50e6fe338613c..ecdb966d757f6 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs
@@ -127,11 +127,23 @@ impl JxlDecoderInner {
input,
&self.options,
buffers,
+ false,
))
}
/// Draws all the pixels we have data for.
- pub fn flush_pixels(&mut self, _buffers: &mut [JxlOutputBuffer]) -> Result<()> {
- todo!()
+ pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer]) -> Result<()> {
+ let mut input: &[u8] = &[];
+ match self.codestream_parser.process(
+ &mut self.box_parser,
+ &mut input,
+ &self.options,
+ Some(buffers),
+ true,
+ ) {
+ Ok(()) => Ok(()),
+ Err(crate::error::Error::OutOfBounds(_)) => Ok(()),
+ Err(e) => Err(e),
+ }
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
index 5be3ef129622e..18c4b430f8dab 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs
@@ -12,6 +12,7 @@ mod inner;
mod input;
mod options;
mod signature;
+mod xyb_constants;
pub use crate::image::JxlOutputBuffer;
pub use color::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
index 2bff60cda75ba..327a4456dbe70 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs
@@ -22,7 +22,6 @@ pub struct JxlDecoderOptions {
pub desired_intensity_target: Option<f32>,
pub skip_preview: bool,
pub progressive_mode: JxlProgressiveMode,
- pub enable_output: bool,
pub cms: Option<Box<dyn JxlCms>>,
/// Fail decoding images with more than this number of pixels, or with frames with
/// more than this number of pixels. The limit counts the product of pixels and
@@ -40,6 +39,11 @@ pub struct JxlDecoderOptions {
/// This produces premultiplied alpha output, which is useful for compositing.
/// Default: false (output straight alpha)
pub premultiply_output: bool,
+ /// If true, only parse frame headers/TOC and skip section decoding.
+ ///
+ /// This is useful for collecting [`VisibleFrameInfo`](crate::api::VisibleFrameInfo)
+ /// via the regular decoder API without producing pixels.
+ pub scan_frames_only: bool,
}
impl Default for JxlDecoderOptions {
@@ -51,11 +55,11 @@ impl Default for JxlDecoderOptions {
skip_preview: true,
desired_intensity_target: None,
progressive_mode: JxlProgressiveMode::Pass,
- enable_output: true,
cms: None,
pixel_limit: None,
high_precision: false,
premultiply_output: false,
+ scan_frames_only: false,
}
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs
new file mode 100644
index 0000000000000..eb9356b228a56
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//! XYB color space constants (matching libjxl)
+//!
+//! Allow excessive precision as these constants are copied verbatim from libjxl for compatibility
+
+#![allow(clippy::excessive_precision)]
+
+pub const OPSIN_ABSORBANCE_BIAS: f32 = 0.0037930732552754493;
+
+#[allow(dead_code)]
+pub const NEG_OPSIN_ABSORBANCE_BIAS_RGB: [f32; 3] = [
+ -OPSIN_ABSORBANCE_BIAS,
+ -OPSIN_ABSORBANCE_BIAS,
+ -OPSIN_ABSORBANCE_BIAS,
+];
+
+const SCALED_XYB_OFFSET: [f32; 3] = [0.015386134, 0.0, 0.27770459];
+const SCALED_XYB_SCALE: [f32; 3] = [22.995788804, 1.183000077, 1.502141333];
+
+const fn reciprocal_sum(r1: f32, r2: f32) -> f32 {
+ (r1 * r2) / (r1 + r2)
+}
+
+pub const XYB_OFFSET: [f32; 3] = [
+ SCALED_XYB_OFFSET[0] + SCALED_XYB_OFFSET[1],
+ SCALED_XYB_OFFSET[1] - SCALED_XYB_OFFSET[0] + (1.0 / SCALED_XYB_SCALE[0]),
+ SCALED_XYB_OFFSET[1] + SCALED_XYB_OFFSET[2],
+];
+
+pub const fn xyb_scale() -> [f32; 3] {
+ [
+ reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]),
+ reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]),
+ reciprocal_sum(SCALED_XYB_SCALE[1], SCALED_XYB_SCALE[2]),
+ ]
+}
+
+const fn xyb_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 {
+ let val = match idx {
+ 0 => x,
+ 1 => y,
+ _ => b,
+ };
+ (val as f32 / SCALED_XYB_SCALE[idx]) - SCALED_XYB_OFFSET[idx]
+}
+
+const fn scaled_a2b_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 {
+ match idx {
+ 0 => xyb_corner(x, y, b, 1) + xyb_corner(x, y, b, 0),
+ 1 => xyb_corner(x, y, b, 1) - xyb_corner(x, y, b, 0),
+ _ => xyb_corner(x, y, b, 2) + xyb_corner(x, y, b, 1),
+ }
+}
+
+const fn unscaled_a2b_corner(x: usize, y: usize, b: usize) -> [f32; 3] {
+ let scale = xyb_scale();
+ [
+ (scaled_a2b_corner(x, y, b, 0) + XYB_OFFSET[0]) * scale[0],
+ (scaled_a2b_corner(x, y, b, 1) + XYB_OFFSET[1]) * scale[1],
+ (scaled_a2b_corner(x, y, b, 2) + XYB_OFFSET[2]) * scale[2],
+ ]
+}
+
+/// Compute the 2x2x2 CLUT cube for XYB to linear RGB conversion.
+pub const fn unscaled_a2b_cube_full() -> [[[[f32; 3]; 2]; 2]; 2] {
+ [
+ [
+ [unscaled_a2b_corner(0, 0, 0), unscaled_a2b_corner(0, 0, 1)],
+ [unscaled_a2b_corner(0, 1, 0), unscaled_a2b_corner(0, 1, 1)],
+ ],
+ [
+ [unscaled_a2b_corner(1, 0, 0), unscaled_a2b_corner(1, 0, 1)],
+ [unscaled_a2b_corner(1, 1, 0), unscaled_a2b_corner(1, 1, 1)],
+ ],
+ ]
+}
+
+/// Matrix for XYB ICC profile (from libjxl).
+pub const XYB_ICC_MATRIX: [f64; 9] = [
+ 1.5170095, -1.1065225, 0.071623, -0.050022, 0.5683655, -0.018344, -1.387676, 1.1145555,
+ 0.6857255,
+];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs
new file mode 100644
index 0000000000000..8a81b02864676
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs
@@ -0,0 +1,236 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//! Parser for the JPEG XL Frame Index box (`jxli`), as specified in
+//! the JPEG XL container specification.
+//!
+//! The frame index box provides a seek table for animated JXL files,
+//! listing keyframe byte offsets in the codestream, timestamps, and
+//! frame counts.
+
+use std::num::NonZero;
+
+use byteorder::{BigEndian, ReadBytesExt};
+
+use crate::error::{Error, Result};
+use crate::icc::read_varint_from_reader;
+use crate::util::NewWithCapacity;
+
+/// A single entry in the frame index.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FrameIndexEntry {
+ /// Absolute byte offset of this keyframe in the codestream.
+ /// (Accumulated from the delta-coded OFFi values.)
+ pub codestream_offset: u64,
+ /// Duration in ticks from this indexed frame to the next indexed frame
+ /// (or end of stream for the last entry). A tick lasts TNUM/TDEN seconds.
+ pub duration_ticks: u64,
+ /// Number of displayed frames from this indexed frame to the next indexed
+ /// frame (or end of stream for the last entry).
+ pub frame_count: u64,
+}
+
+/// Parsed contents of a Frame Index box (`jxli`).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FrameIndexBox {
+ /// Tick numerator. A tick lasts `tnum / tden` seconds.
+ pub tnum: u32,
+ /// Tick denominator (non-zero per spec).
+ pub tden: NonZero<u32>,
+ /// Indexed frame entries.
+ pub entries: Vec<FrameIndexEntry>,
+}
+
+impl FrameIndexBox {
+ /// Returns the number of indexed frames.
+ pub fn num_frames(&self) -> usize {
+ self.entries.len()
+ }
+
+ /// Returns the duration of one tick in seconds.
+ pub fn tick_duration_secs(&self) -> f64 {
+ self.tnum as f64 / self.tden.get() as f64
+ }
+
+ /// Finds the index entry for the keyframe at or before the given
+ /// codestream byte offset.
+ pub fn entry_for_offset(&self, offset: u64) -> Option<&FrameIndexEntry> {
+ // Entries are sorted by codestream_offset (monotonically increasing).
+ match self
+ .entries
+ .binary_search_by_key(&offset, |e| e.codestream_offset)
+ {
+ Ok(i) => Some(&self.entries[i]),
+ Err(0) => None,
+ Err(i) => Some(&self.entries[i - 1]),
+ }
+ }
+
+ /// Parse a frame index box from its raw content bytes (after the box header).
+ pub fn parse(data: &[u8]) -> Result<Self> {
+ let mut reader = data;
+
+ let nf = read_varint_from_reader(&mut reader)?;
+ if nf > u32::MAX as u64 {
+ return Err(Error::InvalidBox);
+ }
+ let nf = nf as usize;
+
+ let tnum = reader
+ .read_u32::<BigEndian>()
+ .map_err(|_| Error::InvalidBox)?;
+ let tden = NonZero::new(
+ reader
+ .read_u32::<BigEndian>()
+ .map_err(|_| Error::InvalidBox)?,
+ )
+ .ok_or(Error::InvalidBox)?;
+
+ // Each entry requires at least 3 bytes (three varints, min 1 byte each).
+ // Cap the pre-allocation to avoid OOM from a crafted NF value.
+ // Use new_with_capacity to return Err on allocation failure instead of aborting.
+ let mut entries = Vec::new_with_capacity(nf.min(reader.len() / 3))?;
+ let mut absolute_offset: u64 = 0;
+
+ for _ in 0..nf {
+ let off_delta = read_varint_from_reader(&mut reader)?;
+ let duration_ticks = read_varint_from_reader(&mut reader)?;
+ let frame_count = read_varint_from_reader(&mut reader)?;
+
+ absolute_offset = absolute_offset
+ .checked_add(off_delta)
+ .ok_or(Error::InvalidBox)?;
+
+ entries.push(FrameIndexEntry {
+ codestream_offset: absolute_offset,
+ duration_ticks,
+ frame_count,
+ });
+ }
+
+ Ok(FrameIndexBox {
+ tnum,
+ tden,
+ entries,
+ })
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::util::test::{build_frame_index_content, encode_varint};
+
+ fn build_frame_index(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> {
+ build_frame_index_content(tnum, tden, entries)
+ }
+
+ #[test]
+ fn test_parse_empty_index() {
+ let data = build_frame_index(1, 1000, &[]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 0);
+ assert_eq!(index.tnum, 1);
+ assert_eq!(index.tden.get(), 1000);
+ }
+
+ #[test]
+ fn test_parse_single_entry() {
+ // One frame at offset 0, duration 100 ticks, 1 frame
+ let data = build_frame_index(1, 1000, &[(0, 100, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 1);
+ assert_eq!(
+ index.entries[0],
+ FrameIndexEntry {
+ codestream_offset: 0,
+ duration_ticks: 100,
+ frame_count: 1,
+ }
+ );
+ }
+
+ #[test]
+ fn test_parse_multiple_entries_delta_coding() {
+ // Three frames with delta-coded offsets:
+ // OFF0=100 (absolute: 100), T0=50, F0=2
+ // OFF1=200 (absolute: 300), T1=50, F1=2
+ // OFF2=150 (absolute: 450), T2=30, F2=1
+ let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.num_frames(), 3);
+ assert_eq!(index.entries[0].codestream_offset, 100);
+ assert_eq!(index.entries[1].codestream_offset, 300);
+ assert_eq!(index.entries[2].codestream_offset, 450);
+ assert_eq!(index.entries[0].duration_ticks, 50);
+ assert_eq!(index.entries[1].duration_ticks, 50);
+ assert_eq!(index.entries[2].duration_ticks, 30);
+ }
+
+ #[test]
+ fn test_parse_large_varint() {
+ // Test with a value that requires multiple varint bytes
+ let mut data = Vec::new();
+ data.extend(encode_varint(1)); // NF = 1
+ data.extend(1u32.to_be_bytes()); // TNUM
+ data.extend(1000u32.to_be_bytes()); // TDEN
+ data.extend(encode_varint(0x1234_5678_9ABC)); // large offset
+ data.extend(encode_varint(42));
+ data.extend(encode_varint(1));
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert_eq!(index.entries[0].codestream_offset, 0x1234_5678_9ABC);
+ }
+
+ #[test]
+ fn test_entry_for_offset() {
+ let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ // Absolute offsets: 100, 300, 450
+
+ // Before first entry
+ assert!(index.entry_for_offset(50).is_none());
+ // Exact match
+ assert_eq!(index.entry_for_offset(100).unwrap().codestream_offset, 100);
+ // Between entries
+ assert_eq!(index.entry_for_offset(200).unwrap().codestream_offset, 100);
+ assert_eq!(index.entry_for_offset(350).unwrap().codestream_offset, 300);
+ // Exact match on last
+ assert_eq!(index.entry_for_offset(450).unwrap().codestream_offset, 450);
+ // Past last
+ assert_eq!(index.entry_for_offset(999).unwrap().codestream_offset, 450);
+ }
+
+ #[test]
+ fn test_zero_tden_rejected() {
+ let data = build_frame_index(1, 0, &[]);
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_truncated_data() {
+ // Just NF=1, no TNUM/TDEN
+ let data = encode_varint(1);
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_huge_nf_no_oom() {
+ // Crafted input: NF claims billions of entries but the data is tiny.
+ // This must not OOM -- Vec::with_capacity should be bounded by data length.
+ let mut data = Vec::new();
+ data.extend(encode_varint(u32::MAX as u64)); // NF = 4 billion
+ data.extend(1u32.to_be_bytes()); // TNUM
+ data.extend(1000u32.to_be_bytes()); // TDEN
+ // No actual entry data -- parse should fail gracefully, not OOM.
+ assert!(FrameIndexBox::parse(&data).is_err());
+ }
+
+ #[test]
+ fn test_tick_duration() {
+ let data = build_frame_index(1, 1000, &[]);
+ let index = FrameIndexBox::parse(&data).unwrap();
+ assert!((index.tick_duration_secs() - 0.001).abs() < 1e-9);
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
index c6e9e505076b6..65f91b5e57ae0 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs
@@ -6,6 +6,7 @@
// Originally written for jxl-oxide.
pub mod box_header;
+pub mod frame_index;
pub mod parse;
use box_header::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
index c6b95d1a8ef1f..9da5cbee0388f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs
@@ -15,6 +15,11 @@ const RLE_MARKER_SYM: u16 = LOG_SUM_PROBS as u16 + 1;
#[derive(Debug)]
struct AnsHistogram {
+ // Safety invariant:
+ // - log_bucket_size <= LOG_SUM_PROBS
+ // - buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size)
+ // This relationship ensures that for any ANS state (12 bits), the bucket index
+ // computed as (state & 0xfff) >> log_bucket_size is always < buckets.len()
buckets: Vec<Bucket>,
log_bucket_size: usize,
bucket_mask: u32,
@@ -265,7 +270,7 @@ impl AnsHistogram {
debug_assert!((5..=8).contains(&log_alpha_size));
let table_size = (1u16 << log_alpha_size) as usize;
// 4 <= log_bucket_size <= 7
- let log_bucket_size = LOG_SUM_PROBS - log_alpha_size;
+ let log_bucket_size = LOG_SUM_PROBS.checked_sub(log_alpha_size).unwrap();
let bucket_size = 1u16 << log_bucket_size;
let bucket_mask = bucket_size as u32 - 1;
@@ -281,10 +286,9 @@ impl AnsHistogram {
} else {
Self::decode_dist_complex(br, &mut dist)?
};
-
- if let Some(single_sym_idx) = dist.iter().position(|&d| d == SUM_PROBS) {
- let buckets = dist
- .into_iter()
+ let single_symbol = dist.iter().position(|&d| d == SUM_PROBS).map(|x| x as u32);
+ let buckets = if let Some(single_sym_idx) = single_symbol {
+ dist.into_iter()
.enumerate()
.map(|(i, dist)| Bucket {
dist,
@@ -293,20 +297,19 @@ impl AnsHistogram {
alias_cutoff: 0,
alias_dist_xor: dist ^ SUM_PROBS,
})
- .collect();
- return Ok(Self {
- buckets,
- log_bucket_size,
- bucket_mask,
- single_symbol: Some(single_sym_idx as u32),
- });
- }
+ .collect()
+ } else {
+ Self::build_alias_map(alphabet_size, log_bucket_size, &dist)
+ };
+ assert_eq!(buckets.len(), 1 << (LOG_SUM_PROBS - log_bucket_size));
+ // Safety note: log_bucket_size <= LOG_SUM_PROBS by construction, and we
+ // just checked that buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size)
Ok(Self {
- buckets: Self::build_alias_map(alphabet_size, log_bucket_size, &dist),
+ buckets,
log_bucket_size,
bucket_mask,
- single_symbol: None,
+ single_symbol,
})
}
@@ -356,7 +359,19 @@ impl AnsHistogram {
let pos = idx & self.bucket_mask;
debug_assert!(self.buckets.len().is_power_of_two());
- let bucket = self.buckets[i & (self.buckets.len() - 1)];
+ debug_assert!(
+ i < self.buckets.len(),
+ "bucket index {} out of bounds (len = {})",
+ i,
+ self.buckets.len()
+ );
+ // SAFETY: The struct-level safety invariant (see AnsHistogram::buckets) ensures that
+ // buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size). Since idx = state & 0xfff
+ // (12 bits) and i = idx >> log_bucket_size, we have i < buckets.len() always.
+ #[allow(unsafe_code)]
+ let bucket = unsafe { *self.buckets.get_unchecked(i) };
+ // Safe version: (~3% slower for e2 lossless decoding)
+ // let bucket = self.buckets[i & (self.buckets.len() - 1)];
let alias_symbol = bucket.alias_symbol as u32;
let alias_cutoff = bucket.alias_cutoff as u32;
let dist = bucket.dist as u32;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
index dd23c6247e226..e57a3211aafea 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs
@@ -12,6 +12,7 @@ use crate::entropy_coding::huffman::*;
use crate::entropy_coding::hybrid_uint::*;
use crate::error::{Error, Result};
use crate::headers::encodings::*;
+use crate::util::NewWithCapacity;
use crate::util::tracing_wrappers::*;
pub fn decode_varint16(br: &mut BitReader) -> Result<u16> {
@@ -259,7 +260,7 @@ impl SymbolReader {
min_symbol,
min_length,
dist_multiplier,
- window: Vec::new(),
+ window: Vec::new_with_capacity(1 << Lz77State::LOG_WINDOW_SIZE)?,
num_to_copy: 0,
copy_pos: 0,
num_decoded: 0,
@@ -278,30 +279,50 @@ impl SymbolReader {
}
impl SymbolReader {
- #[inline]
- pub fn read_unsigned(
+ #[inline(always)]
+ pub fn read_unsigned_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
context: usize,
) -> u32 {
let cluster = histograms.map_context_to_cluster(context);
- self.read_unsigned_clustered(histograms, br, cluster)
+ self.read_unsigned_clustered_inline(histograms, br, cluster)
+ }
+
+ #[inline(never)]
+ pub fn read_unsigned(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ context: usize,
+ ) -> u32 {
+ self.read_unsigned_inline(histograms, br, context)
}
#[inline(always)]
- pub fn read_signed(
+ pub fn read_signed_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
context: usize,
) -> i32 {
- let unsigned = self.read_unsigned(histograms, br, context);
+ let unsigned = self.read_unsigned_inline(histograms, br, context);
unpack_signed(unsigned)
}
- #[inline]
- pub fn read_unsigned_clustered(
+ #[inline(never)]
+ pub fn read_signed(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ context: usize,
+ ) -> i32 {
+ self.read_signed_inline(histograms, br, context)
+ }
+
+ #[inline(always)]
+ pub fn read_unsigned_clustered_inline(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
@@ -382,14 +403,69 @@ impl SymbolReader {
}
}
+ #[inline(never)]
+ pub fn read_unsigned_clustered(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> u32 {
+ self.read_unsigned_clustered_inline(histograms, br, cluster)
+ }
+
#[inline(always)]
+ pub fn read_signed_clustered_inline(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> i32 {
+ let unsigned = self.read_unsigned_clustered_inline(histograms, br, cluster);
+ unpack_signed(unsigned)
+ }
+
+ #[inline(never)]
pub fn read_signed_clustered(
&mut self,
histograms: &Histograms,
br: &mut BitReader,
cluster: usize,
) -> i32 {
- let unsigned = self.read_unsigned_clustered(histograms, br, cluster);
+ self.read_signed_clustered_inline(histograms, br, cluster)
+ }
+
+ /// Specialized fast path for when all HybridUint configs are 420.
+ ///
+ /// # Preconditions
+ /// - `histograms.can_use_config_420_fast_path()` must be true (no LZ77, all configs are 420)
+ /// - This assumes `SymbolReaderState::None` (verified by debug_assert)
+ #[inline(always)]
+ pub fn read_unsigned_clustered_config_420(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> u32 {
+ debug_assert!(matches!(self.state, SymbolReaderState::None));
+ debug_assert!(histograms.can_use_config_420_fast_path());
+
+ let token = match &histograms.codes {
+ Codes::Huffman(hc) => hc.read(br, cluster),
+ Codes::Ans(ans) => self.ans_reader.read(ans, br, cluster),
+ };
+ HybridUint::read_config_420(token, br)
+ }
+
+ /// Specialized fast path for signed reads when all configs are 420.
+ /// See [`read_unsigned_clustered_config_420`] for preconditions.
+ #[inline(always)]
+ pub fn read_signed_clustered_config_420(
+ &mut self,
+ histograms: &Histograms,
+ br: &mut BitReader,
+ cluster: usize,
+ ) -> i32 {
+ let unsigned = self.read_unsigned_clustered_config_420(histograms, br, cluster);
unpack_signed(unsigned)
}
@@ -553,6 +629,17 @@ impl Histograms {
pub fn num_histograms(&self) -> usize {
*self.context_map.iter().max().unwrap() as usize + 1
}
+
+ pub fn resize(&mut self, num_contexts: usize) {
+ self.context_map.resize(num_contexts, 0);
+ }
+
+ /// Returns true if the config 420 fast path can be safely used.
+ /// Config 420: split_exponent=4, msb_in_token=2, lsb_in_token=0 (common pattern)
+ /// Requires: all configs are 420 AND LZ77 is disabled
+ pub fn can_use_config_420_fast_path(&self) -> bool {
+ !self.lz77_params.enabled && self.uint_configs.iter().all(|cfg| cfg.is_config_420())
+ }
}
#[cfg(test)]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
index fc6e7f6db4fba..447bca94abefa 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs
@@ -53,6 +53,33 @@ impl HybridUint {
})
}
+ /// Returns true if this config matches the 420 pattern (common in e3 images):
+ /// split_exponent=4, msb_in_token=2, lsb_in_token=0
+ #[inline(always)]
+ pub fn is_config_420(&self) -> bool {
+ self.split_exponent == 4
+ && self.split_token == 16
+ && self.msb_in_token == 2
+ && self.lsb_in_token == 0
+ }
+
+ /// Specialized fast path for 420 config:
+ /// split_exponent=4, msb_in_token=2, lsb_in_token=0
+ #[inline(always)]
+ pub fn read_config_420(symbol: u32, br: &mut BitReader) -> u32 {
+ if symbol < 16 {
+ return symbol;
+ }
+
+ // Equivalent to: 2 + ((symbol - 16) >> 2)
+ let nbits = (symbol >> 2) - 2;
+ let nbits = nbits & 31;
+ let bits = br.read_optimistic(nbits as usize) as u32;
+ let hi = (symbol & 3) | 4;
+
+ (hi << nbits) | bits
+ }
+
#[inline]
pub fn read(&self, symbol: u32, br: &mut BitReader) -> u32 {
if symbol < self.split_token {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
index c21679bf8c844..a8e299c70f73e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs
@@ -133,16 +133,12 @@ pub enum Error {
// Generic arithmetic overflow. Prefer using other errors if possible.
#[error("Arithmetic overflow")]
ArithmeticOverflow,
- #[error("Empty frame sequence")]
- NoFrames,
#[error(
"Pipeline channel type mismatch: stage {0} channel {1}, expected {2:?} but found {3:?}"
)]
PipelineChannelTypeMismatch(String, usize, DataTypeTag, DataTypeTag),
#[error("Invalid stage {0} after extend stage")]
PipelineInvalidStageAfterExtend(String),
- #[error("Channel {0} was not used in the render pipeline")]
- PipelineChannelUnused(usize),
#[error("Trying to copy rects of different size, src: {0}x{1} dst {2}x{3}")]
CopyOfDifferentSize(usize, usize, usize, usize),
#[error("LF quantization factor is too small: {0}")]
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
index c28bf701bd2b2..55dcedc8153ad 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs
@@ -25,6 +25,15 @@ pub enum SigmaSource {
Constant(f32),
}
+#[allow(clippy::excessive_precision)]
+const INV_SIGMA_NUM: f32 = -1.1715728752538099024;
+
+impl Default for SigmaSource {
+ fn default() -> Self {
+ Self::Constant(INV_SIGMA_NUM / 2.0)
+ }
+}
+
impl SigmaSource {
pub fn new(
frame_header: &FrameHeader,
@@ -32,9 +41,6 @@ impl SigmaSource {
hf_meta: &Option<HfMetadata>,
) -> Result<Self> {
let rf = &frame_header.restoration_filter;
- #[allow(clippy::excessive_precision)]
- const INV_SIGMA_NUM: f32 = -1.1715728752538099024;
-
if frame_header.encoding == Encoding::VarDCT {
let size_blocks = frame_header.size_blocks();
let sigma_xsize = size_blocks.0;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
index b73a2d7306ec2..d981ef3d96bbd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs
@@ -172,6 +172,13 @@ pub struct PatchesDictionary {
}
impl PatchesDictionary {
+ pub fn new(num_extra_channels: usize) -> Self {
+ Self {
+ blendings_stride: num_extra_channels + 1,
+ ..Default::default()
+ }
+ }
+
#[cfg(test)]
pub fn random<R: rand::Rng>(
size: (usize, usize),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
index b4a19b95e49e4..3e947d6cf23a3 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs
@@ -14,8 +14,9 @@ use crate::{
entropy_coding::decode::{Histograms, SymbolReader, unpack_signed},
error::{Error, Result},
frame::color_correlation_map::ColorCorrelationParams,
- util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff, tracing_wrappers::*},
+ util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff_simd, tracing_wrappers::*},
};
+use jxl_simd::{F32SimdVec, ScalarDescriptor, SimdDescriptor, simd_function};
const MAX_NUM_CONTROL_POINTS: u32 = 1 << 20;
const MAX_NUM_CONTROL_POINTS_PER_PIXEL_RATIO: u32 = 2;
const DELTA_LIMIT: i64 = 1 << 30;
@@ -520,7 +521,111 @@ impl Dct32 {
}
}
+#[inline(always)]
+fn draw_segment_inner<D: SimdDescriptor>(
+ d: D,
+ row: &mut [&mut [f32]],
+ row_pos: (usize, usize),
+ x_range: (usize, usize),
+ segment: &SplineSegment,
+) -> usize {
+ let (x_start, x_end) = x_range;
+ let (row_x0, y) = row_pos;
+ let len = D::F32Vec::LEN;
+ if x_start + len > x_end {
+ return x_start;
+ }
+
+ let inv_sigma = D::F32Vec::splat(d, segment.inv_sigma);
+ let half = D::F32Vec::splat(d, 0.5);
+ let one_over_2s2 = D::F32Vec::splat(d, 0.353_553_38);
+ let sigma_over_4_times_intensity = D::F32Vec::splat(d, segment.sigma_over_4_times_intensity);
+ let center_x = D::F32Vec::splat(d, segment.center_x);
+ let center_y = D::F32Vec::splat(d, segment.center_y);
+ let dy = D::F32Vec::splat(d, y as f32) - center_y;
+ let dy2 = dy * dy;
+
+ let mut x_base_arr = [0.0f32; 16];
+ for (i, val) in x_base_arr.iter_mut().enumerate() {
+ *val = i as f32;
+ }
+ let vx_base = D::F32Vec::load(d, &x_base_arr);
+
+ let start_offset = x_start - row_x0;
+ let end_offset = x_end - row_x0;
+
+ let [r0, r1, r2] = row else { unreachable!() };
+
+ let mut it0 = r0[start_offset..end_offset].chunks_exact_mut(len);
+ let mut it1 = r1[start_offset..end_offset].chunks_exact_mut(len);
+ let mut it2 = r2[start_offset..end_offset].chunks_exact_mut(len);
+
+ let cm0 = D::F32Vec::splat(d, segment.color[0]);
+ let cm1 = D::F32Vec::splat(d, segment.color[1]);
+ let cm2 = D::F32Vec::splat(d, segment.color[2]);
+
+ let num_chunks = (end_offset - start_offset) / len;
+ let mut x = x_start;
+ for _ in 0..num_chunks {
+ let vx = D::F32Vec::splat(d, x as f32) + vx_base;
+ let dx = vx - center_x;
+ let sqd = dx.mul_add(dx, dy2);
+ let distance = sqd.sqrt();
+
+ let arg1 = distance.mul_add(half, one_over_2s2) * inv_sigma;
+ let arg2 = distance.mul_add(half, D::F32Vec::splat(d, -0.353_553_38)) * inv_sigma;
+ let one_dimensional_factor = fast_erff_simd(d, arg1) - fast_erff_simd(d, arg2);
+ let local_intensity =
+ sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor;
+
+ let c0 = it0.next().unwrap();
+ cm0.mul_add(local_intensity, D::F32Vec::load(d, c0))
+ .store(c0);
+ let c1 = it1.next().unwrap();
+ cm1.mul_add(local_intensity, D::F32Vec::load(d, c1))
+ .store(c1);
+ let c2 = it2.next().unwrap();
+ cm2.mul_add(local_intensity, D::F32Vec::load(d, c2))
+ .store(c2);
+
+ x += len;
+ }
+ x
+}
+
+simd_function!(
+ draw_segment_dispatch,
+ d: D,
+ fn draw_segment_simd(
+ row: &mut [&mut [f32]],
+ row_pos: (usize, usize),
+ xsize: usize,
+ segment: &SplineSegment,
+ ) {
+ let (x0, y) = row_pos;
+ let x1 = x0 + xsize;
+ let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize);
+ let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1);
+
+ if clamped_x1 <= clamped_x0 {
+ return;
+ }
+
+ let x = clamped_x0;
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ let d = d.maybe_downgrade_256bit();
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ let d = d.maybe_downgrade_128bit();
+ let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment);
+ draw_segment_inner(ScalarDescriptor, row, (x0, y), (x, clamped_x1), segment);
+ }
+);
+
impl Splines {
+ pub fn is_initialized(&self) -> bool {
+ !self.segment_y_start.is_empty()
+ }
+
#[cfg(test)]
pub fn create(
quantization_adjustment: i32,
@@ -540,7 +645,7 @@ impl Splines {
let first_segment_index_pos = self.segment_y_start[row_pos.1];
let last_segment_index_pos = self.segment_y_start[row_pos.1 + 1];
for segment_index_pos in first_segment_index_pos..last_segment_index_pos {
- self.draw_segment(
+ draw_segment_dispatch(
row,
row_pos,
xsize,
@@ -548,48 +653,6 @@ impl Splines {
);
}
}
- fn draw_segment(
- &self,
- row: &mut [&mut [f32]],
- row_pos: (usize, usize),
- xsize: usize,
- segment: &SplineSegment,
- ) {
- let (x0, y) = row_pos;
- let x1 = x0 + xsize;
- let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize);
- // one-past-the-end
- let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1);
- for x in clamped_x0..clamped_x1 {
- self.draw_segment_at(row, (x, y), x0, segment);
- }
- }
- fn draw_segment_at(
- &self,
- row: &mut [&mut [f32]],
- pixel_pos: (usize, usize),
- row_x0: usize,
- segment: &SplineSegment,
- ) {
- let (x, y) = pixel_pos;
- let inv_sigma = segment.inv_sigma;
- let half = 0.5f32;
- let one_over_2s2 = 0.353_553_38_f32;
- let sigma_over_4_times_intensity = segment.sigma_over_4_times_intensity;
- let dx = x as f32 - segment.center_x;
- let dy = y as f32 - segment.center_y;
- let sqd = dx * dx + dy * dy;
- let distance = sqd.sqrt();
- let one_dimensional_factor = fast_erff((distance * half + one_over_2s2) * inv_sigma)
- - fast_erff((distance * half - one_over_2s2) * inv_sigma);
- let local_intensity =
- sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor;
- for (channel_index, row) in row.iter_mut().enumerate() {
- let cm = segment.color[channel_index];
- let inp = row[x - row_x0];
- row[x - row_x0] = cm * local_intensity + inp;
- }
- }
fn add_segment(
&mut self,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
index c48e1e22ddc70..9051f59650354 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs
@@ -12,7 +12,11 @@ use crate::{
};
pub const NON_ZERO_BUCKETS: usize = 37;
+
+// Supremum of zero_density_context(x, y) + 1, when x + y <= 64.
pub const ZERO_DENSITY_CONTEXT_COUNT: usize = 458;
+// Supremum of zero_density_context(x, y) + 1.
+pub const ZERO_DENSITY_CONTEXT_LIMIT: usize = 474;
pub const COEFF_FREQ_CONTEXT: [usize; 64] = [
0xBAD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19,
@@ -27,7 +31,7 @@ pub const COEFF_NUM_NONZERO_CONTEXT: [usize; 64] = [
206, 206, 206, 206, 206, 206,
];
-#[inline]
+#[inline(always)]
pub fn zero_density_context(
nonzeros_left: usize,
k: usize,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
index f2435eb5ddf48..f58b1044de7cd 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs
@@ -3,6 +3,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::collections::BTreeSet;
use std::sync::Arc;
use super::render::pipeline;
@@ -16,9 +17,14 @@ use super::{
quantizer::{LfQuantFactors, QuantizerParams},
};
use crate::error::Error;
+use crate::features::epf::SigmaSource;
+use crate::frame::block_context_map::{ZERO_DENSITY_CONTEXT_COUNT, ZERO_DENSITY_CONTEXT_LIMIT};
+use crate::headers::frame_header::FrameType;
#[cfg(test)]
use crate::render::SimpleRenderPipeline;
use crate::render::buffer_splitter::BufferSplitter;
+use crate::util::AtomicRefCell;
+use crate::util::{ShiftRightCeil, mirror};
use crate::{
GROUP_DIM,
bit_reader::BitReader,
@@ -39,6 +45,111 @@ use crate::{
};
use jxl_transforms::transform_map::*;
+use crate::headers::CustomTransformData;
+use crate::render::RenderPipelineInOutStage;
+use crate::render::stages::Upsample8x;
+use crate::render::{Channels, ChannelsMut};
+
+fn upsample_lf_group(
+ group: usize,
+ pixels: &mut [Image<f32>; 3],
+ lf_image: &[Image<f32>; 3],
+ header: &FrameHeader,
+ factors: &CustomTransformData,
+) -> Result<()> {
+ let group_dim = header.group_dim();
+ let lf_group_dim = group_dim / 8;
+ let (width_groups, _) = header.size_groups();
+ let gx = group % width_groups;
+ let gy = group / width_groups;
+
+ let upsample = Upsample8x::new(factors, 0);
+ let mut state = upsample.init_local_state(0)?.unwrap();
+
+ let max_width = pixels.iter().map(|x| x.size().0).max().unwrap();
+
+ // Temporary buffer for 8 output rows
+ // We reuse this buffer for each iteration to minimize allocation
+ let mut temp_out_buf: [_; 8] = std::array::from_fn(|_| vec![0.0f32; max_width + 128]);
+
+ let mut input_rows_storage: [_; 5] = std::array::from_fn(|_| vec![0.0; max_width / 8 + 32]);
+
+ for c in 0..3 {
+ let lf_img = &lf_image[c];
+ let out_img = &mut pixels[c];
+ let (out_width, out_height) = out_img.size();
+
+ let vs = header.vshift(c);
+ let hs = header.hshift(c);
+
+ let lf_group_dim_x = lf_group_dim >> hs;
+ let lf_group_dim_y = lf_group_dim >> vs;
+ let lf_x0 = gx * lf_group_dim_x;
+ let lf_y0 = gy * lf_group_dim_y;
+
+ let lf_width = lf_img.size().0.shrc(hs);
+ let lf_height = lf_img.size().1.shrc(hs);
+
+ let start_x = lf_x0.saturating_sub(2);
+ let lf_x1 = (lf_x0 + lf_group_dim_x).min(lf_width);
+ let end_x = (lf_x1 + 2).min(lf_width);
+ let copy_width = end_x - start_x;
+
+ for y in 0..lf_group_dim_y {
+ let cy = lf_y0 + y;
+
+ for dy in -2..=2 {
+ let iy = cy as isize + dy;
+ let iy = mirror(iy, lf_height);
+
+ let storage = &mut input_rows_storage[(dy + 2) as usize];
+
+ let save_start = if start_x == lf_x0 { 2 } else { 0 };
+ let save_end = save_start + copy_width;
+
+ storage[save_start..save_end].copy_from_slice(&lf_img.row(iy)[start_x..end_x]);
+
+ if start_x == lf_x0 {
+ storage[0] = storage[2 + mirror(-2, copy_width)];
+ storage[1] = storage[2 + mirror(-1, copy_width)];
+ }
+ if end_x == lf_x1 {
+ storage[save_end] = storage[save_start + mirror(save_end as isize, save_end)];
+ storage[save_end + 1] =
+ storage[save_start + mirror(save_end as isize + 1, save_end)];
+ }
+ }
+
+ let input_rows_refs = input_rows_storage.iter().map(|x| &x[..]).collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 5);
+
+ {
+ // Prepare output refs
+ let output_rows_refs = temp_out_buf.iter_mut().map(|x| &mut x[..]).collect();
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8);
+
+ upsample.process_row_chunk(
+ (0, 0),
+ lf_x1 - lf_x0,
+ &input_channels,
+ &mut output_channels,
+ Some(state.as_mut()),
+ );
+ }
+
+ // Copy back to out_img
+ let base_y = y * 8;
+ for (i, buf) in temp_out_buf.iter().enumerate() {
+ let out_y = base_y + i;
+ if out_y < out_height {
+ out_img.row_mut(out_y)[..out_width].copy_from_slice(&buf[..out_width]);
+ }
+ }
+ }
+ }
+ Ok(())
+}
+
impl Frame {
pub fn from_header_and_toc(
frame_header: FrameHeader,
@@ -51,6 +162,9 @@ impl Frame {
} else {
decoder_state.nonvisible_frame_index += 1;
}
+ if frame_header.frame_type == FrameType::LFFrame && frame_header.lf_level == 1 {
+ decoder_state.lf_frame_was_rendered = false;
+ }
let image_metadata = &decoder_state.file_header.image_metadata;
let is_gray = !frame_header.do_ycbcr
&& !image_metadata.xyb_encoded
@@ -124,9 +238,13 @@ impl Frame {
None
};
+ let num_extra_channels = image_metadata.extra_channel_info.len();
+
Ok(Self {
#[cfg(test)]
use_simple_pipeline: decoder_state.use_simple_pipeline,
+ last_rendered_pass: vec![None; frame_header.num_groups()],
+ incomplete_groups: frame_header.num_groups(),
header: frame_header,
color_channels,
toc,
@@ -139,10 +257,39 @@ impl Frame {
render_pipeline: None,
reference_frame_data,
lf_frame_data,
- lf_global_was_rendered: false,
+ was_flushed_once: false,
vardct_buffers: None,
+ groups_to_flush: BTreeSet::new(),
+ changed_since_last_flush: BTreeSet::new(),
+ patches: Arc::new(AtomicRefCell::new(PatchesDictionary::new(
+ num_extra_channels,
+ ))),
+ splines: Arc::new(AtomicRefCell::new(Splines::default())),
+ noise: Arc::new(AtomicRefCell::new(Noise::default())),
+ lf_quant: Arc::new(AtomicRefCell::new(LfQuantFactors::default())),
+ color_correlation_params: Arc::new(AtomicRefCell::new(
+ ColorCorrelationParams::default(),
+ )),
+ epf_sigma: Arc::new(AtomicRefCell::new(SigmaSource::default())),
})
}
+
+ pub fn allow_rendering_before_last_pass(&self) -> bool {
+ if self
+ .lf_global
+ .as_ref()
+ .is_none_or(|x| !x.modular_global.can_do_partial_render())
+ {
+ return false;
+ }
+
+ self.header.frame_type == FrameType::RegularFrame
+ || (self.header.frame_type == FrameType::LFFrame
+ && self.header.lf_level == 1
+ // TODO(veluca): this should probably be "there is no alpha".
+ && self.header.num_extra_channels == 0)
+ }
+
/// Given a bit reader pointing at the end of the TOC, returns a vector of `BitReader`s, each
/// of which reads a specific section.
pub fn sections<'a>(&self, br: &'a mut BitReader) -> Result<Vec<BitReader<'a>>> {
@@ -166,97 +313,109 @@ impl Frame {
}
Ok(shuffled_ret)
}
+
#[instrument(level = "debug", skip_all)]
- pub fn decode_lf_global(&mut self, br: &mut BitReader) -> Result<()> {
+ pub fn decode_lf_global(&mut self, br: &mut BitReader, allow_partial: bool) -> Result<()> {
debug!(section_size = br.total_bits_available());
- assert!(self.lf_global.is_none());
- trace!(pos = br.total_bits_read());
- let patches = if self.header.has_patches() {
- info!("decoding patches");
- Some(PatchesDictionary::read(
- br,
- self.header.size_padded().0,
- self.header.size_padded().1,
- self.decoder_state.extra_channel_info().len(),
- &self.decoder_state.reference_frames[..],
- )?)
+ if let Some(lfg) = &self.lf_global {
+ br.skip_bits(lfg.total_bits_read)?;
} else {
- None
- };
+ trace!(pos = br.total_bits_read());
- let splines = if self.header.has_splines() {
- info!("decoding splines");
- Some(Splines::read(br, self.header.width * self.header.height)?)
- } else {
- None
- };
+ if self.header.has_patches() {
+ info!("decoding patches");
+ let p = PatchesDictionary::read(
+ br,
+ self.header.size_padded().0,
+ self.header.size_padded().1,
+ self.decoder_state.extra_channel_info().len(),
+ &self.decoder_state.reference_frames[..],
+ )?;
+ *self.patches.borrow_mut() = p;
+ }
- let noise = if self.header.has_noise() {
- info!("decoding noise");
- Some(Noise::read(br)?)
- } else {
- None
- };
+ if self.header.has_splines() {
+ info!("decoding splines");
+ let s = Splines::read(br, self.header.width * self.header.height)?;
+ *self.splines.borrow_mut() = s;
+ }
- let lf_quant = LfQuantFactors::new(br)?;
- debug!(?lf_quant);
+ if self.header.has_noise() {
+ info!("decoding noise");
+ let n = Noise::read(br)?;
+ *self.noise.borrow_mut() = n;
+ }
- let quant_params = if self.header.encoding == Encoding::VarDCT {
- info!("decoding VarDCT quantizer params");
- Some(QuantizerParams::read(br)?)
- } else {
- None
- };
- debug!(?quant_params);
+ let lf_quant = LfQuantFactors::new(br)?;
+ *self.lf_quant.borrow_mut() = lf_quant.clone();
+ debug!(?lf_quant);
- let block_context_map = if self.header.encoding == Encoding::VarDCT {
- info!("decoding block context map");
- Some(BlockContextMap::read(br)?)
- } else {
- None
- };
- debug!(?block_context_map);
+ let quant_params = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding VarDCT quantizer params");
+ Some(QuantizerParams::read(br)?)
+ } else {
+ None
+ };
+ debug!(?quant_params);
- let color_correlation_params = if self.header.encoding == Encoding::VarDCT {
- info!("decoding color correlation params");
- Some(ColorCorrelationParams::read(br)?)
- } else {
- None
- };
- debug!(?color_correlation_params);
-
- let tree = if br.read(1)? == 1 {
- let size_limit = (1024
- + self.header.width as usize
- * self.header.height as usize
- * (self.color_channels + self.decoder_state.extra_channel_info().len())
- / 16)
- .min(1 << 22);
- Some(Tree::read(br, size_limit)?)
- } else {
- None
- };
+ let block_context_map = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding block context map");
+ Some(BlockContextMap::read(br)?)
+ } else {
+ None
+ };
+ debug!(?block_context_map);
- let modular_global = FullModularImage::read(
- &self.header,
- &self.decoder_state.file_header.image_metadata,
- self.modular_color_channels(),
- &tree,
- br,
- )?;
+ let color_correlation_params = if self.header.encoding == Encoding::VarDCT {
+ info!("decoding color correlation params");
+ let ccp = ColorCorrelationParams::read(br)?;
+ *self.color_correlation_params.borrow_mut() = ccp;
+ Some(ccp)
+ } else {
+ None
+ };
+ debug!(?color_correlation_params);
+
+ let tree = if br.read(1)? == 1 {
+ let size_limit = (1024
+ + self.header.width as usize
+ * self.header.height as usize
+ * (self.color_channels + self.decoder_state.extra_channel_info().len())
+ / 16)
+ .min(1 << 22);
+ Some(Tree::read(br, size_limit)?)
+ } else {
+ None
+ };
- self.lf_global = Some(LfGlobalState {
- patches: patches.map(Arc::new),
- splines,
- noise,
- lf_quant,
- quant_params,
- block_context_map,
- color_correlation_params,
- tree,
- modular_global,
- });
+ let modular_global = FullModularImage::read(
+ &self.header,
+ &self.decoder_state.file_header.image_metadata,
+ self.modular_color_channels(),
+ br,
+ )?;
+
+ // Ensure that, if we call this function again, we resume from just after
+ // reading modular global data (excluding section 0 channels).
+ let total_bits_read = br.total_bits_read();
+
+ self.lf_global = Some(LfGlobalState {
+ lf_quant,
+ quant_params,
+ block_context_map,
+ color_correlation_params,
+ tree,
+ modular_global,
+ total_bits_read,
+ });
+ }
+
+ let lf_global = self.lf_global.as_mut().unwrap();
+
+ lf_global
+ .modular_global
+ .read_section0(&self.header, &lf_global.tree, br, allow_partial)?;
Ok(())
}
@@ -281,6 +440,9 @@ impl Frame {
br,
)?;
}
+
+ lf_global.modular_global.mark_group_to_be_read(1, group);
+
lf_global.modular_global.read_stream(
ModularStreamId::ModularLF(group),
&self.header,
@@ -305,188 +467,305 @@ impl Frame {
#[instrument(level = "debug", skip_all)]
pub fn decode_hf_global(&mut self, br: &mut BitReader) -> Result<()> {
debug!(section_size = br.total_bits_available());
- if self.header.encoding == Encoding::Modular {
- return Ok(());
- }
- let lf_global = self.lf_global.as_mut().unwrap();
- let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?;
- let block_context_map = lf_global.block_context_map.as_mut().unwrap();
- let num_histo_bits = self.header.num_groups().ceil_log2();
- let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1;
- info!(
- "Processing HFGlobal section with {} passes and {} histograms",
- self.header.passes.num_passes, num_histograms
- );
- let mut passes: Vec<PassState> = vec![];
- #[allow(unused_variables)]
- for i in 0..self.header.passes.num_passes as usize {
- let used_orders = match br.read(2)? {
- 0 => 0x5f,
- 1 => 0x13,
- 2 => 0,
- _ => br.read(coeff_order::NUM_ORDERS)?,
- } as u32;
- debug!(used_orders);
- let coeff_orders = decode_coeff_orders(used_orders, br)?;
- assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS);
- let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts();
+ if self.header.encoding == Encoding::VarDCT {
+ let lf_global = self.lf_global.as_mut().unwrap();
+ let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?;
+ let block_context_map = lf_global.block_context_map.as_mut().unwrap();
+ let num_histo_bits = self.header.num_groups().ceil_log2();
+ let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1;
info!(
- "Deconding histograms for pass {} with {} contexts",
- i, num_contexts
+ "Processing HFGlobal section with {} passes and {} histograms",
+ self.header.passes.num_passes, num_histograms
);
- let histograms = Histograms::decode(num_contexts, br, true)?;
- debug!("Found {} histograms", histograms.num_histograms());
- passes.push(PassState {
- coeff_orders,
- histograms,
+ let mut passes: Vec<PassState> = vec![];
+ #[allow(unused_variables)]
+ for i in 0..self.header.passes.num_passes as usize {
+ let used_orders = match br.read(2)? {
+ 0 => 0x5f,
+ 1 => 0x13,
+ 2 => 0,
+ _ => br.read(coeff_order::NUM_ORDERS)?,
+ } as u32;
+ debug!(used_orders);
+ let coeff_orders = decode_coeff_orders(used_orders, br)?;
+ assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS);
+ let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts();
+ info!(
+ "Decoding histograms for pass {} with {} contexts",
+ i, num_contexts
+ );
+ let mut histograms = Histograms::decode(num_contexts, br, true)?;
+            // Pad the histograms so that context-map indices computed in decode_vardct_group (group.rs#L514@752e6a4) stay in bounds.
+ let padding = ZERO_DENSITY_CONTEXT_LIMIT - ZERO_DENSITY_CONTEXT_COUNT;
+ histograms.resize(num_contexts + padding);
+ debug!("Found {} histograms", histograms.num_histograms());
+ passes.push(PassState {
+ coeff_orders,
+ histograms,
+ });
+ }
+ // Note that, if we have extra channels that can be rendered progressively,
+ // we might end up re-drawing some VarDCT groups. In that case, we need to
+ // keep around the coefficients, so allocate coefficients under those conditions
+ // too.
+ // TODO(veluca): evaluate whether we can make this check more precise.
+ let hf_coefficients = if passes.len() <= 1
+ && !(self
+ .lf_global
+ .as_mut()
+ .unwrap()
+ .modular_global
+ .can_do_partial_render()
+ && self.header.num_extra_channels > 0)
+ {
+ None
+ } else {
+ let xs = GROUP_DIM * GROUP_DIM;
+ let ys = self.header.num_groups();
+ Some((
+ Image::new((xs, ys))?,
+ Image::new((xs, ys))?,
+ Image::new((xs, ys))?,
+ ))
+ };
+
+ self.hf_global = Some(HfGlobalState {
+ num_histograms,
+ passes,
+ dequant_matrices,
+ hf_coefficients,
});
}
- let hf_coefficients = if passes.len() <= 1 {
- None
- } else {
- let xs = GROUP_DIM * GROUP_DIM;
- let ys = self.header.num_groups();
- Some((
- Image::new((xs, ys))?,
- Image::new((xs, ys))?,
- Image::new((xs, ys))?,
- ))
- };
- self.hf_global = Some(HfGlobalState {
- num_histograms,
- passes,
- dequant_matrices,
- hf_coefficients,
- });
+ // Set EPF sigma values to the correct values if we are doing EPF.
+ if self.header.restoration_filter.epf_iters > 0 {
+ *self.epf_sigma.borrow_mut() = SigmaSource::new(
+ &self.header,
+ self.lf_global.as_ref().unwrap(),
+ &self.hf_meta,
+ )?;
+ }
Ok(())
}
- #[instrument(level = "debug", skip(self, br, buffer_splitter))]
- pub fn decode_hf_group(
+ pub fn render_noise_for_group(
&mut self,
group: usize,
- pass: usize,
- mut br: BitReader,
+ complete: bool,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
- debug!(section_size = br.total_bits_available());
- if self.header.has_noise() {
- // TODO(sboukortt): consider making this a dedicated stage
- let num_channels = self.header.num_extra_channels as usize + 3;
-
- let group_dim = self.header.group_dim() as u32;
- let xsize_groups = self.header.size_groups().0;
- let gx = (group % xsize_groups) as u32;
- let gy = (group / xsize_groups) as u32;
- // TODO(sboukortt): test upsampling+noise
- let upsampling = self.header.upsampling;
- let x0 = gx * upsampling * group_dim;
- let y0 = gy * upsampling * group_dim;
- let x1 = ((x0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().0);
- let y1 = ((y0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().1);
- let xsize = x1 - x0 as usize;
- let ysize = y1 - y0 as usize;
- let mut rng = Xorshift128Plus::new_with_seeds(
- self.decoder_state.visible_frame_index as u32,
- self.decoder_state.nonvisible_frame_index as u32,
- x0,
- y0,
- );
- let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000);
- for i in 0..3 {
- let mut buf = pipeline!(self, p, p.get_buffer(num_channels + i)?);
- const FLOATS_PER_BATCH: usize =
- Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>();
- let mut batch = [0u64; Xorshift128Plus::N];
-
- for y in 0..ysize {
- let row = buf.row_mut(y);
- for batch_index in 0..xsize.div_ceil(FLOATS_PER_BATCH) {
- rng.fill(&mut batch);
- let batch_size =
- (xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH);
- for i in 0..batch_size {
- let x = FLOATS_PER_BATCH * batch_index + i;
- let k = i / 2;
- let high_bytes = i % 2 != 0;
- let bits = if high_bytes {
- ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32
- } else {
- (batch[k] & 0xFFFFFFFF) as u32
- };
- row[x] = bits_to_float(bits);
+ // TODO(sboukortt): consider making this a dedicated stage
+ // TODO(veluca): SIMD.
+ let num_channels = self.header.num_extra_channels as usize + 3;
+
+ let group_dim = self.header.group_dim() as u32;
+ let xsize_groups = self.header.size_groups().0;
+ let gx = (group % xsize_groups) as u32;
+ let gy = (group / xsize_groups) as u32;
+ let upsampling = self.header.upsampling;
+ let upsampled_size = self.header.size_upsampled();
+
+ // Total buffer covers the upsampled region for this group
+ let buf_x1 = ((gx + 1) * upsampling * group_dim) as usize;
+ let buf_y1 = ((gy + 1) * upsampling * group_dim) as usize;
+ let buf_xsize = buf_x1.min(upsampled_size.0) - (gx * upsampling * group_dim) as usize;
+ let buf_ysize = buf_y1.min(upsampled_size.1) - (gy * upsampling * group_dim) as usize;
+
+ let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000);
+
+ // Get all 3 noise channel buffers upfront
+ let mut bufs = [
+ pipeline!(self, p, p.get_buffer(num_channels)?),
+ pipeline!(self, p, p.get_buffer(num_channels + 1)?),
+ pipeline!(self, p, p.get_buffer(num_channels + 2)?),
+ ];
+
+ const FLOATS_PER_BATCH: usize =
+ Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>();
+ let mut batch = [0u64; Xorshift128Plus::N];
+
+ // libjxl iterates through upsampling subdivisions with separate RNG seeds.
+ // For each subregion, a single RNG is shared across all 3 channels.
+ for iy in 0..upsampling {
+ for ix in 0..upsampling {
+ // Seed coordinates for this subregion (matches libjxl)
+ let x0 = (gx * upsampling + ix) * group_dim;
+ let y0 = (gy * upsampling + iy) * group_dim;
+
+ // Create RNG with this subregion's seed - shared across all 3 channels
+ let mut rng = Xorshift128Plus::new_with_seeds(
+ self.decoder_state.visible_frame_index as u32,
+ self.decoder_state.nonvisible_frame_index as u32,
+ x0,
+ y0,
+ );
+
+ // Subregion boundaries within the buffer
+ let sub_x0 = (ix * group_dim) as usize;
+ let sub_y0 = (iy * group_dim) as usize;
+ let sub_x1 = ((ix + 1) * group_dim) as usize;
+ let sub_y1 = ((iy + 1) * group_dim) as usize;
+
+ // Clamp to actual buffer size
+ let sub_xsize = sub_x1.min(buf_xsize).saturating_sub(sub_x0);
+ let sub_ysize = sub_y1.min(buf_ysize).saturating_sub(sub_y0);
+
+ // Skip if this subregion is entirely outside the buffer
+ if sub_xsize == 0 || sub_ysize == 0 {
+ continue;
+ }
+
+ // Fill all 3 channels with this subregion's noise, sharing the RNG
+ for buf in &mut bufs {
+ for y in 0..sub_ysize {
+ let row = buf.row_mut(sub_y0 + y);
+ for batch_index in 0..sub_xsize.div_ceil(FLOATS_PER_BATCH) {
+ rng.fill(&mut batch);
+ let batch_size =
+ (sub_xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH);
+ for i in 0..batch_size {
+ let x = sub_x0 + FLOATS_PER_BATCH * batch_index + i;
+ let k = i / 2;
+ let high_bytes = i % 2 != 0;
+ let bits = if high_bytes {
+ ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32
+ } else {
+ (batch[k] & 0xFFFFFFFF) as u32
+ };
+ row[x] = bits_to_float(bits);
+ }
}
}
}
- pipeline!(
- self,
- p,
- p.set_buffer_for_group(num_channels + i, group, 1, buf, buffer_splitter)?
- )
}
}
+ // Set all buffers after filling
+ let [buf0, buf1, buf2] = bufs;
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels, group, complete, buf0, buffer_splitter)?
+ );
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels + 1, group, complete, buf1, buffer_splitter)?
+ );
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(num_channels + 2, group, complete, buf2, buffer_splitter)?
+ );
+ Ok(())
+ }
+
+ // Returns `true` if VarDCT and noise data were effectively rendered.
+ #[instrument(level = "debug", skip(self, passes, buffer_splitter))]
+ pub fn decode_hf_group(
+ &mut self,
+ group: usize,
+ passes: &mut [(usize, BitReader)],
+ buffer_splitter: &mut BufferSplitter,
+ force_render: bool,
+ ) -> Result<bool> {
+ if passes.is_empty() {
+ assert!(force_render);
+ }
+
+ let last_pass_in_file = self.header.passes.num_passes as usize - 1;
+ let was_complete = self.last_rendered_pass[group].is_some_and(|p| p >= last_pass_in_file);
+
+ if let Some((p, _)) = passes.last() {
+ self.last_rendered_pass[group] = Some(*p);
+ };
+ let pass_to_render = self.last_rendered_pass[group];
+ let complete = pass_to_render.is_some_and(|p| p >= last_pass_in_file);
+
+ if complete && !was_complete {
+ self.incomplete_groups = self.incomplete_groups.checked_sub(1).unwrap();
+ }
+
+        // Render if we are decoding the last pass, or if an eager render was requested
+        // and this frame supports rendering before its last pass.
+ let do_render = if complete {
+ true
+ } else if force_render {
+ self.allow_rendering_before_last_pass()
+ } else {
+ false
+ };
+
+ if !do_render && passes.is_empty() {
+ return Ok(false);
+ }
+
+ if self.header.has_noise() && do_render {
+ self.render_noise_for_group(group, complete, buffer_splitter)?;
+ }
+
let lf_global = self.lf_global.as_mut().unwrap();
if self.header.encoding == Encoding::VarDCT {
- info!("Decoding VarDCT group {group}, pass {pass}");
- let hf_global = self.hf_global.as_mut().unwrap();
- let hf_meta = self.hf_meta.as_mut().unwrap();
- let mut pixels = [
- pipeline!(self, p, p.get_buffer(0))?,
- pipeline!(self, p, p.get_buffer(1))?,
- pipeline!(self, p, p.get_buffer(2))?,
- ];
- let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new);
- decode_vardct_group(
- group,
- pass,
- &self.header,
- lf_global,
- hf_global,
- hf_meta,
- &self.lf_image,
- &self.quant_lf,
- &self
- .decoder_state
- .file_header
- .transform_data
- .opsin_inverse_matrix
- .quant_biases,
- &mut pixels,
- &mut br,
- buffers,
- )?;
- if self.decoder_state.enable_output
- && pass + 1 == self.header.passes.num_passes as usize
- {
+ let mut pixels = if do_render {
+ Some([
+ pipeline!(self, p, p.get_buffer(0))?,
+ pipeline!(self, p, p.get_buffer(1))?,
+ pipeline!(self, p, p.get_buffer(2))?,
+ ])
+ } else {
+ None
+ };
+ if pass_to_render.is_none() && do_render {
+ info!("Upsampling LF for group {group}");
+ upsample_lf_group(
+ group,
+ pixels.as_mut().unwrap(),
+ self.lf_image.as_ref().unwrap(),
+ &self.header,
+ &self.decoder_state.file_header.transform_data,
+ )?;
+ } else {
+ info!("Decoding VarDCT group {group}");
+ let hf_global = self.hf_global.as_mut().unwrap();
+ let hf_meta = self.hf_meta.as_mut().unwrap();
+ let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new);
+ decode_vardct_group(
+ group,
+ passes,
+ &self.header,
+ lf_global,
+ hf_global,
+ hf_meta,
+ &self.lf_image,
+ &self.quant_lf,
+ &self
+ .decoder_state
+ .file_header
+ .transform_data
+ .opsin_inverse_matrix
+ .quant_biases,
+ &mut pixels,
+ buffers,
+ )?;
+ }
+ if let Some(pixels) = pixels {
for (c, img) in pixels.into_iter().enumerate() {
pipeline!(
self,
p,
- p.set_buffer_for_group(c, group, 1, img, buffer_splitter)?
+ p.set_buffer_for_group(c, group, complete, img, buffer_splitter)?
);
}
}
}
- lf_global.modular_global.read_stream(
- ModularStreamId::ModularHF { group, pass },
- &self.header,
- &lf_global.tree,
- &mut br,
- )?;
- lf_global.modular_global.process_output(
- 2 + pass,
- group,
- &self.header,
- &mut |chan, group, num_passes, image| {
- pipeline!(
- self,
- p,
- p.set_buffer_for_group(chan, group, num_passes, image, buffer_splitter)?
- );
- Ok(())
- },
- )?;
- Ok(())
+
+ for (pass, br) in passes.iter_mut() {
+ lf_global.modular_global.read_stream(
+ ModularStreamId::ModularHF { group, pass: *pass },
+ &self.header,
+ &lf_global.tree,
+ br,
+ )?;
+ }
+ Ok(do_render)
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
index b98d4541894b6..b7d8021b33962 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs
@@ -18,7 +18,7 @@ use crate::{
},
headers::frame_header::FrameHeader,
image::{Image, ImageRect, Rect},
- util::{CeilLog2, ShiftRightCeil, tracing_wrappers::*},
+ util::{CeilLog2, ShiftRightCeil, SmallVec, tracing_wrappers::*},
};
use jxl_simd::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, simd_function};
@@ -305,11 +305,69 @@ simd_function!(
}
);
+struct PassInfo<'a, 'b> {
+ histogram_index: usize,
+ reader: Option<SymbolReader>,
+ br: &'a mut BitReader<'b>,
+ shift: u32,
+ pass: usize,
+ // TODO(veluca): reuse this allocation.
+ num_nzeros: [Image<u32>; 3],
+}
+
+impl<'a, 'b> PassInfo<'a, 'b> {
+ fn new(
+ hf_global: &HfGlobalState,
+ frame_header: &FrameHeader,
+ block_group_rect: Rect,
+ pass: usize,
+ br: &'a mut BitReader<'b>,
+ ) -> Result<Self> {
+ let num_histo_bits = hf_global.num_histograms.ceil_log2();
+ debug!(?pass);
+ let histogram_index = br.read(num_histo_bits as usize)? as usize;
+ debug!(?histogram_index);
+ let reader = Some(SymbolReader::new(
+ &hf_global.passes[pass].histograms,
+ br,
+ None,
+ )?);
+ let shift = if pass < frame_header.passes.shift.len() {
+ frame_header.passes.shift[pass]
+ } else {
+ 0
+ };
+ let num_nzeros = [
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(0),
+ block_group_rect.size.1 >> frame_header.vshift(0),
+ ))?,
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(1),
+ block_group_rect.size.1 >> frame_header.vshift(1),
+ ))?,
+ Image::new((
+ block_group_rect.size.0 >> frame_header.hshift(2),
+ block_group_rect.size.1 >> frame_header.vshift(2),
+ ))?,
+ ];
+
+ Ok(Self {
+ histogram_index,
+ reader,
+ br,
+ shift,
+ pass,
+ num_nzeros,
+ })
+ }
+}
+
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
pub fn decode_vardct_group(
group: usize,
- pass: usize,
+ passes: &mut [(usize, BitReader)],
frame_header: &FrameHeader,
lf_global: &mut LfGlobalState,
hf_global: &mut HfGlobalState,
@@ -317,19 +375,19 @@ pub fn decode_vardct_group(
lf_image: &Option<[Image<f32>; 3]>,
quant_lf: &Image<u8>,
quant_biases: &[f32; 4],
- pixels: &mut [Image<f32>; 3],
- br: &mut BitReader,
+ pixels: &mut Option<[Image<f32>; 3]>,
buffers: &mut VarDctBuffers,
) -> Result<(), Error> {
let x_dm_multiplier = (1.0 / (1.25)).powf(frame_header.x_qm_scale as f32 - 2.0);
let b_dm_multiplier = (1.0 / (1.25)).powf(frame_header.b_qm_scale as f32 - 2.0);
- let num_histo_bits = hf_global.num_histograms.ceil_log2();
- let histogram_index: usize = br.read(num_histo_bits as usize)? as usize;
- debug!(?histogram_index);
- let mut reader = SymbolReader::new(&hf_global.passes[pass].histograms, br, None)?;
let block_group_rect = frame_header.block_group_rect(group);
debug!(?block_group_rect);
+ let mut pass_info = passes
+ .iter_mut()
+ .map(|(pass, br)| PassInfo::new(hf_global, frame_header, block_group_rect, *pass, br))
+ .collect::<Result<SmallVec<_, 4>>>()?;
+
// Reset and use pooled buffers
buffers.reset();
let scratch = &mut buffers.scratch;
@@ -350,23 +408,9 @@ pub fn decode_vardct_group(
let ytob_map = hf_meta.ytob_map.get_rect(cmap_rect);
let transform_map = hf_meta.transform_map.get_rect(block_group_rect);
let raw_quant_map = hf_meta.raw_quant_map.get_rect(block_group_rect);
- let mut num_nzeros: [Image<u32>; 3] = [
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(0),
- block_group_rect.size.1 >> frame_header.vshift(0),
- ))?,
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(1),
- block_group_rect.size.1 >> frame_header.vshift(1),
- ))?,
- Image::new((
- block_group_rect.size.0 >> frame_header.hshift(2),
- block_group_rect.size.1 >> frame_header.vshift(2),
- ))?,
- ];
let quant_lf_rect = quant_lf.get_rect(block_group_rect);
let block_context_map = lf_global.block_context_map.as_mut().unwrap();
- let context_offset = histogram_index * block_context_map.num_ac_contexts();
+ // TODO(veluca): improve coefficient storage (smaller allocations, use 16 bits if possible).
let coeffs = match hf_global.hf_coefficients.as_mut() {
Some(hf_coefficients) => [
hf_coefficients.0.row_mut(group),
@@ -380,11 +424,6 @@ pub fn decode_vardct_group(
[coeffs_x, coeffs_y, coeffs_b]
}
};
- let shift_for_pass = if pass < frame_header.passes.shift.len() {
- frame_header.passes.shift[pass]
- } else {
- 0
- };
let mut coeffs_offset = 0;
let transform_buffer = &mut buffers.transform_buffer;
@@ -474,94 +513,116 @@ pub fn decode_vardct_group(
let num_blocks = cx * cy;
let num_coeffs = num_blocks * BLOCK_SIZE;
let log_num_blocks = num_blocks.ilog2() as usize;
- let pass_info = &hf_global.passes[pass];
- for c in [1, 0, 2] {
- if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) {
- continue;
- }
- trace!(
- "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})",
- sbx[c], sby[c], c, cx, cy, transform_id, shape_id
- );
- let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]);
- let block_context =
- block_context_map.block_context(quant_lf, raw_quant, shape_id, c);
- let nonzero_context = block_context_map
- .nonzero_context(predicted_nzeros, block_context)
- + context_offset;
- let mut nonzeros =
- reader.read_unsigned(&pass_info.histograms, br, nonzero_context) as usize;
- trace!(
- "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \
+ for PassInfo {
+ histogram_index,
+ reader,
+ br,
+ shift,
+ pass,
+ num_nzeros,
+ } in pass_info.iter_mut()
+ {
+ let reader = reader.as_mut().unwrap();
+ let pass_info = &hf_global.passes[*pass];
+ let context_offset = *histogram_index * block_context_map.num_ac_contexts();
+ for c in [1, 0, 2] {
+ if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) {
+ continue;
+ }
+ trace!(
+ "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})",
+ sbx[c], sby[c], c, cx, cy, transform_id, shape_id
+ );
+ let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]);
+ let block_context =
+ block_context_map.block_context(quant_lf, raw_quant, shape_id, c);
+ let nonzero_context = block_context_map
+ .nonzero_context(predicted_nzeros, block_context)
+ + context_offset;
+ let mut nonzeros =
+ reader.read_unsigned_inline(&pass_info.histograms, br, nonzero_context)
+ as usize;
+ trace!(
+ "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \
nzero_ctx: {nonzero_context} (offset: {context_offset}) \
nzeros: {nonzeros}",
- sbx[c], sby[c]
- );
- if nonzeros + num_blocks > num_coeffs {
- return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks));
- }
- for iy in 0..cy {
- let nzrow = num_nzeros[c].row_mut(sby[c] + iy);
- for ix in 0..cx {
- nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32;
+ sbx[c], sby[c]
+ );
+ if nonzeros + num_blocks > num_coeffs {
+ return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks));
}
- }
- let histo_offset =
- block_context_map.zero_density_context_offset(block_context) + context_offset;
- let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 };
- let permutation = &pass_info.coeff_orders[shape_id * 3 + c];
- let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs];
- for k in num_blocks..num_coeffs {
- if nonzeros == 0 {
- break;
+ for iy in 0..cy {
+ let nzrow = num_nzeros[c].row_mut(sby[c] + iy);
+ for ix in 0..cx {
+ nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32;
+ }
+ }
+ let histo_offset = block_context_map.zero_density_context_offset(block_context)
+ + context_offset;
+ let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 };
+ let permutation = &pass_info.coeff_orders[shape_id * 3 + c];
+ let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs];
+ for k in num_blocks..num_coeffs {
+ if nonzeros == 0 {
+ break;
+ }
+ let ctx =
+ histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev);
+ let coeff =
+ reader.read_signed_inline(&pass_info.histograms, br, ctx) << *shift;
+ prev = if coeff != 0 { 1 } else { 0 };
+ nonzeros -= prev;
+ let coeff_index = permutation[k] as usize;
+ current_coeffs[coeff_index] += coeff;
+ }
+ if nonzeros != 0 {
+ return Err(Error::EndOfBlockResidualNonZeros(nonzeros));
}
- let ctx =
- histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev);
- let coeff =
- reader.read_signed(&pass_info.histograms, br, ctx) << shift_for_pass;
- prev = if coeff != 0 { 1 } else { 0 };
- nonzeros -= prev;
- let coeff_index = permutation[k] as usize;
- current_coeffs[coeff_index] += coeff;
- }
- if nonzeros != 0 {
- return Err(Error::EndOfBlockResidualNonZeros(nonzeros));
}
}
- let qblock = [
- &coeffs[0][coeffs_offset..],
- &coeffs[1][coeffs_offset..],
- &coeffs[2][coeffs_offset..],
- ];
- let dequant_matrices = &hf_global.dequant_matrices;
- dequant_and_transform_to_pixels_dispatch(
- quant_biases,
- x_dm_multiplier,
- b_dm_multiplier,
- pixels,
- scratch,
- inv_global_scale,
- transform_buffer,
- hshift,
- vshift,
- by,
- sby,
- bx,
- sbx,
- x_cc_mul,
- b_cc_mul,
- raw_quant,
- &lf_rects,
- transform_type,
- block_rect,
- num_blocks,
- num_coeffs,
- &qblock,
- dequant_matrices,
- )?;
+ if let Some(pixels) = pixels {
+ let qblock = [
+ &coeffs[0][coeffs_offset..],
+ &coeffs[1][coeffs_offset..],
+ &coeffs[2][coeffs_offset..],
+ ];
+ let dequant_matrices = &hf_global.dequant_matrices;
+ dequant_and_transform_to_pixels_dispatch(
+ quant_biases,
+ x_dm_multiplier,
+ b_dm_multiplier,
+ pixels,
+ scratch,
+ inv_global_scale,
+ transform_buffer,
+ hshift,
+ vshift,
+ by,
+ sby,
+ bx,
+ sbx,
+ x_cc_mul,
+ b_cc_mul,
+ raw_quant,
+ &lf_rects,
+ transform_type,
+ block_rect,
+ num_blocks,
+ num_coeffs,
+ &qblock,
+ dequant_matrices,
+ )?;
+ }
coeffs_offset += num_coeffs;
}
}
- reader.check_final_state(&hf_global.passes[pass].histograms, br)?;
+ for PassInfo {
+ pass, br, reader, ..
+ } in pass_info.iter_mut()
+ {
+ std::mem::take(reader)
+ .unwrap()
+ .check_final_state(&hf_global.passes[*pass].histograms, br)?;
+ }
Ok(())
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs
new file mode 100644
index 0000000000000..43ebeb7f43aa5
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs
@@ -0,0 +1,371 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+use crate::{
+ api::{JxlColorProfile, JxlColorType, JxlDataFormat, JxlOutputBuffer, JxlPixelFormat},
+ error::Result,
+ frame::Frame,
+ headers::{Orientation, frame_header::FrameType},
+ image::{DataTypeTag, Rect},
+ render::{
+ Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage,
+ buffer_splitter::{BufferSplitter, SaveStageBufferInfo},
+ low_memory_pipeline::row_buffers::RowBuffer,
+ save::SaveStage,
+ stages::{
+ ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage, FromLinearStage,
+ OutputColorInfo, TransferFunction, Upsample8x, XybStage,
+ },
+ },
+ util::{f16, mirror},
+};
+
+impl Frame {
+ #[allow(clippy::too_many_arguments)]
+ fn render_lf_frame_rect(
+ &mut self,
+ color_type: JxlColorType,
+ data_format: JxlDataFormat,
+ rect: Rect,
+ upsampled_rect: Rect,
+ orientation: Orientation,
+ output_buffers: &mut [Option<JxlOutputBuffer<'_>>],
+ full_size: (usize, usize),
+ output_color_info: &OutputColorInfo,
+ output_tf: &TransferFunction,
+ ) -> Result<()> {
+ let save_stage = SaveStage::new(
+ if color_type.has_alpha() {
+ &[0, 1, 2, 3]
+ } else {
+ &[0, 1, 2]
+ },
+ orientation,
+ 0,
+ color_type,
+ data_format,
+ color_type.has_alpha(),
+ );
+ let len = rect.size.0;
+ let ulen = len * 8;
+ enum DataFormatConverter {
+ U8(ConvertF32ToU8Stage),
+ U16(ConvertF32ToU16Stage),
+ F16(ConvertF32ToF16Stage),
+ None,
+ }
+ let (converter, constant_alpha) = match data_format {
+ JxlDataFormat::U8 { bit_depth } => (
+ DataFormatConverter::U8(ConvertF32ToU8Stage::new(0, bit_depth)),
+ RowBuffer::new_filled(DataTypeTag::U8, ulen, &(1u8 << bit_depth).to_ne_bytes())?,
+ ),
+ JxlDataFormat::U16 { bit_depth, .. } => (
+ DataFormatConverter::U16(ConvertF32ToU16Stage::new(0, bit_depth)),
+ RowBuffer::new_filled(DataTypeTag::U16, ulen, &(1u16 << bit_depth).to_ne_bytes())?,
+ ),
+ JxlDataFormat::F16 { .. } => (
+ DataFormatConverter::F16(ConvertF32ToF16Stage::new(0)),
+ RowBuffer::new_filled(
+ DataTypeTag::F16,
+ ulen,
+ &(f16::from_f32(1.0).to_bits().to_ne_bytes()),
+ )?,
+ ),
+ JxlDataFormat::F32 { .. } => (
+ DataFormatConverter::None,
+ RowBuffer::new_filled(DataTypeTag::F32, ulen, &1.0f32.to_ne_bytes())?,
+ ),
+ };
+
+ let upsample_stage = Upsample8x::new(&self.decoder_state.file_header.transform_data, 0);
+ let mut upsample_state = upsample_stage.init_local_state(0)?.unwrap();
+
+ let xyb_stage = XybStage::new(0, output_color_info.clone());
+
+ let from_linear_stage = FromLinearStage::new(0, output_tf.clone());
+
+ let mut lf_rows = [
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?,
+ ];
+
+ // Converted to RGB in place.
+ let mut upsampled_rows = [
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?,
+ ];
+
+ let mut output_rows = [
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?,
+ ];
+
+ let src = if self.header.frame_type == FrameType::RegularFrame {
+ self.decoder_state.lf_frames[0].as_ref().unwrap()
+ } else {
+ self.lf_frame_data.as_ref().unwrap()
+ };
+
+ const LF_ROW_OFFSET: usize = 8;
+
+ let x0 = rect.origin.0;
+ let x1 = rect.end().0;
+
+ let y0 = rect.origin.1 as isize - 2;
+ let y1 = rect.end().1 as isize + 2;
+
+ let lf_size = src[0].size();
+
+ for yy in y0..y1 {
+ let sy = mirror(yy, lf_size.1);
+
+ // Fill in input.
+ for c in 0..3 {
+ let bufy = (yy + LF_ROW_OFFSET as isize) as usize;
+ let row = lf_rows[c].get_row_mut::<f32>(bufy);
+ let srow = src[c].row(sy);
+ let off = RowBuffer::x0_offset::<f32>();
+ row[off..off + len].copy_from_slice(&srow[x0..x1]);
+ row[off - 1] = srow[mirror(x0 as isize - 1, lf_size.0)];
+ row[off - 2] = srow[mirror(x0 as isize - 2, lf_size.0)];
+ row[off + len] = srow[mirror(x1 as isize, lf_size.0)];
+ row[off + len + 1] = srow[mirror(x1 as isize + 1, lf_size.0)];
+ }
+
+ if yy < y0 + 4 {
+ continue;
+ }
+
+ let y = yy as usize - 2;
+
+ // Upsample.
+ for c in 0..3 {
+ let off = RowBuffer::x0_offset::<f32>() - 2;
+ let input_rows_refs = [
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 2)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 1)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 1)[off..],
+ &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 2)[off..],
+ ]
+ .into_iter()
+ .collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 5);
+
+ let output_rows_refs =
+ upsampled_rows[c].get_rows_mut(y * 8..y * 8 + 8, RowBuffer::x0_offset::<f32>());
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8);
+
+ upsample_stage.process_row_chunk(
+ (0, 0),
+ len,
+ &input_channels,
+ &mut output_channels,
+ Some(upsample_state.as_mut()),
+ );
+ }
+
+ // un-XYB, convert and save.
+ for uy in y * 8..y * 8 + 8 {
+ // XYB
+ let [x, y, b] = &mut upsampled_rows;
+ let off = RowBuffer::x0_offset::<f32>();
+ let mut rows = [
+ &mut x.get_row_mut(uy)[off..],
+ &mut y.get_row_mut(uy)[off..],
+ &mut b.get_row_mut(uy)[off..],
+ ];
+ xyb_stage.process_row_chunk((0, 0), ulen, &mut rows, None);
+ from_linear_stage.process_row_chunk((0, 0), ulen, &mut rows, None);
+
+ macro_rules! convert {
+ ($s: expr, $t: ty) => {
+ for c in 0..3 {
+ let input_rows_refs = std::iter::once(
+ &upsampled_rows[c].get_row(uy)[RowBuffer::x0_offset::<f32>()..],
+ )
+ .collect();
+ let input_channels = Channels::new(input_rows_refs, 1, 1);
+ let output_rows_refs = output_rows[c]
+ .get_rows_mut(uy..uy + 1, RowBuffer::x0_offset::<$t>());
+ let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 1);
+ $s.process_row_chunk(
+ (0, 0),
+ ulen,
+ &input_channels,
+ &mut output_channels,
+ None,
+ );
+ }
+ };
+ }
+
+ // Convert
+ let save_input = match &converter {
+ DataFormatConverter::U8(s) => {
+ convert!(s, u8);
+ &output_rows
+ }
+ DataFormatConverter::U16(s) => {
+ convert!(s, u16);
+ &output_rows
+ }
+ DataFormatConverter::F16(s) => {
+ convert!(s, f16);
+ &output_rows
+ }
+ DataFormatConverter::None => &upsampled_rows,
+ };
+
+ let input_no_alpha = [&save_input[0], &save_input[1], &save_input[2]];
+ let input_alpha = [
+ &save_input[0],
+ &save_input[1],
+ &save_input[2],
+ &constant_alpha,
+ ];
+
+ save_stage.save_lowmem(
+ if color_type.has_alpha() {
+ &input_alpha
+ } else {
+ &input_no_alpha
+ },
+ output_buffers,
+ upsampled_rect.size,
+ uy,
+ upsampled_rect.origin,
+ full_size,
+ (0, 0),
+ )?;
+ }
+ }
+
+ Ok(())
+ }
+
+ pub fn maybe_preview_lf_frame(
+ &mut self,
+ pixel_format: &JxlPixelFormat,
+ output_buffers: &mut [JxlOutputBuffer<'_>],
+ changed_regions: Option<&[Rect]>,
+ output_profile: &JxlColorProfile,
+ ) -> Result<()> {
+ if self.header.needs_blending() {
+ return Ok(());
+ }
+ if !((self.header.has_lf_frame() && self.header.frame_type == FrameType::RegularFrame)
+ || (self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1))
+ {
+ return Ok(());
+ }
+
+ let output_color_info = OutputColorInfo::from_header(&self.decoder_state.file_header)?;
+
+ let Some(output_tf) = output_profile.transfer_function().map(|tf| {
+ TransferFunction::from_api_tf(
+ tf,
+ output_color_info.intensity_target,
+ output_color_info.luminances,
+ )
+ }) else {
+ return Ok(());
+ };
+
+ if output_tf.is_linear() {
+ return Ok(());
+ }
+
+ let image_metadata = &self.decoder_state.file_header.image_metadata;
+ if !image_metadata.xyb_encoded || !image_metadata.extra_channel_info.is_empty() {
+ // We only render LF frames for XYB VarDCT images with no extra channels.
+ // TODO(veluca): we might want to relax this to "no alpha".
+ return Ok(());
+ }
+ let color_type = pixel_format.color_type;
+ let data_format = pixel_format.color_data_format.unwrap();
+ if pixel_format.color_data_format.is_none()
+ || output_buffers.is_empty()
+ || !matches!(
+ color_type,
+ JxlColorType::Rgb | JxlColorType::Rgba | JxlColorType::Bgr | JxlColorType::Bgra,
+ )
+ {
+            // We only render color data, and only to 3- or 4-channel output buffers.
+ return Ok(());
+ }
+ // We already have a fully-rendered frame and we are not requesting to re-render
+ // specific regions.
+ if self.decoder_state.lf_frame_was_rendered && changed_regions.is_none() {
+ return Ok(());
+ }
+ if changed_regions.is_none() {
+ self.decoder_state.lf_frame_was_rendered = true;
+ }
+
+ let sz = &self.decoder_state.file_header.size;
+ let xsize = sz.xsize() as usize;
+ let ysize = sz.ysize() as usize;
+
+ let mut regions_storage;
+
+ let regions = if let Some(regions) = changed_regions {
+ regions
+ } else {
+ regions_storage = vec![];
+ for i in (0..xsize.div_ceil(8)).step_by(256) {
+ let x0 = i;
+ let x1 = (i + 256).min(xsize.div_ceil(8));
+ regions_storage.push(Rect {
+ origin: (x0, 0),
+ size: (x1 - x0, ysize.div_ceil(8)),
+ });
+ }
+ &regions_storage[..]
+ };
+
+ let orientation = image_metadata.orientation;
+ let info = SaveStageBufferInfo {
+ downsample: (0, 0),
+ orientation,
+ byte_size: data_format.bytes_per_sample() * color_type.samples_per_pixel(),
+ after_extend: false,
+ };
+ let info = [Some(info)];
+ let mut bufs = [Some(JxlOutputBuffer::reborrow(&mut output_buffers[0]))];
+ let mut bufs = BufferSplitter::new(&mut bufs);
+ for r in regions {
+ let upsampled_rect = Rect {
+ size: (r.size.0 * 8, r.size.1 * 8),
+ origin: (r.origin.0 * 8, r.origin.1 * 8),
+ };
+ let upsampled_rect = upsampled_rect.clip((xsize, ysize));
+ let mut bufs = bufs.get_local_buffers(
+ &info,
+ upsampled_rect,
+ false,
+ (xsize, ysize),
+ (xsize, ysize),
+ (0, 0),
+ );
+ self.render_lf_frame_rect(
+ color_type,
+ data_format,
+ *r,
+ upsampled_rect,
+ orientation,
+ &mut bufs,
+ (xsize, ysize),
+ &output_color_info,
+ &output_tf,
+ )?;
+ }
+
+ Ok(())
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
index 7425d87fe19f3..61d285d66e29e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs
@@ -3,7 +3,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use std::sync::Arc;
+use std::{collections::BTreeSet, sync::Arc};
use crate::{
entropy_coding::decode::Histograms,
@@ -12,7 +12,7 @@ use crate::{
headers::{
FileHeader,
extra_channels::ExtraChannelInfo,
- frame_header::{Encoding, FrameHeader},
+ frame_header::{Encoding, FrameHeader, FrameType},
permutation::Permutation,
toc::Toc,
},
@@ -26,12 +26,16 @@ use modular::{FullModularImage, Tree};
use quant_weights::DequantMatrices;
use quantizer::{LfQuantFactors, QuantizerParams};
+use crate::features::epf::SigmaSource;
+use crate::util::AtomicRefCell;
+
mod adaptive_lf_smoothing;
mod block_context_map;
mod coeff_order;
pub mod color_correlation_map;
pub mod decode;
mod group;
+pub mod lf_preview;
pub mod modular;
mod quant_weights;
pub mod quantizer;
@@ -45,16 +49,15 @@ pub enum Section {
Hf { group: usize, pass: usize },
}
+#[derive(Debug)]
pub struct LfGlobalState {
- patches: Option<Arc<PatchesDictionary>>,
- splines: Option<Splines>,
- noise: Option<Noise>,
lf_quant: LfQuantFactors,
pub quant_params: Option<QuantizerParams>,
block_context_map: Option<BlockContextMap>,
color_correlation_params: Option<ColorCorrelationParams>,
tree: Option<Tree>,
modular_global: FullModularImage,
+ total_bits_read: usize,
}
pub struct PassState {
@@ -113,10 +116,7 @@ impl ReferenceFrame {
pub struct DecoderState {
pub(super) file_header: FileHeader,
pub(super) reference_frames: Arc<[Option<ReferenceFrame>; Self::MAX_STORED_FRAMES]>,
- pub(super) lf_frames: [Option<[Image<f32>; 3]>; 4],
- // TODO(veluca): do we really need this? ISTM it could be achieved by passing None for all the
- // buffers, and it's not clear to me what use the decoder can make of it.
- pub enable_output: bool,
+ pub(super) lf_frames: [Option<[Image<f32>; 3]>; Self::NUM_LF_FRAMES],
pub render_spotcolors: bool,
#[cfg(test)]
pub use_simple_pipeline: bool,
@@ -124,17 +124,21 @@ pub struct DecoderState {
pub nonvisible_frame_index: usize,
pub high_precision: bool,
pub premultiply_output: bool,
+ // Whether the latest level 1 LF frame was fully rendered.
+ // If this is set to `true`, early flushing in the main frame
+ // (before HF is available) will do nothing.
+ pub lf_frame_was_rendered: bool,
}
impl DecoderState {
pub const MAX_STORED_FRAMES: usize = 4;
+ pub const NUM_LF_FRAMES: usize = 4;
pub fn new(file_header: FileHeader) -> Self {
Self {
file_header,
reference_frames: Arc::new([None, None, None, None]),
- lf_frames: [None, None, None, None],
- enable_output: true,
+ lf_frames: std::array::from_fn(|_| None),
render_spotcolors: true,
#[cfg(test)]
use_simple_pipeline: false,
@@ -142,6 +146,7 @@ impl DecoderState {
nonvisible_frame_index: 0,
high_precision: false,
premultiply_output: false,
+ lf_frame_was_rendered: false,
}
}
@@ -169,6 +174,14 @@ pub struct HfMetadata {
used_hf_types: u32,
}
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum RenderUnit {
+ /// VarDCT data
+ VarDCT,
+ /// Modular channel with the given index
+ Modular(usize),
+}
+
pub struct Frame {
header: FrameHeader,
toc: Toc,
@@ -187,9 +200,21 @@ pub struct Frame {
render_pipeline: Option<Box<crate::render::LowMemoryRenderPipeline>>,
reference_frame_data: Option<Vec<Image<f32>>>,
lf_frame_data: Option<[Image<f32>; 3]>,
- lf_global_was_rendered: bool,
+ was_flushed_once: bool,
/// Reusable buffers for VarDCT group decoding.
vardct_buffers: Option<group::VarDctBuffers>,
+ // Last pass rendered so far for each HF group.
+ last_rendered_pass: Vec<Option<usize>>,
+ // Groups that should be rendered on the next call to flush().
+ groups_to_flush: BTreeSet<usize>,
+ changed_since_last_flush: BTreeSet<(usize, RenderUnit)>,
+ incomplete_groups: usize,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ splines: Arc<AtomicRefCell<Splines>>,
+ noise: Arc<AtomicRefCell<Noise>>,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ epf_sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl Frame {
@@ -221,6 +246,25 @@ impl Frame {
}
}
+ pub fn can_do_early_rendering(&self) -> bool {
+ if matches!(
+ self.header.frame_type,
+ FrameType::ReferenceOnly | FrameType::SkipProgressive
+ ) {
+ return false;
+ }
+ if self.header.has_lf_frame() {
+ return true;
+ }
+ if self.header.encoding == Encoding::VarDCT {
+ return false;
+ }
+ self.lf_global
+ .as_ref()
+ .map(|x| x.modular_global.can_do_early_partial_render())
+ .unwrap_or_default()
+ }
+
pub fn finalize_lf(&mut self) -> Result<()> {
if self.header.should_do_adaptive_lf_smoothing() {
let lf_global = self.lf_global.as_mut().unwrap();
@@ -295,14 +339,14 @@ mod test {
bytes: &[u8],
verify: impl Fn(&Frame, usize) -> Result<()> + 'static,
) -> Result<usize> {
- crate::api::tests::decode(bytes, usize::MAX, false, Some(Box::new(verify))).map(|x| x.0)
+ crate::api::tests::decode(bytes, usize::MAX, false, false, Some(Box::new(verify)))
+ .map(|x| x.0)
}
#[test]
fn splines() -> Result<(), Error> {
let verify_frame = move |frame: &Frame, _| {
- let lf_global = frame.lf_global.as_ref().unwrap();
- let splines = lf_global.splines.as_ref().unwrap();
+ let splines = frame.splines.borrow();
assert_eq!(splines.quantization_adjustment, 0);
let expected_starting_points = [Point { x: 9.0, y: 54.0 }].to_vec();
assert_eq!(splines.starting_points, expected_starting_points);
@@ -361,8 +405,7 @@ mod test {
#[test]
fn noise() -> Result<(), Error> {
let verify_frame = |frame: &Frame, _| {
- let lf_global = frame.lf_global.as_ref().unwrap();
- let noise = lf_global.noise.as_ref().unwrap();
+ let noise = frame.noise.borrow();
let want_noise = [
0.000000, 0.000977, 0.002930, 0.003906, 0.005859, 0.006836, 0.008789, 0.010742,
];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
index 4e3b4569ec8cf..29c93efa42e57 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs
@@ -18,7 +18,6 @@ pub fn with_buffers<T>(
buffers: &[ModularBufferInfo],
indices: &[usize],
grid: usize,
- skip_empty: bool,
f: impl FnOnce(Vec<&mut ModularChannel>) -> Result<T>,
) -> Result<T> {
let mut bufs = vec![];
@@ -36,10 +35,12 @@ pub fn with_buffers<T>(
});
}
- // Skip zero-sized buffers when decoding - they don't contribute to the bitstream.
- // This matches libjxl's behavior in DecodeGroup where zero-sized rects are skipped.
- // The buffer is still allocated above so transforms can access it.
- if skip_empty && (b.size.0 == 0 || b.size.1 == 0) {
+ // Skip zero-sized *tiles*.
+ //
+ // Note that some bitstreams can contain channels with one dimension being 0 (e.g. palette
+ // meta-channel with 0 colors has size (0, 3)). Those must still participate in channel
+ // numbering (but carry no entropy-coded pixels), so we only skip when both dimensions are 0.
+ if b.size.0 == 0 && b.size.1 == 0 {
continue;
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
index 930603f1f3f58..b2cc596252f6c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs
@@ -20,6 +20,7 @@ pub fn decode_modular_subbitstream(
header: Option<GroupHeader>,
global_tree: &Option<Tree>,
br: &mut BitReader,
+ partial_decoded_buffers: Option<&mut usize>,
) -> Result<()> {
// Skip decoding if all grids are zero-sized.
let is_empty = buffers
@@ -80,7 +81,21 @@ pub fn decode_modular_subbitstream(
let mut reader = SymbolReader::new(&tree.histograms, br, Some(image_width))?;
for i in 0..buffers.len() {
- decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br)?;
+ // Keep channel numbering stable, but skip actually decoding empty channels.
+ // This matches libjxl, which continues the loop without renumbering.
+ let (w, h) = buffers[i].data.size();
+ if w == 0 || h == 0 {
+ continue;
+ }
+ if let Err(e) =
+ decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br)
+ {
+ if let Some(p) = partial_decoded_buffers {
+ buffers[i].data.fill(0);
+ *p = i;
+ }
+ return Err(e);
+ }
}
reader.check_final_state(&tree.histograms, br)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
index b9190ce996269..398eb204c491b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs
@@ -189,13 +189,16 @@ pub(super) fn decode_modular_channel(
let special_tree = specialize_tree(tree, chan, stream_id, size.0, header)?;
match special_tree {
+ TreeSpecialCase::NoTree(t) => {
+ decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
+ }
TreeSpecialCase::NoWp(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- TreeSpecialCase::WpOnly(t) => {
+ TreeSpecialCase::WpOnlyConfig420(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- TreeSpecialCase::GradientLookup(t) => {
+ TreeSpecialCase::GradientLookupConfig420(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
TreeSpecialCase::SingleGradientOnly(t) => {
@@ -204,5 +207,6 @@ pub(super) fn decode_modular_channel(
TreeSpecialCase::General(t) => {
decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms)
}
- }
+ }?;
+ br.check_for_error()
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
index ff7d2263a7a91..8ffebfe8a8720 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs
@@ -86,7 +86,7 @@ impl ModularChannelDecoder for NoWpTree {
&self.references,
&mut self.property_buffer,
);
- let dec = reader.read_signed(histograms, br, prediction_result.context as usize);
+ let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize);
make_pixel(dec, prediction_result.multiplier, prediction_result.guess)
}
}
@@ -140,7 +140,7 @@ impl ModularChannelDecoder for GeneralTree {
&self.no_wp_tree.references,
&mut self.no_wp_tree.property_buffer,
);
- let dec = reader.read_signed(histograms, br, prediction_result.context as usize);
+ let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize);
let val = make_pixel(dec, prediction_result.multiplier, prediction_result.guess);
self.wp_state.update_errors(val, pos, xsize);
val
@@ -152,12 +152,7 @@ const LUT_MIN_SPLITVAL: i32 = -1024;
const LUT_TABLE_SIZE: usize = (LUT_MAX_SPLITVAL - LUT_MIN_SPLITVAL + 1) as usize;
const _: () = assert!(LUT_TABLE_SIZE.is_power_of_two());
-pub struct WpOnlyLookup {
- lut: [u8; LUT_TABLE_SIZE], // Lookup (wp value -> *clustered* context id)
- wp_state: WeightedPredictorState,
-}
-
-fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE_SIZE]> {
+fn make_lut(tree: &[TreeNode]) -> Option<[u8; LUT_TABLE_SIZE]> {
struct RangeAndNode {
range: Range<i32>,
node: u32,
@@ -198,8 +193,7 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE
}
let start = range.start - LUT_MIN_SPLITVAL;
let end = range.end - LUT_MIN_SPLITVAL;
- ans[start as usize..end as usize]
- .fill(histograms.map_context_to_cluster(id as usize) as u8);
+ ans[start as usize..end as usize].fill(id as u8);
}
}
}
@@ -207,20 +201,30 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE
Some(ans)
}
-impl WpOnlyLookup {
+/// Specialized `WpOnlyLookup` for the case where all HybridUint configs are 420.
+/// This allows using the fast-path entropy decoder.
+pub struct WpOnlyLookupConfig420 {
+ lut: [u8; LUT_TABLE_SIZE],
+ wp_state: WeightedPredictorState,
+}
+
+impl WpOnlyLookupConfig420 {
fn new(
tree: &[TreeNode],
histograms: &Histograms,
header: &GroupHeader,
xsize: usize,
) -> Option<Self> {
+ if !histograms.can_use_config_420_fast_path() {
+ return None;
+ }
let wp_state = WeightedPredictorState::new(&header.wp_header, xsize);
- let lut = make_lut(tree, histograms)?;
+ let lut = make_lut(tree)?;
Some(Self { lut, wp_state })
}
}
-impl ModularChannelDecoder for WpOnlyLookup {
+impl ModularChannelDecoder for WpOnlyLookupConfig420 {
const NEEDS_TOP: bool = true;
const NEEDS_TOPTOP: bool = true;
@@ -243,25 +247,30 @@ impl ModularChannelDecoder for WpOnlyLookup {
.predict_and_property(pos, xsize, &prediction_data);
let ctx = self.lut[(property as i64 - LUT_MIN_SPLITVAL as i64)
.clamp(0, LUT_TABLE_SIZE as i64 - 1) as usize];
- let dec = reader.read_signed_clustered(histograms, br, ctx as usize);
+ // Use the specialized 420 fast path
+ let dec = reader.read_signed_clustered_config_420(histograms, br, ctx as usize);
let val = dec.wrapping_add(wp_pred as i32);
self.wp_state.update_errors(val, pos, xsize);
val
}
}
-/// Fast path for trees that split only on property 9 (gradient: left + top - topleft)
-/// with Gradient predictor, offset=0, multiplier=1.
-/// Maps property 9 values directly to cluster IDs via a LUT.
-/// This targets libjxl effort 2 encoding.
-pub struct GradientLookup {
- lut: [u8; LUT_TABLE_SIZE],
-}
-
/// Property 9 is the "gradient property": left + top - topleft
const GRADIENT_PROPERTY: u8 = 9;
-fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<GradientLookup> {
+/// Config 420 specialized version of gradient lookup for trees that split only on property 9.
+/// This uses the specialized entropy decoder for config 420 + no LZ77.
+pub struct GradientLookupConfig420 {
+ lut: [u8; LUT_TABLE_SIZE],
+}
+
+fn make_gradient_lut_config_420(
+ tree: &[TreeNode],
+ histograms: &Histograms,
+) -> Option<GradientLookupConfig420> {
+ if !histograms.can_use_config_420_fast_path() {
+ return None;
+ }
// Verify all splits are on property 9 and all leaves have Gradient predictor
for node in tree {
match node {
@@ -278,12 +287,11 @@ fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<Gradi
}
}
- // Use existing make_lut which handles offset=0, multiplier=1 checks
- let lut = make_lut(tree, histograms)?;
- Some(GradientLookup { lut })
+ let lut = make_lut(tree)?;
+ Some(GradientLookupConfig420 { lut })
}
-impl ModularChannelDecoder for GradientLookup {
+impl ModularChannelDecoder for GradientLookupConfig420 {
const NEEDS_TOP: bool = true;
const NEEDS_TOPTOP: bool = false;
@@ -314,13 +322,14 @@ impl ModularChannelDecoder for GradientLookup {
prediction_data.topleft as i64,
);
- let dec = reader.read_signed_clustered(histograms, br, cluster as usize);
+ // Use the specialized config 420 fast path
+ let dec = reader.read_signed_clustered_config_420(histograms, br, cluster as usize);
dec.wrapping_add(pred as i32)
}
}
pub struct SingleGradientOnly {
- ctx: usize,
+ clustered_ctx: usize,
}
impl ModularChannelDecoder for SingleGradientOnly {
@@ -340,16 +349,42 @@ impl ModularChannelDecoder for SingleGradientOnly {
histograms: &Histograms,
) -> i32 {
let pred = Predictor::Gradient.predict_one(prediction_data, 0);
- let dec = reader.read_signed(histograms, br, self.ctx);
+ let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx);
make_pixel(dec, 1, pred)
}
}
+pub struct NoTree {
+ clustered_ctx: usize,
+}
+
+impl ModularChannelDecoder for NoTree {
+ const NEEDS_TOP: bool = false;
+ const NEEDS_TOPTOP: bool = false;
+
+ fn init_row(&mut self, _: &mut [&mut ModularChannel], _: usize, _: usize) {}
+
+ #[inline(always)]
+ fn decode_one(
+ &mut self,
+ _: PredictionData,
+ _: (usize, usize),
+ _: usize,
+ reader: &mut SymbolReader,
+ br: &mut BitReader,
+ histograms: &Histograms,
+ ) -> i32 {
+ let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx);
+ make_pixel(dec, 1, 0)
+ }
+}
+
#[allow(clippy::large_enum_variant)]
pub enum TreeSpecialCase {
+ NoTree(NoTree),
NoWp(NoWpTree),
- WpOnly(WpOnlyLookup),
- GradientLookup(GradientLookup),
+ WpOnlyConfig420(WpOnlyLookupConfig420),
+ GradientLookupConfig420(GradientLookupConfig420),
SingleGradientOnly(SingleGradientOnly),
General(GeneralTree),
}
@@ -372,9 +407,10 @@ pub fn specialize_tree(
let mut uses_non_wp = false;
// Obtain a pruned tree without nodes that are not relevant in the current channel and stream.
- // Proceed in BFS order, so that we know that the children of anode will be adjacent.
+ // Proceed in BFS order, so that we know that the children of a node will be adjacent.
+ // Also re-maps context IDs to cluster IDs.
while let Some(v) = queue.pop_front() {
- let node = tree.nodes[v as usize];
+ let mut node = tree.nodes[v as usize];
match node {
TreeNode::Split {
property,
@@ -409,11 +445,29 @@ pub fn specialize_tree(
TreeNode::Leaf { predictor, .. } => {
uses_wp |= predictor == Predictor::Weighted;
uses_non_wp |= predictor != Predictor::Weighted;
+ let TreeNode::Leaf { id, .. } = &mut node else {
+ unreachable!()
+ };
+ *id = tree.histograms.map_context_to_cluster(*id as usize) as u32;
pruned_tree.push(node);
}
}
}
+ if let [
+ TreeNode::Leaf {
+ predictor: Predictor::Zero,
+ multiplier: 1,
+ offset: 0,
+ id,
+ },
+ ] = &*pruned_tree
+ {
+ return Ok(TreeSpecialCase::NoTree(NoTree {
+ clustered_ctx: *id as usize,
+ }));
+ }
+
if let [
TreeNode::Leaf {
predictor: Predictor::Gradient,
@@ -424,20 +478,23 @@ pub fn specialize_tree(
] = &*pruned_tree
{
return Ok(TreeSpecialCase::SingleGradientOnly(SingleGradientOnly {
- ctx: *id as usize,
+ clustered_ctx: *id as usize,
}));
}
- if !uses_non_wp
- && let Some(wp) = WpOnlyLookup::new(&pruned_tree, &tree.histograms, header, xsize)
- {
- return Ok(TreeSpecialCase::WpOnly(wp));
+ if !uses_non_wp {
+ // Try the specialized 420 config version (fast path for effort 3 encoded images)
+ if let Some(wp) = WpOnlyLookupConfig420::new(&pruned_tree, &tree.histograms, header, xsize)
+ {
+ return Ok(TreeSpecialCase::WpOnlyConfig420(wp));
+ }
}
- // Try gradient LUT for non-WP trees (targets effort 2 encoding)
+ // Non-WP trees (includes effort 2 encoding and some groups in effort > 3)
if !uses_wp {
- if let Some(gl) = make_gradient_lut(&pruned_tree, &tree.histograms) {
- return Ok(TreeSpecialCase::GradientLookup(gl));
+ // Try config 420 specialized gradient LUT version (fast path for effort 2 encoded images)
+ if let Some(gl) = make_gradient_lut_config_420(&pruned_tree, &tree.histograms) {
+ return Ok(TreeSpecialCase::GradientLookupConfig420(gl));
}
return Ok(TreeSpecialCase::NoWp(NoWpTree::new(
pruned_tree,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
index 9537774e4f530..c5d665f328b9e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs
@@ -3,7 +3,13 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use std::{cmp::min, fmt::Debug};
+use std::{
+ cmp::min,
+ collections::{BTreeMap, BTreeSet},
+ fmt::Debug,
+ ops::Range,
+ sync::atomic::{AtomicUsize, Ordering},
+};
use crate::{
bit_reader::BitReader,
@@ -14,11 +20,13 @@ use crate::{
quantizer::{self, LfQuantFactors, QuantizerParams},
},
headers::{
- ImageMetadata, JxlHeader, bit_depth::BitDepth, frame_header::FrameHeader,
- modular::GroupHeader,
+ ImageMetadata, JxlHeader,
+ bit_depth::BitDepth,
+ frame_header::FrameHeader,
+ modular::{GroupHeader, TransformId},
},
image::{Image, Rect},
- util::{AtomicRefCell, CeilLog2, tracing_wrappers::*},
+ util::{AtomicRefCell, CeilLog2, SmallVec, tracing_wrappers::*},
};
use jxl_transforms::transform_map::*;
@@ -41,8 +49,8 @@ const IMAGE_OFFSET: (usize, usize) = (2, 2);
#[derive(Clone, PartialEq, Eq, Copy)]
struct ChannelInfo {
- // The index of the output channel in the render pipeline, or -1 for non-output channels.
- output_channel_idx: isize,
+ // The index of the output channel in the render pipeline.
+ output_channel_idx: Option<usize>,
// width, height
size: (usize, usize),
shift: Option<(usize, usize)>, // None for meta-channels
@@ -58,8 +66,8 @@ impl Debug for ChannelInfo {
write!(f, "(meta)")?;
}
write!(f, "{:?}", self.bit_depth)?;
- if self.output_channel_idx >= 0 {
- write!(f, "(output channel {})", self.output_channel_idx)?;
+ if let Some(oc) = self.output_channel_idx {
+ write!(f, "(output channel {})", oc)?;
}
Ok(())
}
@@ -162,7 +170,7 @@ impl ModularChannel {
fn channel_info(&self) -> ChannelInfo {
ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
size: self.data.size(),
shift: self.shift,
bit_depth: self.bit_depth,
@@ -170,6 +178,10 @@ impl ModularChannel {
}
}
+const BUFFER_STATUS_NOT_RENDERED: usize = 0;
+const BUFFER_STATUS_PARTIAL_RENDER: usize = 1;
+const BUFFER_STATUS_FINAL_RENDER: usize = 2;
+
// Note: this type uses interior mutability to get mutable references to multiple buffers at once.
// In principle, this is not needed, but the overhead should be minimal so using `unsafe` here is
// probably not worth it.
@@ -177,34 +189,81 @@ impl ModularChannel {
struct ModularBuffer {
data: AtomicRefCell<Option<ModularChannel>>,
// Number of times this buffer will be used, *including* when it is used for output.
- remaining_uses: usize,
- used_by_transforms: Vec<usize>,
+ remaining_uses: AtomicUsize,
+ // Transform steps that "strongly" or "weakly" use the image data in this buffer.
+ // A "strong" usage always triggers a re-render if the image data changes.
+ // A "weak" usage only triggers a re-render if the buffer is final, or if the
+ // current re-render was not only caused by weak re-renders.
+ used_by_transforms_strong: Vec<usize>,
+ used_by_transforms_weak: Vec<usize>,
size: (usize, usize),
+ status: AtomicUsize,
}
impl ModularBuffer {
+ fn get_status(&self) -> usize {
+ self.status.load(Ordering::Relaxed)
+ }
+
+ fn set_status(&self, val: usize) {
+ self.status.store(val, Ordering::Relaxed);
+ }
+
+ // Iterator over (transform_id, is_strong_use)
+ fn users(&self, include_weak: bool) -> impl Iterator<Item = (usize, bool)> {
+ let strong = self.used_by_transforms_strong.iter().map(|x| (*x, true));
+ let weak = if include_weak {
+ &self.used_by_transforms_weak[..]
+ } else {
+ &[]
+ }
+ .iter()
+ .map(|x| (*x, false));
+ strong.chain(weak)
+ }
+
// Gives out a copy of the buffer + auxiliary buffer, marking the buffer as used.
// If this was the last usage of the buffer, does not actually copy the buffer.
- fn get_buffer(&mut self) -> Result<ModularChannel> {
- self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap();
- if self.remaining_uses == 0 {
- Ok(self.data.borrow_mut().take().unwrap())
- } else {
- Ok(self
- .data
- .borrow()
- .as_ref()
- .map(ModularChannel::try_clone)
- .transpose()?
- .unwrap())
+ fn get_buffer(&self, can_consume: bool) -> Result<ModularChannel> {
+ if !can_consume {
+ return ModularChannel::try_clone(self.data.borrow().as_ref().unwrap());
}
+ let mut ret = None;
+ let _ = self.remaining_uses.fetch_update(
+ Ordering::Release,
+ Ordering::Acquire,
+ |remaining_pre| {
+ let remaining = remaining_pre.checked_sub(1).unwrap();
+ if ret.is_none() {
+ if remaining == 0 {
+ ret = Some(Ok(self.data.borrow_mut().take().unwrap()))
+ } else {
+ ret = self.data.borrow().as_ref().map(ModularChannel::try_clone);
+ }
+ } else if remaining == 0 {
+ *self.data.borrow_mut() = None;
+ }
+ Some(remaining)
+ },
+ );
+ Ok(ret.transpose()?.unwrap())
}
- fn mark_used(&mut self) {
- self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap();
- if self.remaining_uses == 0 {
- *self.data.borrow_mut() = None;
+ fn mark_used(&self, can_consume: bool) {
+ if !can_consume {
+ return;
}
+ let _ = self.remaining_uses.fetch_update(
+ Ordering::Release,
+ Ordering::Acquire,
+ |remaining_pre: usize| {
+ let remaining = remaining_pre.checked_sub(1).unwrap();
+ if remaining == 0 {
+ *self.data.borrow_mut() = None;
+ }
+ Some(remaining)
+ },
+ );
}
}
@@ -237,6 +296,7 @@ impl ModularBufferInfo {
};
self.grid_shape.0 * grid_pos.1 + grid_pos.0
}
+
fn get_grid_rect(
&self,
frame_header: &FrameHeader,
@@ -296,15 +356,41 @@ pub struct FullModularImage {
// In order, LfGlobal, LfGroup, HfGroup(pass 0), ..., HfGroup(last pass).
section_buffer_indices: Vec<Vec<usize>>,
modular_color_channels: usize,
+ can_do_partial_render: bool,
+ can_do_early_partial_render: bool,
+ decoded_section0_channels: usize,
+ needed_section0_channels_for_early_render: usize,
+ global_header: Option<GroupHeader>,
+ buffers_for_channels: Vec<usize>,
+ // Buffers to _start rendering from_ on the next call to process_output.
+ // This is initially set to LF global and LF buffers, and populated with HF buffers
+ // just before we start decoding them.
+ ready_buffers_dry_run: BTreeSet<(usize, usize)>,
+ ready_buffers: BTreeSet<(usize, usize)>,
+ // Whether each channel is used or not by the render pipeline.
+ pipeline_used_channels: Vec<bool>,
}
impl FullModularImage {
+ pub fn can_do_partial_render(&self) -> bool {
+ self.can_do_partial_render
+ }
+
+ pub fn can_do_early_partial_render(&self) -> bool {
+ self.can_do_early_partial_render
+ // Avoid green martians
+ && self.decoded_section0_channels >= self.needed_section0_channels_for_early_render
+ }
+
+ pub fn set_pipeline_used_channels(&mut self, used: &[bool]) {
+ self.pipeline_used_channels = used.to_vec();
+ }
+
#[instrument(level = "debug", skip_all)]
pub fn read(
frame_header: &FrameHeader,
image_metadata: &ImageMetadata,
modular_color_channels: usize,
- global_tree: &Option<Tree>,
br: &mut BitReader,
) -> Result<Self> {
let mut channels = vec![];
@@ -312,7 +398,7 @@ impl FullModularImage {
let shift = (frame_header.hshift(c), frame_header.vshift(c));
let size = frame_header.size();
channels.push(ChannelInfo {
- output_channel_idx: c as isize,
+ output_channel_idx: Some(c),
size: (size.0.div_ceil(1 << shift.0), size.1.div_ceil(1 << shift.1)),
shift: Some(shift),
bit_depth: image_metadata.bit_depth,
@@ -332,7 +418,7 @@ impl FullModularImage {
size.1.div_ceil(*ecups as usize),
);
channels.push(ChannelInfo {
- output_channel_idx: 3 + idx as isize,
+ output_channel_idx: Some(3 + idx),
size,
shift: Some((shift, shift)),
bit_depth: image_metadata.bit_depth,
@@ -350,12 +436,33 @@ impl FullModularImage {
transform_steps: vec![],
section_buffer_indices: vec![vec![]; 2 + frame_header.passes.num_passes as usize],
modular_color_channels,
+ can_do_partial_render: true,
+ can_do_early_partial_render: false,
+ decoded_section0_channels: 0,
+ needed_section0_channels_for_early_render: 0,
+ global_header: None,
+ buffers_for_channels: vec![],
+ ready_buffers_dry_run: BTreeSet::new(),
+ ready_buffers: BTreeSet::new(),
+ pipeline_used_channels: vec![],
});
}
trace!("reading modular header");
let header = GroupHeader::read(br)?;
+ // Disallow progressive rendering with multi-channel palette transforms
+ // or delta-palette.
+ let has_problematic_palette_transform = header.transforms.iter().any(|x| {
+ x.id == TransformId::Palette
+ && (x.num_channels > 1 || x.predictor_id != Predictor::Zero as u32)
+ });
+
+ let has_squeeze_transform = header
+ .transforms
+ .iter()
+ .any(|x| x.id == TransformId::Squeeze);
+
let (mut buffer_info, transform_steps) =
transforms::apply::meta_apply_transforms(&channels, &header)?;
@@ -460,12 +567,13 @@ impl FullModularImage {
);
for (pos, buf) in bi.buffer_grid.iter().enumerate() {
trace!(
- "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {}, used_by: {:?}",
+ "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {:?}, used_by: s {:?} w {:?}",
pos % bi.grid_shape.0,
pos / bi.grid_shape.0,
buf.size,
buf.remaining_uses,
- buf.used_by_transforms
+ buf.used_by_transforms_strong,
+ buf.used_by_transforms_weak,
);
}
}
@@ -475,24 +583,105 @@ impl FullModularImage {
trace!("Transform {i}: {ts:?}");
}
- with_buffers(&buffer_info, &section_buffer_indices[0], 0, true, |bufs| {
- decode_modular_subbitstream(
- bufs,
- ModularStreamId::GlobalData.get_id(frame_header),
- Some(header),
- global_tree,
- br,
- )
- })?;
+ let mut buffers_for_channels = vec![];
+
+ for (i, c) in buffer_info.iter().enumerate() {
+ if let Some(c) = c.info.output_channel_idx {
+ if buffers_for_channels.len() <= c {
+ buffers_for_channels.resize(c + 1, 0);
+ }
+ buffers_for_channels[c] = i;
+ }
+ }
+
+ let num_meta_channels = buffer_info
+ .iter()
+ .filter(|b| b.coded_channel_id >= 0 && b.info.is_meta())
+ .count();
Ok(FullModularImage {
buffer_info,
transform_steps,
section_buffer_indices,
modular_color_channels,
+ can_do_partial_render: !has_problematic_palette_transform,
+ can_do_early_partial_render: !has_problematic_palette_transform
+ && has_squeeze_transform,
+ decoded_section0_channels: 0,
+ needed_section0_channels_for_early_render: buffers_for_channels.len()
+ + num_meta_channels,
+ global_header: Some(header),
+ buffers_for_channels,
+ ready_buffers_dry_run: BTreeSet::new(),
+ ready_buffers: BTreeSet::new(),
+ pipeline_used_channels: vec![],
})
}
+ pub fn read_section0(
+ &mut self,
+ frame_header: &FrameHeader,
+ global_tree: &Option<Tree>,
+ br: &mut BitReader,
+ allow_partial: bool,
+ ) -> Result<()> {
+ let mut decoded_if_partial = 0;
+ let ret = with_buffers(
+ &self.buffer_info,
+ &self.section_buffer_indices[0],
+ 0,
+ |bufs| {
+ decode_modular_subbitstream(
+ bufs,
+ ModularStreamId::GlobalData.get_id(frame_header),
+ self.global_header.clone(),
+ global_tree,
+ br,
+ Some(&mut decoded_if_partial),
+ )
+ },
+ );
+
+ match (ret, allow_partial) {
+ (Ok(_), _) => {
+ // Decoded section completely.
+ self.decoded_section0_channels = self.section_buffer_indices[0].len();
+ }
+ (Err(_), true) => {
+ self.decoded_section0_channels = decoded_if_partial;
+ }
+ (Err(e), false) => {
+ return Err(e);
+ }
+ }
+
+ for b in self.section_buffer_indices[0]
+ .iter()
+ .take(self.decoded_section0_channels)
+ {
+ if self.buffer_info[*b].buffer_grid[0].get_status() == BUFFER_STATUS_FINAL_RENDER {
+ continue;
+ }
+ // If we did a partial decode, we cannot be 100% sure whether we correctly
+ // decoded all the sections. Thus, mark the sections as partially decoded.
+ self.buffer_info[*b].buffer_grid[0].set_status(if allow_partial {
+ BUFFER_STATUS_PARTIAL_RENDER
+ } else {
+ BUFFER_STATUS_FINAL_RENDER
+ });
+ self.ready_buffers_dry_run.insert((*b, 0));
+ }
+
+ Ok(())
+ }
+
+ pub fn mark_group_to_be_read(&mut self, section_id: usize, group: usize) {
+ for b in self.section_buffer_indices[section_id].iter() {
+ self.buffer_info[*b].buffer_grid[group].set_status(BUFFER_STATUS_FINAL_RENDER);
+ self.ready_buffers_dry_run.insert((*b, group));
+ }
+ }
+
#[allow(clippy::type_complexity)]
#[instrument(level = "debug", skip(self, frame_header, global_tree, br), ret)]
pub fn read_stream(
@@ -520,7 +709,6 @@ impl FullModularImage {
&self.buffer_info,
&self.section_buffer_indices[section_id],
grid,
- true,
|bufs| {
decode_modular_subbitstream(
bufs,
@@ -528,60 +716,247 @@ impl FullModularImage {
None,
global_tree,
br,
- )
+ None,
+ )?;
+ Ok(())
},
)?;
+
+ Ok(())
+ }
+
+ fn maybe_output(
+ &self,
+ buf: usize,
+ grid: usize,
+ dry_run: bool,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>,
+ ) -> Result<()> {
+ if let Some(chan) = self.buffer_info[buf].info.output_channel_idx {
+ let is_final =
+ self.buffer_info[buf].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER;
+ let all_final = self.buffers_for_channels.iter().all(|x| {
+ self.buffer_info[*x].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER
+ });
+ let channels: SmallVec<usize, 3> = if chan == 0 && self.modular_color_channels == 1 {
+ (0..3).filter(|x| self.pipeline_used_channels[*x]).collect()
+ } else {
+ self.pipeline_used_channels[chan]
+ .then_some(chan)
+ .into_iter()
+ .collect()
+ };
+ if channels.is_empty() {
+ return Ok(());
+ }
+ if dry_run {
+ for c in channels.iter() {
+ pass_to_pipeline(*c, grid, is_final, None)?;
+ }
+ } else {
+ debug!("Rendering channel {chan:?}, grid position {grid}");
+ let buf = self.buffer_info[buf].buffer_grid[grid].get_buffer(all_final)?;
+ for c in channels[1..].iter() {
+ pass_to_pipeline(*c, grid, is_final, Some(buf.data.try_clone()?))?;
+ }
+ pass_to_pipeline(channels[0], grid, is_final, Some(buf.data))?;
+ }
+ }
Ok(())
}
+ // If `dry_run` is true, this call does not modify any state, and the calls to `pass_to_pipeline`
+ // will have None as an image. Otherwise, the image will always be `Some(..)`.
+ // It is *required* to do a dry run before doing an actual run after any event that might have
+ // readied some buffers.
pub fn process_output(
&mut self,
- section_id: usize,
- grid: usize,
frame_header: &FrameHeader,
- pass_to_pipeline: &mut dyn FnMut(usize, usize, usize, Image<i32>) -> Result<()>,
+ dry_run: bool,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>,
) -> Result<()> {
- let mut maybe_output = |bi: &mut ModularBufferInfo, grid: usize| -> Result<()> {
- if bi.info.output_channel_idx >= 0 {
- let chan = bi.info.output_channel_idx as usize;
- debug!("Rendering channel {chan:?}, grid position {grid}");
- let buf = bi.buffer_grid[grid].get_buffer()?;
- // TODO(veluca): figure out what to do with passes here.
- if chan == 0 && self.modular_color_channels == 1 {
- for i in 0..2 {
- pass_to_pipeline(i, grid, 1, buf.data.try_clone()?)?;
- }
- pass_to_pipeline(2, grid, 1, buf.data)?;
+ // TODO(veluca): consider using `used_channel_mask` to avoid running transforms that produce
+ // channels that are not used.
+
+ // layer -> (transform -> is_strong)
+ let mut to_process_by_layer = BTreeMap::<usize, BTreeMap<usize, bool>>::new();
+ let mut buffers_to_output = vec![];
+
+ let ready_buffers = if dry_run {
+ std::mem::take(&mut self.ready_buffers_dry_run)
+ } else {
+ assert!(self.ready_buffers_dry_run.is_empty());
+ std::mem::take(&mut self.ready_buffers)
+ };
+
+ for (buf, grid) in ready_buffers {
+ if self.buffer_info[buf].info.output_channel_idx.is_some() {
+ buffers_to_output.push((buf, grid));
+ }
+ for (t, is_strong_dep) in self.buffer_info[buf].buffer_grid[grid].users(true) {
+ let layer = self.transform_steps[t].layer;
+ let layer = to_process_by_layer.entry(layer).or_default();
+ let is_strong = layer.entry(t).or_default();
+ *is_strong |= is_strong_dep;
+ }
+ if dry_run {
+ self.ready_buffers.insert((buf, grid));
+ }
+ }
+
+ // When doing a dry run, run the same logic as the real execution, but
+ // without modifying the actual buffer status -- instead, we use local
+ // overrides.
+ // This allows us to know what buffers will be produced before producing any.
+ let mut status_overrides = BTreeMap::new();
+
+ let get_status =
+ |status_overrides: &mut BTreeMap<(usize, usize), usize>, b: usize, g: usize| {
+ if let Some(s) = status_overrides.get(&(b, g)) {
+ *s
} else {
- pass_to_pipeline(chan, grid, 1, buf.data)?;
+ self.buffer_info[b].buffer_grid[g].get_status()
+ }
+ };
+
+ let mut new_dirty_transforms = vec![];
+ while let Some((_, transforms)) = to_process_by_layer.pop_first() {
+ trace!("{transforms:?}");
+ for (t, is_strong) in transforms {
+ let tfm = &self.transform_steps[t];
+ trace!("{:?}", tfm);
+
+ let dependency_status = tfm
+ .deps
+ .iter()
+ .map(|(b, g)| get_status(&mut status_overrides, *b, *g))
+ .min()
+ .unwrap_or(BUFFER_STATUS_FINAL_RENDER);
+
+ if dependency_status == BUFFER_STATUS_NOT_RENDERED {
+ continue;
+ }
+ let is_final = dependency_status == BUFFER_STATUS_FINAL_RENDER;
+
+ let mut previous_output_status = None;
+ for (b, g) in tfm.outputs(&self.buffer_info) {
+ let status = get_status(&mut status_overrides, b, g);
+ if previous_output_status.is_none() {
+ previous_output_status = Some(status);
+ }
+ assert_eq!(Some(status), previous_output_status);
+ if dry_run {
+ status_overrides.insert((b, g), dependency_status);
+ } else {
+ self.buffer_info[b].buffer_grid[g].set_status(dependency_status);
+ }
+ }
+ let previous_output_status = previous_output_status.unwrap();
+
+ if !dry_run {
+ tfm.do_run(frame_header, &self.buffer_info, is_final)?;
+ }
+
+ // If this was the first _or_ the last render, trigger a re-render across weak edges
+ // even if the render was caused by a weak edge.
+ // This is necessary to finish drawing those renders correctly.
+ let is_strong = is_strong
+ || (previous_output_status == BUFFER_STATUS_NOT_RENDERED
+ || dependency_status == BUFFER_STATUS_FINAL_RENDER);
+ for (buf, grid) in self.transform_steps[t].outputs(&self.buffer_info) {
+ if self.buffer_info[buf].info.output_channel_idx.is_some() {
+ buffers_to_output.push((buf, grid));
+ }
+ for (t, is_strong_dep) in
+ self.buffer_info[buf].buffer_grid[grid].users(is_strong)
+ {
+ new_dirty_transforms.push((t, is_strong_dep));
+ }
+ }
+ }
+
+ for (t, is_strong_dep) in new_dirty_transforms.drain(..) {
+ let layer = self.transform_steps[t].layer;
+ let layer = to_process_by_layer.entry(layer).or_default();
+ let is_strong = layer.entry(t).or_default();
+ *is_strong |= is_strong_dep;
+ }
+ }
+
+ // Pass all the output buffers to the render pipeline.
+ for (buf, grid) in buffers_to_output {
+ self.maybe_output(buf, grid, dry_run, pass_to_pipeline)?;
+ }
+
+ Ok(())
+ }
+
+ pub fn channel_range(&self) -> Range<usize> {
+ if self.modular_color_channels != 0 {
+ 0..self.buffers_for_channels.len()
+ } else {
+ // VarDCT image.
+ 3..self.buffers_for_channels.len()
+ }
+ }
+
+ pub fn flush_output(
+ &mut self,
+ group: usize,
+ chan: usize,
+ pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Image<i32>) -> Result<()>,
+ ) -> Result<()> {
+ if !self.can_do_partial_render() {
+ return Ok(());
+ }
+ let buf_idx = self.buffers_for_channels[chan];
+ // Skip channels that don't have a real buffer assignment.
+ // buffers_for_channels is zero-filled on resize, so intermediate channels
+ // (e.g. G/B when modular_color_channels==1) may alias buffer 0 incorrectly.
+ if self.buffer_info[buf_idx].info.output_channel_idx != Some(chan) {
+ return Ok(());
+ }
+ self.maybe_output(buf_idx, group, false, &mut |chan, grid, complete, img| {
+ pass_to_pipeline(chan, grid, complete, img.unwrap())
+ })
+ }
+
+ pub fn zero_fill_empty_channels(
+ &mut self,
+ num_passes: usize,
+ num_groups: usize,
+ num_lf_groups: usize,
+ ) -> Result<()> {
+ if !self.can_do_partial_render() {
+ return Ok(());
+ }
+ if self.buffer_info.is_empty() {
+ return Ok(());
+ }
+ let mut fill_buffer = |section: usize, grid| -> Result<()> {
+ // TODO(veluca): consider filling these buffers with placeholders instead of real images.
+ with_buffers(
+ &self.buffer_info,
+ &self.section_buffer_indices[section],
+ grid,
+ |_| Ok(()),
+ )?;
+ for b in self.section_buffer_indices[section].iter() {
+ if self.buffer_info[*b].buffer_grid[grid].get_status() == BUFFER_STATUS_NOT_RENDERED
+ {
+ self.buffer_info[*b].buffer_grid[grid].set_status(BUFFER_STATUS_PARTIAL_RENDER);
+ self.ready_buffers.insert((*b, grid));
}
}
Ok(())
};
-
- let mut new_ready_transform_chunks = vec![];
- for buf in self.section_buffer_indices[section_id].iter().copied() {
- maybe_output(&mut self.buffer_info[buf], grid)?;
- let new_chunks = self.buffer_info[buf].buffer_grid[grid]
- .used_by_transforms
- .to_vec();
- trace!("Buffer {buf} grid position {grid} used by chunks {new_chunks:?}");
- new_ready_transform_chunks.extend(new_chunks);
- }
-
- trace!(?new_ready_transform_chunks);
-
- while let Some(tfm) = new_ready_transform_chunks.pop() {
- trace!("tfm = {tfm} chunk = {:?}", self.transform_steps[tfm]);
- for (new_buf, new_grid) in
- self.transform_steps[tfm].dep_ready(frame_header, &mut self.buffer_info)?
- {
- maybe_output(&mut self.buffer_info[new_buf], new_grid)?;
- let new_chunks = self.buffer_info[new_buf].buffer_grid[new_grid]
- .used_by_transforms
- .to_vec();
- trace!("Buffer {new_buf} grid position {new_grid} used by chunks {new_chunks:?}");
- new_ready_transform_chunks.extend(new_chunks);
+ fill_buffer(0, 0)?;
+ for grid in 0..num_lf_groups {
+ fill_buffer(1, grid)?;
+ }
+ for pass in 0..num_passes {
+ for grid in 0..num_groups {
+ fill_buffer(2 + pass, grid)?;
}
}
@@ -733,6 +1108,7 @@ pub fn decode_vardct_lf(
None,
global_tree,
br,
+ None,
)?;
dequant_lf(
r,
@@ -780,6 +1156,7 @@ pub fn decode_hf_metadata(
None,
global_tree,
br,
+ None,
)?;
let ytox_image = &buffers[0].data;
let ytob_image = &buffers[1].data;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
index 5f1b631370267..2c74441aa0f00 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs
@@ -56,40 +56,44 @@ pub enum TransformStep {
#[derive(Debug)]
pub struct TransformStepChunk {
pub(super) step: TransformStep,
+
// Grid position this transform should produce.
// Note that this is a lie for Palette with AverageAll or Weighted, as the transform with
// position (0, y) will produce the entire row of blocks (*, y) (and there will be no
// transforms with position (x, y) with x > 0).
pub(super) grid_pos: (usize, usize),
- // Number of inputs that are not yet available.
- pub(super) incomplete_deps: usize,
+
+ // List of (buffer, grid) that this transform depends on.
+ pub(in super::super) deps: Vec<(usize, usize)>,
+
+ // Processing layer that this transform belongs to. Layer 0 are transforms
+ // that only depend on coded channels, layer 1 are transforms that only
+ // depend on coded channels and layer 0 outputs, etc. Since transforms
+ // in the same layer have no inter-dependencies, they can be run at the
+ // same time.
+ pub(in super::super) layer: usize,
}
impl TransformStepChunk {
- // Marks that one dependency of this transform is ready, and potentially runs the transform,
- // returning the new buffers that are now ready.
- #[instrument(level = "trace", skip_all)]
- pub fn dep_ready(
- &mut self,
- frame_header: &FrameHeader,
- buffers: &mut [ModularBufferInfo],
- ) -> Result<Vec<(usize, usize)>> {
- self.incomplete_deps = self.incomplete_deps.checked_sub(1).unwrap();
- if self.incomplete_deps > 0 {
- trace!(
- "skipping transform chunk because incomplete_deps = {}",
- self.incomplete_deps
- );
- return Ok(vec![]);
- }
- let buf_out: &[usize] = match &self.step {
+ fn buf_out(&self) -> &[usize] {
+ match &self.step {
TransformStep::Rct { buf_out, .. } => buf_out,
TransformStep::Palette { buf_out, .. } => buf_out,
TransformStep::HSqueeze { buf_out, .. } | TransformStep::VSqueeze { buf_out, .. } => {
- &[*buf_out]
+ std::slice::from_ref(buf_out)
}
- };
+ }
+ }
+ // Runs this transform. This function *will* panic if the transform is not ready.
+ #[instrument(level = "trace", skip_all)]
+ pub fn do_run(
+ &self,
+ frame_header: &FrameHeader,
+ buffers: &[ModularBufferInfo],
+ is_final: bool,
+ ) -> Result<()> {
+ let buf_out = self.buf_out();
let out_grid_kind = buffers[buf_out[0]].grid_kind;
let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos);
let out_size = buffers[buf_out[0]].info.size;
@@ -112,13 +116,12 @@ impl TransformStepChunk {
// If not, creates buffers in the output that are a copy of the input buffers.
// This should be rare.
*buffers[buf_out[i]].buffer_grid[out_grid].data.borrow_mut() =
- Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer()?);
+ Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer(is_final)?);
}
- with_buffers(buffers, buf_out, out_grid, false, |mut bufs| {
+ with_buffers(buffers, buf_out, out_grid, |mut bufs| {
super::rct::do_rct_step(&mut bufs, *op, *perm);
Ok(())
})?;
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
}
TransformStep::Palette {
buf_in,
@@ -127,10 +130,9 @@ impl TransformStepChunk {
..
} if buffers[*buf_in].info.size.0 == 0 => {
// Nothing to do, just bookkeeping.
- buffers[*buf_in].buffer_grid[out_grid].mark_used();
- buffers[*buf_pal].buffer_grid[0].mark_used();
- with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?;
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
+ buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final);
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
+ with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?;
}
TransformStep::Palette {
buf_in,
@@ -155,7 +157,7 @@ impl TransformStepChunk {
});
// Ensure that the output buffers are present.
// TODO(szabadka): Extend the callback to support many grid points.
- with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?;
+ with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?;
let grid_shape = buffers[buf_out[0]].grid_shape;
let grid_x = out_grid % grid_shape.0;
let grid_y = out_grid / grid_shape.0;
@@ -191,9 +193,8 @@ impl TransformStepChunk {
*predictor,
);
}
- buffers[*buf_in].buffer_grid[out_grid].mark_used();
- buffers[*buf_pal].buffer_grid[0].mark_used();
- Ok(buf_out.iter().map(|x| (*x, out_grid)).collect())
+ buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final);
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
}
TransformStep::Palette {
buf_in,
@@ -206,7 +207,6 @@ impl TransformStepChunk {
} => {
assert_eq!(out_grid_kind, buffers[*buf_in].grid_kind);
assert_eq!(out_size, buffers[*buf_in].info.size);
- let mut generated_chunks = Vec::<(usize, usize)>::new();
let grid_shape = buffers[buf_out[0]].grid_shape;
{
assert_eq!(out_grid % grid_shape.0, 0);
@@ -222,7 +222,7 @@ impl TransformStepChunk {
));
// Ensure that the output buffers are present.
// TODO(szabadka): Extend the callback to support many grid points.
- with_buffers(buffers, buf_out, out_grid + grid_x, false, |_| Ok(()))?;
+ with_buffers(buffers, buf_out, out_grid + grid_x, |_| Ok(()))?;
}
let in_buf_refs: Vec<&ModularChannel> =
in_bufs.iter().map(|x| x.deref()).collect();
@@ -256,14 +256,10 @@ impl TransformStepChunk {
wp_header,
)?;
}
- buffers[*buf_pal].buffer_grid[0].mark_used();
+ buffers[*buf_pal].buffer_grid[0].mark_used(is_final);
for grid_x in 0..grid_shape.0 {
- buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used();
- for buf in buf_out {
- generated_chunks.push((*buf, out_grid + grid_x));
- }
+ buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used(is_final);
}
- Ok(generated_chunks)
}
TransformStep::HSqueeze { buf_in, buf_out } => {
let buf_avg = &buffers[buf_in[0]];
@@ -309,7 +305,7 @@ impl TransformStepChunk {
))
};
- with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| {
+ with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| {
super::squeeze::do_hsqueeze_step(
&in_avg.data.get_rect(buf_avg.get_grid_rect(
frame_header,
@@ -328,9 +324,8 @@ impl TransformStepChunk {
Ok(())
})?;
}
- buffers[buf_in[0]].buffer_grid[in_grid].mark_used();
- buffers[buf_in[1]].buffer_grid[res_grid].mark_used();
- Ok(vec![(*buf_out, out_grid)])
+ buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final);
+ buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final);
}
TransformStep::VSqueeze { buf_in, buf_out } => {
let buf_avg = &buffers[buf_in[0]];
@@ -379,7 +374,7 @@ impl TransformStepChunk {
buf_avg.get_grid_rect(frame_header, out_grid_kind, (gx, gy));
let res_grid_rect =
buf_res.get_grid_rect(frame_header, out_grid_kind, (gx, gy));
- with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| {
+ with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| {
super::squeeze::do_vsqueeze_step(
&in_avg.data.get_rect(avg_grid_rect),
&in_res.data.get_rect(res_grid_rect),
@@ -390,11 +385,34 @@ impl TransformStepChunk {
Ok(())
})?;
}
- buffers[buf_in[0]].buffer_grid[in_grid].mark_used();
- buffers[buf_in[1]].buffer_grid[res_grid].mark_used();
- Ok(vec![(*buf_out, out_grid)])
+ buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final);
+ buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final);
}
- }
+ };
+
+ Ok(())
+ }
+
+ // Iterates over the list of outputs for this transform.
+ pub fn outputs(&self, buffers: &[ModularBufferInfo]) -> impl Iterator<Item = (usize, usize)> {
+ let buf_out = self.buf_out();
+ let out_grid_kind = buffers[buf_out[0]].grid_kind;
+ let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos);
+ let grid_offset_up = match &self.step {
+ TransformStep::Palette {
+ buf_in,
+ buf_out,
+ predictor,
+ ..
+ } if buffers[*buf_in].info.size.0 != 0 && predictor.requires_full_row() => {
+ buffers[buf_out[0]].grid_shape.0
+ }
+ _ => 1,
+ };
+
+ buf_out
+ .iter()
+ .flat_map(move |x| (0..grid_offset_up).map(move |y| (*x, out_grid + y)))
}
}
@@ -445,7 +463,7 @@ fn meta_apply_single_transform(
for i in 0..3 {
let c = &mut channels[begin_channel + i];
let mut info = c.1;
- info.output_channel_idx = -1;
+ info.output_channel_idx = None;
c.0 = add_transform_buffer(
info,
format!(
@@ -503,7 +521,7 @@ fn meta_apply_single_transform(
((w, h.div_ceil(2)), (w, h - h.div_ceil(2)))
};
let new_0 = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: new_shift,
size: new_size_0,
bit_depth: chan.bit_depth,
@@ -513,7 +531,7 @@ fn meta_apply_single_transform(
format!("Squeezed channel, original channel {}", begin_channel + ic),
);
let new_1 = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: new_shift,
size: new_size_1,
bit_depth: chan.bit_depth,
@@ -551,7 +569,7 @@ fn meta_apply_single_transform(
// equal in the line above.
let bit_depth = channels[begin_channel].1.bit_depth;
let pchan_info = ChannelInfo {
- output_channel_idx: -1,
+ output_channel_idx: None,
shift: None,
size: (num_colors + num_deltas, num_channels),
bit_depth,
@@ -564,7 +582,7 @@ fn meta_apply_single_transform(
),
);
let mut inchan_info = channels[begin_channel].1;
- inchan_info.output_channel_idx = -1;
+ inchan_info.output_channel_idx = None;
let inchan = add_transform_buffer(
inchan_info,
format!(
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
index f1de5be25ec1a..6be51c221ed5c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs
@@ -3,10 +3,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
use apply::TransformStep;
pub use apply::TransformStepChunk;
use num_derive::FromPrimitive;
+use crate::frame::modular::BUFFER_STATUS_NOT_RENDERED;
use crate::frame::modular::ModularBuffer;
use crate::headers::frame_header::FrameHeader;
use crate::util::AtomicRefCell;
@@ -112,15 +116,17 @@ pub fn make_grids(
// Create grids.
for g in buffer_info.iter_mut() {
- let is_output = g.info.output_channel_idx >= 0;
+ let is_output = g.info.output_channel_idx.is_some();
g.buffer_grid = get_grid_indices(g.grid_shape)
.map(|(x, y)| ModularBuffer {
data: AtomicRefCell::new(None),
- remaining_uses: if is_output { 1 } else { 0 },
- used_by_transforms: vec![],
+ remaining_uses: AtomicUsize::new(if is_output { 1 } else { 0 }),
+ used_by_transforms_weak: vec![],
+ used_by_transforms_strong: vec![],
size: g
.get_grid_rect(frame_header, g.grid_kind, (x as usize, y as usize))
.size,
+ status: AtomicUsize::new(BUFFER_STATUS_NOT_RENDERED),
})
.collect();
}
@@ -135,7 +141,8 @@ pub fn make_grids(
grid_transform_steps.push(TransformStepChunk {
step: transform.clone(),
grid_pos: (grid_pos.0 as usize, grid_pos.1 as usize),
- incomplete_deps: 0,
+ deps: vec![],
+ layer: 0,
});
ts
};
@@ -145,6 +152,7 @@ pub fn make_grids(
output_grid_kind: ModularGridKind,
output_grid_shape: (usize, usize),
output_grid_pos: (isize, isize),
+ is_weak: bool,
grid_transform_steps: &mut Vec<TransformStepChunk>,
buffer_info: &mut Vec<ModularBufferInfo>| {
let output_grid_size = (output_grid_shape.0 as isize, output_grid_shape.1 as isize);
@@ -159,15 +167,19 @@ pub fn make_grids(
let output_grid_pos = (output_grid_pos.0 as usize, output_grid_pos.1 as usize);
let input_grid_pos =
buffer_info[input_buffer_idx].get_grid_idx(output_grid_kind, output_grid_pos);
- if !buffer_info[input_buffer_idx].buffer_grid[input_grid_pos]
- .used_by_transforms
- .contains(&ts)
+ let grid = &mut buffer_info[input_buffer_idx].buffer_grid[input_grid_pos];
+ if !grid.used_by_transforms_weak.contains(&ts)
+ && !grid.used_by_transforms_strong.contains(&ts)
{
- buffer_info[input_buffer_idx].buffer_grid[input_grid_pos].remaining_uses += 1;
- buffer_info[input_buffer_idx].buffer_grid[input_grid_pos]
- .used_by_transforms
- .push(ts);
- grid_transform_steps[ts].incomplete_deps += 1;
+ grid.remaining_uses.fetch_add(1, Ordering::Relaxed);
+ grid_transform_steps[ts]
+ .deps
+ .push((input_buffer_idx, input_grid_pos));
+ if is_weak {
+ grid.used_by_transforms_weak.push(ts);
+ } else {
+ grid.used_by_transforms_strong.push(ts);
+ }
}
};
@@ -191,6 +203,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -219,6 +232,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -229,6 +243,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -239,6 +254,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y - 1),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -265,6 +281,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -274,6 +291,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -289,6 +307,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x + dx, y + dy),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -309,6 +328,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -320,6 +340,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x + 1, y),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -330,6 +351,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x - 1, y),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -348,6 +370,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y),
+ false,
&mut grid_transform_steps,
buffer_info,
);
@@ -359,6 +382,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y + 1),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -369,6 +393,7 @@ pub fn make_grids(
out_kind,
out_shape,
(x, y - 1),
+ true,
&mut grid_transform_steps,
buffer_info,
);
@@ -377,6 +402,46 @@ pub fn make_grids(
}
}
+ // Compute the layer of each transform step.
+ // TODO(veluca): for parallelization purposes, it might make sense to try to ensure that
+ // transforms in the same layer are as similar in runtime as possible.
+ let mut transforms_needed_by = vec![vec![]; grid_transform_steps.len()];
+ let mut enabled_transforms = vec![vec![]; grid_transform_steps.len()];
+ for (i, s) in grid_transform_steps.iter().enumerate() {
+ for (b, g) in s.outputs(buffer_info) {
+ for (t, _) in buffer_info[b].buffer_grid[g].users(true) {
+ transforms_needed_by[t].push(i);
+ enabled_transforms[i].push(t);
+ }
+ }
+ }
+
+ let mut missing_prerequisites: Vec<_> = transforms_needed_by.iter().map(|x| x.len()).collect();
+
+ let mut stack = vec![];
+ for (i, m) in missing_prerequisites.iter().enumerate() {
+ if *m == 0 {
+ stack.push(i);
+ }
+ }
+
+ while let Some(i) = stack.pop() {
+ assert_eq!(missing_prerequisites[i], 0);
+ for e in enabled_transforms[i].iter() {
+ missing_prerequisites[*e] = missing_prerequisites[*e].checked_sub(1).unwrap();
+ if missing_prerequisites[*e] == 0 {
+ stack.push(*e);
+ }
+ }
+
+ grid_transform_steps[i].layer = transforms_needed_by[i]
+ .iter()
+ .map(|x| grid_transform_steps[*x].layer)
+ .max()
+ .unwrap_or(0)
+ + 1;
+ }
+
trace!(?grid_transform_steps, ?buffer_info);
grid_transform_steps
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
index 59730862b7061..b5f0022a95270 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs
@@ -63,6 +63,124 @@ pub struct Tree {
pub histograms: Histograms,
}
+fn validate_tree(tree: &[TreeNode], num_properties: usize) -> Result<()> {
+ const HEIGHT_LIMIT: usize = 2048;
+
+ if tree.is_empty() {
+ return Ok(());
+ }
+
+ // This mirrors libjxl's ValidateTree(), but avoids allocating
+ // `num_properties * tree.len()` entries.
+ //
+ // We do an explicit DFS and keep the property ranges only for the current root->node path.
+ // When descending into a child we update exactly one property's range (the one we split on)
+ // and store the previous range in the child frame; when returning from that child we restore
+ // it. This makes memory O(num_properties + height) instead of O(num_properties * tree_size).
+
+ #[derive(Clone, Copy, Debug)]
+ enum Stage {
+ Enter,
+ AfterLeft,
+ AfterRight,
+ }
+
+ struct Frame {
+ node: usize,
+ depth: usize,
+ stage: Stage,
+ restore: Option<(usize, (i32, i32))>,
+ }
+
+ let mut property_ranges: Vec<(i32, i32)> = vec![(i32::MIN, i32::MAX); num_properties];
+ let mut stack = vec![Frame {
+ node: 0,
+ depth: 0,
+ stage: Stage::Enter,
+ restore: None,
+ }];
+
+ while let Some(mut frame) = stack.pop() {
+ if frame.depth > HEIGHT_LIMIT {
+ return Err(Error::TreeTooTall(frame.depth, HEIGHT_LIMIT));
+ }
+
+ match (frame.stage, tree[frame.node]) {
+ (Stage::Enter, TreeNode::Leaf { .. }) => {
+ if let Some((p, old)) = frame.restore {
+ property_ranges[p] = old;
+ }
+ }
+ (
+ Stage::Enter,
+ TreeNode::Split {
+ property,
+ val,
+ left,
+ right: _,
+ },
+ ) => {
+ let p = property as usize;
+ let (l, u) = property_ranges[p];
+ if l > val || u <= val {
+ return Err(Error::TreeSplitOnEmptyRange(property, val, l, u));
+ }
+
+ frame.stage = Stage::AfterLeft;
+ let depth = frame.depth;
+ stack.push(frame);
+
+ // Descend into left child: range becomes (val+1, u).
+ let old = property_ranges[p];
+ property_ranges[p] = (val + 1, u);
+ stack.push(Frame {
+ node: left as usize,
+ depth: depth + 1,
+ stage: Stage::Enter,
+ restore: Some((p, old)),
+ });
+ }
+ (
+ Stage::AfterLeft,
+ TreeNode::Split {
+ property,
+ val,
+ left: _,
+ right,
+ },
+ ) => {
+ let p = property as usize;
+ let (l, u) = property_ranges[p];
+ if l > val || u <= val {
+ return Err(Error::TreeSplitOnEmptyRange(property, val, l, u));
+ }
+
+ frame.stage = Stage::AfterRight;
+ let depth = frame.depth;
+ stack.push(frame);
+
+ // Descend into right child: range becomes (l, val).
+ let old = property_ranges[p];
+ property_ranges[p] = (l, val);
+ stack.push(Frame {
+ node: right as usize,
+ depth: depth + 1,
+ stage: Stage::Enter,
+ restore: Some((p, old)),
+ });
+ }
+ (Stage::AfterRight, TreeNode::Split { .. }) => {
+ if let Some((p, old)) = frame.restore {
+ property_ranges[p] = old;
+ }
+ }
+ _ => unreachable!("invalid tree validation state"),
+ }
+ }
+
+ Ok(())
+}
+
impl Debug for Tree {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Tree[{:?}]", self.nodes)
@@ -358,52 +476,7 @@ impl Tree {
tree_reader.check_final_state(&tree_histograms, br)?;
let num_properties = max_property as usize + 1;
- let mut property_ranges = Vec::new_with_capacity(num_properties * tree.len())?;
- property_ranges.resize(num_properties * tree.len(), (i32::MIN, i32::MAX));
- let mut height = Vec::new_with_capacity(tree.len())?;
- height.resize(tree.len(), 0);
- for i in 0..tree.len() {
- const HEIGHT_LIMIT: usize = 2048;
- if height[i] > HEIGHT_LIMIT {
- return Err(Error::TreeTooLarge(height[i], HEIGHT_LIMIT));
- }
- if let TreeNode::Split {
- property,
- val,
- left,
- right,
- } = tree[i]
- {
- height[left as usize] = height[i] + 1;
- height[right as usize] = height[i] + 1;
- for p in 0..num_properties {
- if p == property as usize {
- let (l, u) = property_ranges[i * num_properties + p];
- if l > val || u <= val {
- return Err(Error::TreeSplitOnEmptyRange(p as u8, val, l, u));
- }
- trace!(
- "splitting at node {i} on property {p}, range [{l}, {u}] at position {val}"
- );
- property_ranges[left as usize * num_properties + p] = (val + 1, u);
- property_ranges[right as usize * num_properties + p] = (l, val);
- } else {
- property_ranges[left as usize * num_properties + p] =
- property_ranges[i * num_properties + p];
- property_ranges[right as usize * num_properties + p] =
- property_ranges[i * num_properties + p];
- }
- }
- } else {
- #[cfg(feature = "tracing")]
- {
- for p in 0..num_properties {
- let (l, u) = property_ranges[i * num_properties + p];
- trace!("final range at node {i} property {p}: [{l}, {u}]");
- }
- }
- }
- }
+ validate_tree(&tree, num_properties)?;
let histograms = Histograms::decode(tree.len().div_ceil(2), br, true)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
index 7eb13c4ce2495..0bcbda39a020f 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs
@@ -254,6 +254,7 @@ impl QuantEncoding {
None,
&lf_global.tree,
br,
+ None,
)?;
let mut qtable = Vec::with_capacity(required_size_x * required_size_y * 3);
for channel in image.iter_mut() {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
index 57413e811b57a..e1797e3c4fc7e 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs
@@ -13,12 +13,23 @@ use crate::{
pub const NUM_QUANT_TABLES: usize = 17;
pub const GLOBAL_SCALE_DENOM: usize = 1 << 16;
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct LfQuantFactors {
pub quant_factors: [f32; 3],
pub inv_quant_factors: [f32; 3],
}
+impl Default for LfQuantFactors {
+ fn default() -> Self {
+ let quant_factors = quant_weights::LF_QUANT;
+ let inv_quant_factors = quant_factors.map(f32::recip);
+ Self {
+ quant_factors,
+ inv_quant_factors,
+ }
+ }
+}
+
impl LfQuantFactors {
pub fn new(br: &mut BitReader) -> Result<LfQuantFactors> {
let mut quant_factors = [0.0f32; 3];
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
index c8803bf28e113..f419c9858c55d 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs
@@ -12,16 +12,27 @@ use crate::api::JxlOutputBuffer;
use crate::bit_reader::BitReader;
use crate::error::{Error, Result};
use crate::features::epf::SigmaSource;
+use crate::features::noise::Noise;
+use crate::features::patches::PatchesDictionary;
+use crate::features::spline::Splines;
+use crate::frame::RenderUnit;
+use crate::frame::color_correlation_map::ColorCorrelationParams;
+use crate::frame::quantizer::LfQuantFactors;
use crate::headers::frame_header::Encoding;
+use crate::headers::frame_header::FrameType;
use crate::headers::{Orientation, color_encoding::ColorSpace, extra_channels::ExtraChannel};
+use crate::image::Image;
use crate::image::Rect;
+use crate::util::AtomicRefCell;
+use std::sync::Arc;
+
#[cfg(test)]
use crate::render::SimpleRenderPipeline;
use crate::render::buffer_splitter::BufferSplitter;
use crate::render::{LowMemoryRenderPipeline, RenderPipeline, RenderPipelineBuilder, stages::*};
use crate::{
api::JxlPixelFormat,
- frame::{DecoderState, Frame, LfGlobalState},
+ frame::{DecoderState, Frame},
headers::frame_header::FrameHeader,
};
@@ -66,7 +77,7 @@ impl Frame {
mut pipeline: RenderPipelineBuilder<P>,
channels: &[usize],
data_format: JxlDataFormat,
- ) -> Result<RenderPipelineBuilder<P>> {
+ ) -> RenderPipelineBuilder<P> {
use crate::render::stages::{
ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage,
};
@@ -75,24 +86,24 @@ impl Frame {
JxlDataFormat::U8 { bit_depth } => {
for &channel in channels {
pipeline =
- pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth))?;
+ pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth));
}
}
JxlDataFormat::U16 { bit_depth, .. } => {
for &channel in channels {
pipeline =
- pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth))?;
+ pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth));
}
}
JxlDataFormat::F16 { .. } => {
for &channel in channels {
- pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel))?;
+ pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel));
}
}
// F32 doesn't need conversion - the pipeline already uses f32
JxlDataFormat::F32 { .. } => {}
}
- Ok(pipeline)
+ pipeline
}
/// Check if CMS will consume a black channel that the user requested in the output.
@@ -126,8 +137,10 @@ impl Frame {
api_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>,
pixel_format: &JxlPixelFormat,
groups: Vec<(usize, Vec<(usize, BitReader)>)>,
+ do_flush: bool,
+ output_profile: &JxlColorProfile,
) -> Result<()> {
- if self.render_pipeline.is_none() {
+ if self.render_pipeline.is_none() || self.lf_global.is_none() {
assert_eq!(groups.iter().map(|x| x.1.len()).sum::<usize>(), 0);
// We don't yet have any output ready (as the pipeline would be initialized otherwise),
// so exit without doing anything.
@@ -194,41 +207,128 @@ impl Frame {
pipeline!(self, p, p.render_outside_frame(&mut buffer_splitter)?);
- // Render data from the lf global section, if we didn't do so already, before rendering HF.
- if !self.lf_global_was_rendered {
- self.lf_global_was_rendered = true;
- let lf_global = self.lf_global.as_mut().unwrap();
- let mut pass_to_pipeline = |chan, group, num_passes, image| {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+
+ modular_global.set_pipeline_used_channels(pipeline!(self, p, p.used_channel_mask()));
+
+ // STEP 1: if we are requesting a flush, and did not flush before, mark modular channels
+ // as having been decoded as 0.
+ if !self.was_flushed_once && do_flush {
+ self.was_flushed_once = true;
+ self.groups_to_flush.extend(0..self.header.num_groups());
+ modular_global.zero_fill_empty_channels(
+ self.header.passes.num_passes as usize,
+ self.header.num_groups(),
+ self.header.num_lf_groups(),
+ )?;
+ }
+
+ // STEP 2: ensure that groups that will be re-rendered are marked as such.
+ // VarDCT data to be rendered.
+ for (g, _) in groups.iter() {
+ self.groups_to_flush.insert(*g);
+ pipeline!(self, p, p.mark_group_to_rerender(*g));
+ }
+ // Modular data to be re-rendered.
+ {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ for (group, passes) in groups.iter() {
+ for (pass, _) in passes.iter() {
+ modular_global.mark_group_to_be_read(2 + *pass, *group);
+ }
+ }
+ let mut pass_to_pipeline = |_, group, _, _| {
+ self.groups_to_flush.insert(group);
+ pipeline!(self, p, p.mark_group_to_rerender(group));
+ Ok(())
+ };
+ modular_global.process_output(&self.header, true, &mut pass_to_pipeline)?;
+ }
+
+ // STEP 3: decode the groups, eagerly rendering VarDCT channels and noise.
+ for (group, mut passes) in groups {
+ if self.decode_hf_group(group, &mut passes, &mut buffer_splitter, do_flush)? {
+ self.changed_since_last_flush
+ .insert((group, RenderUnit::VarDCT));
+ }
+ }
+
+ // STEP 4: process all modular transforms that can now be processed,
+ // flushing buffers that will not be used again, if either we are forcing a render now
+ // or we are done with the file.
+ if self.incomplete_groups == 0 || do_flush {
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ let mut pass_to_pipeline = |chan, group, complete, image: Option<Image<i32>>| {
+ self.changed_since_last_flush
+ .insert((group, RenderUnit::Modular(chan)));
pipeline!(
self,
p,
- p.set_buffer_for_group(chan, group, num_passes, image, &mut buffer_splitter)?
+ p.set_buffer_for_group(
+ chan,
+ group,
+ complete,
+ image.unwrap(),
+ &mut buffer_splitter
+ )?
);
Ok(())
};
- lf_global
- .modular_global
- .process_output(0, 0, &self.header, &mut pass_to_pipeline)?;
- for group in 0..self.header.num_lf_groups() {
- lf_global.modular_global.process_output(
- 1,
- group,
- &self.header,
- &mut pass_to_pipeline,
- )?;
+ modular_global.process_output(&self.header, false, &mut pass_to_pipeline)?;
+
+ // STEP 5: re-render VarDCT/noise data in rendered groups for which it was
+ // not rendered, or re-send to pipeline modular channels that were not
+ // updated in those groups.
+ for g in std::mem::take(&mut self.groups_to_flush) {
+ if self
+ .changed_since_last_flush
+ .take(&(g, RenderUnit::VarDCT))
+ .is_none()
+ {
+ self.decode_hf_group(g, &mut [], &mut buffer_splitter, true)?;
+ }
+ let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global;
+ let mut pass_to_pipeline = |chan, group, complete, image| {
+ pipeline!(
+ self,
+ p,
+ p.set_buffer_for_group(chan, group, complete, image, &mut buffer_splitter)?
+ );
+ Ok(())
+ };
+ for c in modular_global.channel_range() {
+ if self
+ .changed_since_last_flush
+ .take(&(g, RenderUnit::Modular(c)))
+ .is_none()
+ {
+ modular_global.flush_output(g, c, &mut pass_to_pipeline)?;
+ }
+ }
}
}
- for (group, passes) in groups {
- // TODO(veluca): render all the available passes at once.
- for (pass, br) in passes {
- self.decode_hf_group(group, pass, br, &mut buffer_splitter)?;
- }
- }
+ let regions = buffer_splitter.into_changed_regions();
self.reference_frame_data = reference_frame_data;
self.lf_frame_data = lf_frame_data;
+ if self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1 {
+ if do_flush && let Some(buffers) = api_buffers {
+ self.maybe_preview_lf_frame(
+ pixel_format,
+ buffers,
+ Some(&regions[..]),
+ output_profile,
+ )?;
+ } else if self.incomplete_groups == 0 {
+ // If we are not requesting another flush at the end of the LF frame, we
+ // probably have a partial render. Ensure we re-render the LF frame when
+ // decoding the actual frame.
+ self.decoder_state.lf_frame_was_rendered = false;
+ }
+ }
+
Ok(())
}
@@ -236,8 +336,12 @@ impl Frame {
pub(crate) fn build_render_pipeline<T: RenderPipeline>(
decoder_state: &DecoderState,
frame_header: &FrameHeader,
- lf_global: &LfGlobalState,
- epf_sigma: &Option<SigmaSource>,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ splines: Arc<AtomicRefCell<Splines>>,
+ noise: Arc<AtomicRefCell<Noise>>,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ epf_sigma: Arc<AtomicRefCell<SigmaSource>>,
pixel_format: &JxlPixelFormat,
cms: Option<&dyn JxlCms>,
input_profile: &JxlColorProfile,
@@ -251,31 +355,29 @@ impl Frame {
frame_header.size_upsampled(),
frame_header.upsampling.ilog2() as usize,
frame_header.log_group_dim(),
- frame_header.passes.num_passes as usize,
);
if frame_header.encoding == Encoding::Modular {
if decoder_state.file_header.image_metadata.xyb_encoded {
- pipeline = pipeline
- .add_inout_stage(ConvertModularXYBToF32Stage::new(0, &lf_global.lf_quant))?
+ pipeline = pipeline.add_inout_stage(ConvertModularXYBToF32Stage::new(0, lf_quant))
} else {
for i in 0..3 {
pipeline = pipeline
- .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth))?;
+ .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth));
}
}
}
for i in 3..num_channels {
let ec_bit_depth = metadata.extra_channel_info[i - 3].bit_depth();
- pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth))?;
+ pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth));
}
for c in 0..3 {
if frame_header.hshift(c) != 0 {
- pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c))?;
+ pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c));
}
if frame_header.vshift(c) != 0 {
- pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c))?;
+ pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c));
}
}
@@ -286,17 +388,17 @@ impl Frame {
0,
filters.gab_x_weight1,
filters.gab_x_weight2,
- ))?
+ ))
.add_inout_stage(GaborishStage::new(
1,
filters.gab_y_weight1,
filters.gab_y_weight2,
- ))?
+ ))
.add_inout_stage(GaborishStage::new(
2,
filters.gab_b_weight1,
filters.gab_b_weight2,
- ))?;
+ ));
}
let rf = &frame_header.restoration_filter;
@@ -305,24 +407,24 @@ impl Frame {
rf.epf_pass0_sigma_scale,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
if rf.epf_iters >= 1 {
pipeline = pipeline.add_inout_stage(Epf1Stage::new(
1.0,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
if rf.epf_iters >= 2 {
pipeline = pipeline.add_inout_stage(Epf2Stage::new(
rf.epf_pass2_sigma_scale,
rf.epf_border_sad_mul,
rf.epf_channel_scale,
- epf_sigma.clone().unwrap(),
- ))?
+ epf_sigma.clone(),
+ ))
}
let late_ec_upsample = frame_header.upsampling > 1
@@ -340,26 +442,26 @@ impl Frame {
4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, 3 + ec)),
8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, 3 + ec)),
_ => unreachable!(),
- }?;
+ };
}
}
}
if frame_header.has_patches() {
- pipeline = pipeline.add_inplace_stage(PatchesStage {
- patches: lf_global.patches.clone().unwrap(),
- extra_channels: metadata.extra_channel_info.clone(),
- decoder_state: decoder_state.reference_frames.clone(),
- })?
+ pipeline = pipeline.add_inplace_stage(PatchesStage::new(
+ patches,
+ metadata.extra_channel_info.clone(),
+ decoder_state.reference_frames.clone(),
+ ))
}
if frame_header.has_splines() {
pipeline = pipeline.add_inplace_stage(SplinesStage::new(
- lf_global.splines.clone().unwrap(),
+ splines,
frame_header.size(),
- &lf_global.color_correlation_params.unwrap_or_default(),
+ color_correlation_params.clone(),
decoder_state.high_precision,
- )?)?
+ ))
}
if frame_header.upsampling > 1 {
@@ -375,20 +477,20 @@ impl Frame {
4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, c)),
8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, c)),
_ => unreachable!(),
- }?;
+ };
}
}
if frame_header.has_noise() {
pipeline = pipeline
- .add_inout_stage(ConvolveNoiseStage::new(num_channels))?
- .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1))?
- .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2))?
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels))
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1))
+ .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2))
.add_inplace_stage(AddNoiseStage::new(
- *lf_global.noise.as_ref().unwrap(),
- lf_global.color_correlation_params.unwrap_or_default(),
+ noise,
+ color_correlation_params,
num_channels,
- ))?;
+ ));
}
// Calculate the actual number of API-provided buffers based on pixel_format.
@@ -414,7 +516,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
if frame_header.can_be_referenced && frame_header.save_before_ct {
@@ -426,7 +528,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
@@ -461,9 +563,9 @@ impl Frame {
let xyb_encoded = decoder_state.file_header.image_metadata.xyb_encoded;
if frame_header.do_ycbcr {
- pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0))?;
+ pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0));
} else if xyb_encoded {
- pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone()))?;
+ pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone()));
}
// Insert CMS stage if profiles differ.
@@ -547,7 +649,7 @@ impl Frame {
out_channels,
cms_black_channel,
max_pixels,
- ))?;
+ ));
cms_used = true;
}
}
@@ -556,7 +658,7 @@ impl Frame {
// - Only if output is non-linear AND
// - CMS was not used (CMS already handles the full conversion including TF)
if xyb_encoded && !output_tf.is_linear() && !cms_used {
- pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone()))?;
+ pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone()));
}
if frame_header.needs_blending() {
@@ -564,14 +666,14 @@ impl Frame {
frame_header,
&decoder_state.file_header,
decoder_state.reference_frames.clone(),
- )?)?;
+ )?);
// TODO(veluca): we might not need to add an extend stage if the image size is
// compatible with the frame size.
pipeline = pipeline.add_extend_stage(ExtendToImageDimensionsStage::new(
frame_header,
&decoder_state.file_header,
decoder_state.reference_frames.clone(),
- )?)?;
+ )?);
}
if frame_header.can_be_referenced && !frame_header.save_before_ct {
@@ -583,7 +685,7 @@ impl Frame {
JxlColorType::Grayscale,
JxlDataFormat::f32(),
false,
- )?;
+ );
}
}
@@ -597,7 +699,7 @@ impl Frame {
{
if info.ec_type == ExtraChannel::SpotColor {
pipeline = pipeline
- .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap()))?;
+ .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap()));
}
}
}
@@ -659,10 +761,10 @@ impl Frame {
0,
num_color_channels,
alpha_channel,
- ))?;
+ ));
}
// Add conversion stages for non-float output formats
- pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df)?;
+ pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df);
pipeline = pipeline.add_save_stage(
color_source_channels,
metadata.orientation,
@@ -670,20 +772,26 @@ impl Frame {
pixel_format.color_type,
*df,
fill_opaque_alpha,
- )?;
+ );
}
+ let mut save_idx = if pixel_format.color_data_format.is_some() {
+ 1
+ } else {
+ 0
+ };
for i in 0..frame_header.num_extra_channels as usize {
if let Some(df) = &pixel_format.extra_channel_format[i] {
// Add conversion stages for non-float output formats
- pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df)?;
+ pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df);
pipeline = pipeline.add_save_stage(
&[3 + i],
metadata.orientation,
- 1 + i,
+ save_idx,
JxlColorType::Grayscale,
*df,
false,
- )?;
+ );
+ save_idx += 1;
}
}
}
@@ -697,20 +805,17 @@ impl Frame {
input_profile: &JxlColorProfile,
output_profile: &JxlColorProfile,
) -> Result<()> {
- let lf_global = self.lf_global.as_mut().unwrap();
- let epf_sigma = if self.header.restoration_filter.epf_iters > 0 {
- Some(SigmaSource::new(&self.header, lf_global, &self.hf_meta)?)
- } else {
- None
- };
-
#[cfg(test)]
let render_pipeline = if self.use_simple_pipeline {
Self::build_render_pipeline::<SimpleRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
@@ -720,8 +825,12 @@ impl Frame {
Self::build_render_pipeline::<LowMemoryRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
@@ -732,15 +841,19 @@ impl Frame {
let render_pipeline = Self::build_render_pipeline::<LowMemoryRenderPipeline>(
&self.decoder_state,
&self.header,
- lf_global,
- &epf_sigma,
+ self.patches.clone(),
+ self.splines.clone(),
+ self.noise.clone(),
+ self.lf_quant.clone(),
+ self.color_correlation_params.clone(),
+ self.epf_sigma.clone(),
pixel_format,
cms,
input_profile,
output_profile,
)?;
self.render_pipeline = Some(render_pipeline);
- self.lf_global_was_rendered = false;
+ self.was_flushed_once = false;
Ok(())
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
index d4299928e6e7d..5ec6ba1dea95a 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs
@@ -192,9 +192,9 @@ pub struct ColorEncoding {
impl ColorEncoding {
pub fn check(&self, _: &Empty) -> Result<(), Error> {
- if !self.want_icc
- && (self.color_space == ColorSpace::Unknown
- || self.tf.transfer_function == TransferFunction::Unknown)
+ if self.color_space == ColorSpace::Unknown
+ || self.tf.transfer_function == TransferFunction::Unknown
+ || self.color_space == ColorSpace::XYB
{
Err(Error::InvalidColorEncoding)
} else {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
index b7fc359ec7597..ffa52591b07e4 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs
@@ -851,4 +851,14 @@ mod test_frame_header {
},
)
}
+
+ #[test]
+ fn test_frame_name() {
+ let (_, frame_header, _) =
+ read_headers_and_toc(include_bytes!("../../resources/test/named_frame_test.jxl"))
+ .unwrap();
+ assert_eq!(frame_header.frame_type, FrameType::RegularFrame);
+ assert_eq!(frame_header.name, "TestFrameName");
+ assert_eq!(frame_header.name.len(), 13);
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
index 5ac270930c26e..286f8a4423b28 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs
@@ -97,7 +97,7 @@ pub enum TransformId {
Invalid = 3,
}
-#[derive(UnconditionalCoder, Debug, PartialEq)]
+#[derive(UnconditionalCoder, Debug, PartialEq, Clone)]
#[validate]
pub struct Transform {
#[coder(Bits(2))]
@@ -157,7 +157,7 @@ impl Transform {
}
}
-#[derive(UnconditionalCoder, Debug, PartialEq)]
+#[derive(UnconditionalCoder, Debug, PartialEq, Clone)]
pub struct GroupHeader {
pub use_global_tree: bool,
pub wp_header: WeightedHeader,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
index 8144607a21d19..37617a15fa9e8 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs
@@ -20,7 +20,8 @@ mod stream;
mod tag;
use header::read_header;
-use stream::{IccStream, read_varint_from_reader};
+use stream::IccStream;
+pub(crate) use stream::read_varint_from_reader;
use tag::{read_single_command, read_tag_list};
const ICC_CONTEXTS: usize = 41;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
index 4ca91091311d4..0de64e9422655 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs
@@ -25,7 +25,7 @@ fn read_varint(mut read_one: impl FnMut() -> Result<u8>) -> Result<u64> {
Ok(value)
}
-pub(super) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> {
+pub(crate) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> {
read_varint(|| stream.read_u8().map_err(|_| Error::IccEndOfStream))
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
index 3fbbb20562fc6..9c689e788a29a 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs
@@ -20,6 +20,10 @@ pub struct OwnedRawImage {
}
impl OwnedRawImage {
+ pub fn new(byte_size: (usize, usize)) -> Result<Self> {
+ Self::new_zeroed_with_padding(byte_size, (0, 0), (0, 0))
+ }
+
pub fn new_zeroed_with_padding(
byte_size: (usize, usize),
offset: (usize, usize),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
index d5c1b06c38119..60fab1c83c5d9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs
@@ -7,22 +7,28 @@ use crate::{api::JxlOutputBuffer, headers::Orientation, image::Rect, util::Shift
// Information for splitting the output buffers.
#[derive(Debug)]
-pub(super) struct SaveStageBufferInfo {
- pub(super) downsample: (u8, u8),
- pub(super) orientation: Orientation,
- pub(super) byte_size: usize,
- pub(super) after_extend: bool,
+pub struct SaveStageBufferInfo {
+ pub downsample: (u8, u8),
+ pub orientation: Orientation,
+ pub byte_size: usize,
+ pub after_extend: bool,
}
/// Data structure responsible for handing out access to portions of the output buffers.
-pub struct BufferSplitter<'a, 'b>(&'a mut [Option<JxlOutputBuffer<'b>>]);
+pub struct BufferSplitter<'a, 'b> {
+ buffers: &'a mut [Option<JxlOutputBuffer<'b>>],
+ requested_rects: Vec<Rect>,
+}
impl<'a, 'b> BufferSplitter<'a, 'b> {
pub fn new(bufs: &'a mut [Option<JxlOutputBuffer<'b>>]) -> Self {
- Self(bufs)
+ Self {
+ buffers: bufs,
+ requested_rects: vec![],
+ }
}
- pub(super) fn get_local_buffers(
+ pub(crate) fn get_local_buffers(
&mut self,
save_buffer_info: &[Option<SaveStageBufferInfo>],
rect: Rect,
@@ -31,8 +37,9 @@ impl<'a, 'b> BufferSplitter<'a, 'b> {
full_image_size: (usize, usize),
frame_origin: (isize, isize),
) -> Vec<Option<JxlOutputBuffer<'_>>> {
+ self.requested_rects.push(rect);
let mut local_buffers = vec![];
- let buffers = &mut *self.0;
+ let buffers = &mut *self.buffers;
local_buffers.reserve(buffers.len());
for _ in 0..buffers.len() {
local_buffers.push(None::<JxlOutputBuffer>);
@@ -97,7 +104,11 @@ impl<'a, 'b> BufferSplitter<'a, 'b> {
local_buffers
}
+ pub fn into_changed_regions(self) -> Vec<Rect> {
+ self.requested_rects
+ }
+
pub fn get_full_buffers(&mut self) -> &mut [Option<JxlOutputBuffer<'b>>] {
- &mut *self.0
+ &mut *self.buffers
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
index 0e4957a62401b..9523e1d1607ff 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs
@@ -6,8 +6,10 @@
use crate::api::{JxlColorType, JxlDataFormat};
use crate::error::{Error, Result};
use crate::headers::Orientation;
+use crate::render::StageSpecialCase;
use crate::render::internal::ChannelInfo;
use crate::render::save::SaveStage;
+use crate::render::stages::ConvertI32ToU8Stage;
use crate::util::{ShiftRightCeil, tracing_wrappers::*};
use super::internal::{RenderPipelineShared, Stage};
@@ -25,7 +27,6 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
size: (usize, usize),
downsampling_shift: usize,
mut log_group_size: usize,
- num_passes: usize,
chunk_size: usize,
) -> Self {
info!("creating render pipeline");
@@ -47,70 +48,20 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
log_group_size,
group_count: (size.0.shrc(log_group_size), size.1.shrc(log_group_size)),
stages: vec![],
- group_chan_ready_passes: vec![
- vec![0; num_channels];
- size.0.shrc(log_group_size)
- * size.1.shrc(log_group_size)
+ group_chan_complete: vec![
+ vec![false; num_channels];
+ size.0.shrc(log_group_size) * size.1.shrc(log_group_size)
],
- num_passes,
chunk_size,
extend_stage_index: None,
+ channel_is_used: vec![false; num_channels],
},
}
}
- pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Result<Self> {
- let input_type = stage.input_type();
- let output_type = stage.output_type();
- let shift = stage.shift();
- let border = stage.border();
- let is_extend = matches!(stage, Stage::Extend(_));
- let current_info = self.shared.channel_info.last().unwrap().clone();
- debug!(
- last_stage_channel_info = ?current_info,
- extend_stage_index= ?self.shared.extend_stage_index,
- "adding stage '{stage}'",
- );
- let mut after_info = vec![];
- for (c, info) in current_info.iter().enumerate() {
- if !stage.uses_channel(c) {
- after_info.push(ChannelInfo {
- ty: info.ty,
- downsample: (0, 0),
- });
- } else {
- if let Some(ty) = info.ty
- && ty != input_type
- {
- return Err(Error::PipelineChannelTypeMismatch(
- stage.to_string(),
- c,
- input_type,
- ty,
- ));
- }
- after_info.push(ChannelInfo {
- ty: Some(output_type.unwrap_or(input_type)),
- downsample: shift,
- });
- }
- }
- if self.shared.extend_stage_index.is_some()
- && (shift != (0, 0) || border != (0, 0) || is_extend)
- {
- return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string()));
- }
- if is_extend {
- self.shared.extend_stage_index = Some(self.shared.stages.len());
- }
- debug!(
- new_channel_info = ?after_info,
- extend_stage_index= ?self.shared.extend_stage_index,
- "added stage '{stage}'",
- );
- self.shared.channel_info.push(after_info);
+ pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Self {
self.shared.stages.push(stage);
- Ok(self)
+ self
}
pub fn new(
@@ -118,19 +69,16 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
size: (usize, usize),
downsampling_shift: usize,
log_group_size: usize,
- num_passes: usize,
) -> Self {
Self::new_with_chunk_size(
num_channels,
size,
downsampling_shift,
log_group_size,
- num_passes,
1 << (log_group_size + downsampling_shift),
)
}
- #[instrument(skip_all, err)]
pub fn add_save_stage(
self,
channels: &[usize],
@@ -139,7 +87,7 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
color_type: JxlColorType,
data_format: JxlDataFormat,
fill_opaque_alpha: bool,
- ) -> Result<Self> {
+ ) -> Self {
let stage = SaveStage::new(
channels,
orientation,
@@ -151,25 +99,131 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
self.add_stage_internal(Stage::Save(stage))
}
- #[instrument(skip_all, err)]
- pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Result<Self> {
+ pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Self {
self.add_stage_internal(Stage::Extend(extend))
}
- #[instrument(skip_all, err)]
- pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Result<Self> {
+ pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Self {
self.add_stage_internal(Stage::InPlace(Pipeline::box_inplace_stage(stage)))
}
- #[instrument(skip_all, err)]
- pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Result<Self> {
+ pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Self {
self.add_stage_internal(Stage::InOut(Pipeline::box_inout_stage(stage)))
}
#[instrument(skip_all, err)]
pub fn build(mut self) -> Result<Box<Pipeline>> {
+ let mut stage_is_used = vec![false; self.shared.stages.len()];
+ let num_channels = self.shared.num_channels();
+ let mut channel_next_use = vec![None; num_channels];
+ // Prune unused stages.
+ for i in (0..self.shared.stages.len()).rev() {
+ let stage = &self.shared.stages[i];
+ if matches!(stage, Stage::Save(_)) {
+ for (c, next_use) in channel_next_use.iter_mut().enumerate() {
+ if stage.uses_channel(c) {
+ self.shared.channel_is_used[c] = true;
+ *next_use = Some(i);
+ }
+ }
+ }
+ for c in 0..num_channels {
+ if stage.uses_channel(c) {
+ stage_is_used[i] |= self.shared.channel_is_used[c];
+ }
+ }
+ if stage_is_used[i] {
+ match self.shared.stages[i].is_special_case() {
+ None => (),
+ Some(StageSpecialCase::F32ToU8 { .. }) => (),
+ Some(StageSpecialCase::ModularToF32 { channel, bit_depth }) => {
+ let n = channel_next_use[channel].unwrap();
+ if let Some(StageSpecialCase::F32ToU8 {
+ channel: c,
+ bit_depth: b,
+ }) = self.shared.stages[n].is_special_case()
+ {
+ assert_eq!(c, channel);
+ if b % bit_depth == 0 {
+ let mult = ((1 << b) - 1) / ((1 << bit_depth) - 1);
+ // Remove the next stage, and replace the current stage with I32 -> U8
+ // conversion.
+ stage_is_used[n] = false;
+ self.shared.stages[i] = Stage::InOut(Pipeline::box_inout_stage(
+ ConvertI32ToU8Stage::new(c, mult, (1 << b) - 1),
+ ));
+ }
+ }
+ }
+ }
+ for (c, next_use) in channel_next_use.iter_mut().enumerate() {
+ if self.shared.stages[i].uses_channel(c) {
+ self.shared.channel_is_used[c] = true;
+ *next_use = Some(i);
+ }
+ }
+ }
+ }
+ self.shared.stages = self
+ .shared
+ .stages
+ .into_iter()
+ .zip(stage_is_used)
+ .filter_map(|(s, used)| used.then_some(s))
+ .collect();
+ for (i, stage) in self.shared.stages.iter().enumerate() {
+ let input_type = stage.input_type();
+ let output_type = stage.output_type();
+ let shift = stage.shift();
+ let border = stage.border();
+ let is_extend = matches!(stage, Stage::Extend(_));
+ let current_info = self.shared.channel_info.last().unwrap().clone();
+ debug!(
+ last_stage_channel_info = ?current_info,
+ extend_stage_index= ?self.shared.extend_stage_index,
+ "adding stage '{stage}'",
+ );
+ let mut after_info = vec![];
+ for (c, info) in current_info.iter().enumerate() {
+ if !stage.uses_channel(c) {
+ after_info.push(ChannelInfo {
+ ty: info.ty,
+ downsample: (0, 0),
+ });
+ } else {
+ if let Some(ty) = info.ty
+ && ty != input_type
+ {
+ return Err(Error::PipelineChannelTypeMismatch(
+ stage.to_string(),
+ c,
+ input_type,
+ ty,
+ ));
+ }
+ after_info.push(ChannelInfo {
+ ty: Some(output_type.unwrap_or(input_type)),
+ downsample: shift,
+ });
+ }
+ }
+ if self.shared.extend_stage_index.is_some()
+ && (shift != (0, 0) || border != (0, 0) || is_extend)
+ {
+ return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string()));
+ }
+ if is_extend {
+ self.shared.extend_stage_index = Some(i);
+ }
+ debug!(
+ new_channel_info = ?after_info,
+ extend_stage_index= ?self.shared.extend_stage_index,
+ "added stage '{stage}'",
+ );
+ self.shared.channel_info.push(after_info);
+ }
+
let channel_info = &mut self.shared.channel_info;
- let num_channels = channel_info[0].len();
let mut cur_downsamples = vec![(0u8, 0u8); num_channels];
for (s, stage) in self.shared.stages.iter().enumerate().rev() {
let [current_info, next_info, ..] = &mut channel_info[s..] else {
@@ -232,11 +286,12 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> {
);
}
- // Ensure all channels have been used, so that we know the types of all buffers at all
- // stages.
for (c, chinfo) in channel_info.iter().flat_map(|x| x.iter().enumerate()) {
if chinfo.ty.is_none() {
- return Err(Error::PipelineChannelUnused(c));
+ assert!(!self.shared.channel_is_used[c]);
+ for g in self.shared.group_chan_complete.iter_mut() {
+ g[c] = true;
+ }
}
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
index 89e2989d03efa..e2a98cd58a36c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs
@@ -8,6 +8,7 @@ use std::fmt::Display;
use crate::error::Result;
use crate::image::{DataTypeTag, ImageDataType};
+use crate::render::StageSpecialCase;
use crate::util::ShiftRightCeil;
use super::save::SaveStage;
@@ -74,6 +75,13 @@ impl<Buffer: 'static> Stage<Buffer> {
_ => None,
}
}
+ pub(super) fn is_special_case(&self) -> Option<StageSpecialCase> {
+ match self {
+ Stage::InOut(s) => s.is_special_case(),
+ Stage::InPlace(s) => s.is_special_case(),
+ _ => None,
+ }
+ }
}
impl<Buffer> Display for Stage<Buffer> {
@@ -98,11 +106,11 @@ pub struct RenderPipelineShared<Buffer> {
pub input_size: (usize, usize),
pub log_group_size: usize,
pub group_count: (usize, usize),
- pub group_chan_ready_passes: Vec<Vec<usize>>,
- pub num_passes: usize,
+ pub group_chan_complete: Vec<Vec<bool>>,
pub chunk_size: usize,
pub stages: Vec<Stage<Buffer>>,
pub extend_stage_index: Option<usize>,
+ pub channel_is_used: Vec<bool>,
}
impl<Buffer> RenderPipelineShared<Buffer> {
@@ -158,7 +166,11 @@ impl<Buffer> RenderPipelineShared<Buffer> {
}
pub fn num_channels(&self) -> usize {
- self.channel_info[0].len()
+ self.channel_is_used.len()
+ }
+
+ pub fn num_used_channels(&self) -> usize {
+ self.channel_is_used.iter().filter(|x| **x).count()
}
}
@@ -171,6 +183,7 @@ pub trait InPlaceStage: Any + Display {
fn init_local_state(&self, thread_index: usize) -> Result<Option<Box<dyn Any>>>;
fn uses_channel(&self, c: usize) -> bool;
fn ty(&self) -> DataTypeTag;
+ fn is_special_case(&self) -> Option<StageSpecialCase>;
}
pub trait RunInPlaceStage<Buffer: PipelineBuffer>: InPlaceStage {
@@ -192,6 +205,9 @@ impl<T: RenderPipelineInPlaceStage> InPlaceStage for T {
fn ty(&self) -> DataTypeTag {
T::Type::DATA_TYPE_ID
}
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ self.is_special_case()
+ }
}
pub trait InOutStage: Any + Display {
@@ -201,6 +217,7 @@ pub trait InOutStage: Any + Display {
fn uses_channel(&self, c: usize) -> bool;
fn input_type(&self) -> DataTypeTag;
fn output_type(&self) -> DataTypeTag;
+ fn is_special_case(&self) -> Option<StageSpecialCase>;
}
impl<T: RenderPipelineInOutStage> InOutStage for T {
@@ -222,6 +239,9 @@ impl<T: RenderPipelineInOutStage> InOutStage for T {
fn output_type(&self) -> DataTypeTag {
T::OutputT::DATA_TYPE_ID
}
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ self.is_special_case()
+ }
}
pub trait RunInOutStage<Buffer: PipelineBuffer>: InOutStage {
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs
new file mode 100644
index 0000000000000..abc810ef8a42c
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs
@@ -0,0 +1,372 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+use std::ops::Range;
+
+use crate::error::Result;
+use crate::image::{OwnedRawImage, Rect};
+use crate::render::LowMemoryRenderPipeline;
+use crate::render::buffer_splitter::BufferSplitter;
+use crate::render::internal::{ChannelInfo, Stage};
+use crate::util::tracing_wrappers::*;
+
+pub(super) struct InputBuffer {
+ // One buffer per channel.
+ pub(super) data: Vec<Option<OwnedRawImage>>,
+ // Storage for left/right borders. Includes corners.
+ pub(super) leftright: Vec<Option<OwnedRawImage>>,
+ // Storage for top/bottom borders. Includes corners.
+ pub(super) topbottom: Vec<Option<OwnedRawImage>>,
+ // Number of ready channels in the current pass.
+ ready_channels: usize,
+ pub(super) is_ready: bool,
+ num_completed_groups_3x3: usize,
+}
+
+impl InputBuffer {
+ pub(super) fn set_buffer(&mut self, chan: usize, buf: OwnedRawImage) {
+ assert!(self.data[chan].is_none());
+ self.data[chan] = Some(buf);
+ self.ready_channels += 1;
+ }
+
+ pub(super) fn new(num_channels: usize) -> Self {
+ let b = || (0..num_channels).map(|_| None).collect();
+ Self {
+ data: b(),
+ leftright: b(),
+ topbottom: b(),
+ ready_channels: 0,
+ is_ready: false,
+ num_completed_groups_3x3: 0,
+ }
+ }
+}
+
+// Finds a small set of rectangles that cover all the "true" values in `ready_mask`,
+// and calls `f` on each such rectangle.
+fn foreach_ready_rect(
+ ready_mask: [bool; 9],
+ mut f: impl FnMut(Range<u8>, Range<u8>) -> Result<()>,
+) -> Result<()> {
+ // x range in middle row
+ let xrange = (1 - ready_mask[3] as u8)..(2 + ready_mask[5] as u8);
+ let can_extend_top = xrange.clone().all(|x| ready_mask[x as usize]);
+ let can_extend_bottom = xrange.clone().all(|x| ready_mask[6 + x as usize]);
+ let yrange = (1 - can_extend_top as u8)..(2 + can_extend_bottom as u8);
+ f(xrange.clone(), yrange)?;
+
+ if !can_extend_top {
+ if ready_mask[1] {
+ let xrange = (1 - ready_mask[0] as u8)..(2 + ready_mask[2] as u8);
+ f(xrange, 0..1)?;
+ } else {
+ if ready_mask[0] {
+ f(0..1, 0..1)?;
+ }
+ if ready_mask[2] {
+ f(2..3, 0..1)?;
+ }
+ }
+ } else {
+ if ready_mask[0] && !xrange.contains(&0) {
+ f(0..1, 0..1)?;
+ }
+ if ready_mask[2] && !xrange.contains(&2) {
+ f(2..3, 0..1)?;
+ }
+ }
+
+ if !can_extend_bottom {
+ if ready_mask[7] {
+ let xrange = (1 - ready_mask[6] as u8)..(2 + ready_mask[8] as u8);
+ f(xrange, 2..3)?;
+ } else {
+ if ready_mask[6] {
+ f(0..1, 2..3)?;
+ }
+ if ready_mask[8] {
+ f(2..3, 2..3)?;
+ }
+ }
+ } else {
+ if ready_mask[6] && !xrange.contains(&0) {
+ f(0..1, 2..3)?;
+ }
+ if ready_mask[8] && !xrange.contains(&2) {
+ f(2..3, 2..3)?;
+ }
+ }
+
+ Ok(())
+}
+
+impl LowMemoryRenderPipeline {
+ pub(super) fn maybe_get_scratch_buffer(
+ &mut self,
+ channel: usize,
+ kind: usize,
+ ) -> Option<OwnedRawImage> {
+ self.scratch_channel_buffers[channel * 3 + kind].pop()
+ }
+
+ fn store_scratch_buffer(&mut self, channel: usize, kind: usize, image: OwnedRawImage) {
+ self.scratch_channel_buffers[channel * 3 + kind].push(image)
+ }
+
+ pub(super) fn render_with_new_group(
+ &mut self,
+ g: usize,
+ buffer_splitter: &mut BufferSplitter,
+ ) -> Result<()> {
+ let buf = &mut self.input_buffers[g];
+ assert!(buf.ready_channels <= self.shared.num_used_channels());
+ if buf.ready_channels != self.shared.num_used_channels() {
+ return Ok(());
+ }
+ buf.ready_channels = 0;
+ let (gx, gy) = self.shared.group_position(g);
+ debug!("new data ready for group {gx},{gy}");
+
+ // Prepare output buffers for the group.
+ let (origin, size) = if let Some(e) = self.shared.extend_stage_index {
+ let Stage::Extend(e) = &self.shared.stages[e] else {
+ unreachable!("extend stage is not an extend stage");
+ };
+ (e.frame_origin, e.image_size)
+ } else {
+ ((0, 0), self.shared.input_size)
+ };
+ let gsz = 1 << self.shared.log_group_size;
+ let group_rect = Rect {
+ size: (gsz, gsz),
+ origin: (gsz * gx, gsz * gy),
+ }
+ .clip(self.shared.input_size);
+
+ {
+ for c in 0..self.shared.num_channels() {
+ if !self.shared.channel_is_used[c] {
+ continue;
+ }
+ let (bx, by) = self.border_size;
+ let (sx, sy) = self.input_buffers[g].data[c].as_ref().unwrap().byte_size();
+ let ChannelInfo {
+ ty,
+ downsample: (dx, dy),
+ } = self.shared.channel_info[0][c];
+ let ty = ty.unwrap();
+ let bx = bx >> dx;
+ let by = by >> dy;
+ let mut topbottom = if let Some(b) = self.input_buffers[g].topbottom[c].take() {
+ b
+ } else if let Some(b) = self.maybe_get_scratch_buffer(c, 1) {
+ b
+ } else {
+ let height = 4 * by;
+ let width = (1 << self.shared.log_group_size) * ty.size();
+ OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))?
+ };
+ let mut leftright = if let Some(b) = self.input_buffers[g].leftright[c].take() {
+ b
+ } else if let Some(b) = self.maybe_get_scratch_buffer(c, 2) {
+ b
+ } else {
+ let height = 1 << self.shared.log_group_size;
+ let width = 4 * bx * ty.size();
+ OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))?
+ };
+ let input = self.input_buffers[g].data[c].as_ref().unwrap();
+ if by != 0 {
+ for y in 0..(2 * by).min(sy) {
+ topbottom.row_mut(y)[..sx].copy_from_slice(input.row(y));
+ topbottom.row_mut(4 * by - 1 - y)[..sx]
+ .copy_from_slice(input.row(sy - y - 1));
+ }
+ }
+ if bx != 0 {
+ let cs = (bx * 2 * ty.size()).min(sx);
+ for y in 0..sy {
+ let row_out = leftright.row_mut(y);
+ let row_in = input.row(y);
+ row_out[..cs].copy_from_slice(&row_in[..cs]);
+ row_out[4 * bx * ty.size() - cs..].copy_from_slice(&row_in[sx - cs..]);
+ }
+ }
+ self.input_buffers[g].leftright[c] = Some(leftright);
+ self.input_buffers[g].topbottom[c] = Some(topbottom);
+ }
+ self.input_buffers[g].is_ready = true;
+ }
+
+ let gxm1 = gx.saturating_sub(1);
+ let gym1 = gy.saturating_sub(1);
+ let gxp1 = (gx + 1).min(self.shared.group_count.0 - 1);
+ let gyp1 = (gy + 1).min(self.shared.group_count.1 - 1);
+ let gw = self.shared.group_count.0;
+ // TODO(veluca): this code probably needs to be adapted for multithreading.
+ let mut ready_mask = [
+ self.input_buffers[gym1 * gw + gxm1].is_ready,
+ self.input_buffers[gym1 * gw + gx].is_ready,
+ self.input_buffers[gym1 * gw + gxp1].is_ready,
+ self.input_buffers[gy * gw + gxm1].is_ready,
+ self.input_buffers[gy * gw + gx].is_ready, // should be guaranteed to be true.
+ self.input_buffers[gy * gw + gxp1].is_ready,
+ self.input_buffers[gyp1 * gw + gxm1].is_ready,
+ self.input_buffers[gyp1 * gw + gx].is_ready,
+ self.input_buffers[gyp1 * gw + gxp1].is_ready,
+ ];
+ // We can only render a corner if we have all the 4 adjacent groups. Thus, mask out corners if
+ // the corresponding side buffers are not ready.
+ ready_mask[0] &= ready_mask[1];
+ ready_mask[0] &= ready_mask[3];
+ ready_mask[2] &= ready_mask[1];
+ ready_mask[2] &= ready_mask[5];
+ ready_mask[6] &= ready_mask[3];
+ ready_mask[6] &= ready_mask[7];
+ ready_mask[8] &= ready_mask[5];
+ ready_mask[8] &= ready_mask[7];
+
+ foreach_ready_rect(ready_mask, |xrange, yrange| {
+ let y0 = match (gy == 0, yrange.start) {
+ (true, 0) => group_rect.origin.1,
+ (false, 0) => group_rect.origin.1 - self.border_size.1,
+ (_, 1) => group_rect.origin.1 + self.border_size.1,
+ // (_, 2)
+ _ => group_rect.end().1 - self.border_size.1,
+ };
+ let x0 = match (gx == 0, xrange.start) {
+ (true, 0) => group_rect.origin.0,
+ (false, 0) => group_rect.origin.0 - self.border_size.0,
+ (_, 1) => group_rect.origin.0 + self.border_size.0,
+ // (_, 2)
+ _ => group_rect.end().0 - self.border_size.0,
+ };
+
+ let y1 = match (gy + 1 == self.shared.group_count.1, yrange.end) {
+ (true, 3) => group_rect.end().1,
+ (false, 3) => group_rect.end().1 + self.border_size.1,
+ (_, 2) => group_rect.end().1 - self.border_size.1,
+ // (_, 1)
+ _ => group_rect.origin.1 + self.border_size.1,
+ };
+
+ let x1 = match (gx + 1 == self.shared.group_count.0, xrange.end) {
+ (true, 3) => group_rect.end().0,
+ (false, 3) => group_rect.end().0 + self.border_size.0,
+ (_, 2) => group_rect.end().0 - self.border_size.0,
+ // (_, 1)
+ _ => group_rect.origin.0 + self.border_size.0,
+ };
+
+ let image_area = Rect {
+ origin: (x0, y0),
+ size: (x1 - x0, y1 - y0),
+ };
+
+ let mut local_buffers = buffer_splitter.get_local_buffers(
+ &self.save_buffer_info,
+ image_area,
+ false,
+ self.shared.input_size,
+ size,
+ origin,
+ );
+
+ self.render_group((gx, gy), image_area, &mut local_buffers)?;
+ Ok(())
+ })?;
+
+ for c in 0..self.input_buffers[g].data.len() {
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].data[c]) {
+ self.store_scratch_buffer(c, 0, b);
+ }
+ }
+
+ // Clear border buffers that will not be used again.
+ // This is certainly the case if *all* the groups in the 3x3 group area around
+ // the current group are complete.
+ if self.shared.group_chan_complete[g].iter().all(|x| *x) {
+ for g in [
+ gym1 * gw + gxm1,
+ gym1 * gw + gx,
+ gym1 * gw + gxp1,
+ gy * gw + gxm1,
+ gy * gw + gx,
+ gy * gw + gxp1,
+ gyp1 * gw + gxm1,
+ gyp1 * gw + gx,
+ gyp1 * gw + gxp1,
+ ] {
+ self.input_buffers[g].num_completed_groups_3x3 += 1;
+ if self.input_buffers[g].num_completed_groups_3x3 != 9 {
+ continue;
+ }
+ for c in 0..self.input_buffers[g].data.len() {
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].topbottom[c]) {
+ self.store_scratch_buffer(c, 1, b);
+ }
+ if let Some(b) = std::mem::take(&mut self.input_buffers[g].leftright[c]) {
+ self.store_scratch_buffer(c, 2, b);
+ }
+ }
+ }
+ }
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_foreach_ready_rect() {
+ for i in 0..512 {
+ let mut ready_mask = [false; 9];
+ for j in 0..9 {
+ if (i >> j) & 1 == 1 {
+ ready_mask[j] = true;
+ }
+ }
+ if !ready_mask[4] {
+ continue;
+ }
+
+ let mut covered = [false; 9];
+ foreach_ready_rect(ready_mask, |xr, yr| {
+ for y in yr {
+ for x in xr.clone() {
+ let idx = (y as usize) * 3 + (x as usize);
+ assert!(
+ ready_mask[idx],
+ "Covered not ready index {} in mask {:?} (x={}, y={})",
+ idx, ready_mask, x, y
+ );
+ assert!(
+ !covered[idx],
+ "Double coverage of index {} in mask {:?}",
+ idx, ready_mask
+ );
+ covered[idx] = true;
+ }
+ }
+ Ok(())
+ })
+ .unwrap();
+
+ for j in 0..9 {
+ if ready_mask[j] {
+ assert!(
+ covered[j],
+ "Failed to cover index {} in mask {:?}",
+ j, ready_mask
+ );
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
index cf4a65e81049e..7f8214ff35abe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs
@@ -43,17 +43,3 @@ pub(super) fn get_distinct_indices<'a, T>(
.map(|x| std::mem::take(x).expect("Not all elements were found"))
.collect()
}
-
-/// Mirror-reflects a value v to fit in a [0; s) range.
-pub(super) fn mirror(mut v: isize, s: usize) -> usize {
- // TODO(veluca): consider speeding this up if needed.
- loop {
- if v < 0 {
- v = -v - 1;
- } else if v >= s as isize {
- v = s as isize * 2 - v - 1;
- } else {
- return v as usize;
- }
- }
-}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
index b2d33cade1396..761175a12aa09 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs
@@ -11,27 +11,23 @@ use row_buffers::RowBuffer;
use crate::api::JxlOutputBuffer;
use crate::error::Result;
-use crate::image::{Image, ImageDataType, OwnedRawImage, Rect};
+use crate::image::{DataTypeTag, Image, ImageDataType, OwnedRawImage, Rect};
use crate::render::MAX_BORDER;
use crate::render::buffer_splitter::{BufferSplitter, SaveStageBufferInfo};
use crate::render::internal::Stage;
+use crate::render::low_memory_pipeline::group_scheduler::InputBuffer;
use crate::util::{ShiftRightCeil, tracing_wrappers::*};
use super::RenderPipeline;
use super::internal::{RenderPipelineShared, RunInOutStage, RunInPlaceStage};
+mod group_scheduler;
mod helpers;
mod render_group;
-pub(super) mod row_buffers;
+pub(crate) mod row_buffers;
mod run_stage;
mod save;
-struct InputBuffer {
- // One buffer per channel.
- data: Vec<Option<OwnedRawImage>>,
- completed_passes: usize,
-}
-
pub struct LowMemoryRenderPipeline {
shared: RenderPipelineShared<RowBuffer>,
input_buffers: Vec<InputBuffer>,
@@ -49,7 +45,8 @@ pub struct LowMemoryRenderPipeline {
// The amount of pixels that we need to read (for every channel) in non-edge groups to run all
// stages correctly.
input_border_pixels: Vec<(usize, usize)>,
- has_nontrivial_border: bool,
+ // Size of the border, in image (i.e. non-downsampled) pixels.
+ border_size: (usize, usize),
// For every stage, the downsampling level of *any* channel that the stage uses at that point.
// Note that this must be equal across all the used channels.
downsampling_for_stage: Vec<(usize, usize)>,
@@ -60,160 +57,21 @@ pub struct LowMemoryRenderPipeline {
opaque_alpha_buffers: Vec<Option<RowBuffer>>,
// Sorted indices to call get_distinct_indices.
sorted_buffer_indices: Vec<Vec<(usize, usize, usize)>>,
- // For each channel, buffers that could be reused to store group data for that channel.
+ // For each channel and the 3 kinds of buffers (center / topbottom / leftright), buffers that
+ // could be reused to store group data for that channel.
+ // Indexed by [3*channel] = center, [3*channel+1] = topbottom, [3*channel+2] = leftright.
scratch_channel_buffers: Vec<Vec<OwnedRawImage>>,
}
-impl LowMemoryRenderPipeline {
- // TODO(veluca): most of this logic will need to change to ensure better cache utilization and
- // lower memory usage.
- fn render_with_new_group(
- &mut self,
- new_group_id: usize,
- buffer_splitter: &mut BufferSplitter,
- ) -> Result<()> {
- let (gx, gy) = self.shared.group_position(new_group_id);
-
- // We put groups that are 2 afar here, because even if they could not have become
- // renderable, they might have become freeable.
- let mut possible_groups = vec![];
- for dy in -2..=2 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -2..=2 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- possible_groups.push(igy as usize * self.shared.group_count.0 + igx as usize);
- }
- }
-
- // First, render all groups that have made progress; only check those that *could* have
- // made progress.
- for g in possible_groups.iter().copied() {
- let ready_passes = self.shared.group_chan_ready_passes[g]
- .iter()
- .copied()
- .min()
- .unwrap();
- if self.input_buffers[g].completed_passes < ready_passes {
- let (gx, gy) = self.shared.group_position(g);
- let mut fully_ready_passes = ready_passes;
- // Here we assume that we never need more than one group worth of border.
- if self.has_nontrivial_border {
- for dy in -1..=1 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -1..=1 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- let ig = (igy as usize) * self.shared.group_count.0 + igx as usize;
- let ready_passes = self.shared.group_chan_ready_passes[ig]
- .iter()
- .copied()
- .min()
- .unwrap();
- fully_ready_passes = fully_ready_passes.min(ready_passes);
- }
- }
- }
- if self.input_buffers[g].completed_passes >= fully_ready_passes {
- continue;
- }
- debug!(
- "new ready passes for group {gx},{gy} ({} completed, \
- {ready_passes} ready, {fully_ready_passes} ready including neighbours)",
- self.input_buffers[g].completed_passes
- );
-
- // Prepare output buffers for the group.
- let (origin, size) = if let Some(e) = self.shared.extend_stage_index {
- let Stage::Extend(e) = &self.shared.stages[e] else {
- unreachable!("extend stage is not an extend stage");
- };
- (e.frame_origin, e.image_size)
- } else {
- ((0, 0), self.shared.input_size)
- };
- let gsz = (
- 1 << self.shared.log_group_size,
- 1 << self.shared.log_group_size,
- );
- let rect_to_render = Rect {
- size: gsz,
- origin: (gsz.0 * gx, gsz.1 * gy),
- };
- let mut local_buffers = buffer_splitter.get_local_buffers(
- &self.save_buffer_info,
- rect_to_render,
- false,
- self.shared.input_size,
- size,
- origin,
- );
-
- self.render_group((gx, gy), &mut local_buffers)?;
-
- self.input_buffers[g].completed_passes = fully_ready_passes;
- }
- }
-
- // Clear buffers that will not be used again.
- for g in possible_groups.iter().copied() {
- let (gx, gy) = self.shared.group_position(g);
- let mut neigh_complete_passes = self.input_buffers[g].completed_passes;
- if self.has_nontrivial_border {
- for dy in -1..=1 {
- let igy = gy as isize + dy;
- if igy < 0 || igy >= self.shared.group_count.1 as isize {
- continue;
- }
- for dx in -1..=1 {
- let igx = gx as isize + dx;
- if igx < 0 || igx >= self.shared.group_count.0 as isize {
- continue;
- }
- let ig = (igy as usize) * self.shared.group_count.0 + igx as usize;
- neigh_complete_passes = self.input_buffers[ig]
- .completed_passes
- .min(neigh_complete_passes);
- }
- }
- }
- if self.shared.num_passes <= neigh_complete_passes {
- for (c, b) in self.input_buffers[g].data.iter_mut().enumerate() {
- if let Some(b) = std::mem::take(b) {
- self.scratch_channel_buffers[c].push(b);
- }
- }
- }
- }
- Ok(())
- }
-}
-
impl RenderPipeline for LowMemoryRenderPipeline {
type Buffer = RowBuffer;
fn new_from_shared(shared: RenderPipelineShared<Self::Buffer>) -> Result<Self> {
let mut input_buffers = vec![];
- for _ in 0..shared.group_chan_ready_passes.len() {
- input_buffers.push(InputBuffer {
- data: vec![],
- completed_passes: 0,
- });
- for _ in 0..shared.group_chan_ready_passes[0].len() {
- input_buffers.last_mut().unwrap().data.push(None);
- }
+ let nc = shared.num_channels();
+ for _ in 0..shared.group_chan_complete.len() {
+ input_buffers.push(InputBuffer::new(nc));
}
- let nc = shared.channel_info[0].len();
let mut previous_inout: Vec<_> = (0..nc).map(|x| (0usize, x)).collect();
let mut stage_input_buffer_index = vec![];
let mut next_border_and_cur_downsample = vec![vec![]];
@@ -245,9 +103,10 @@ impl RenderPipeline for LowMemoryRenderPipeline {
let mut initial_buffers = vec![];
for chan in 0..nc {
initial_buffers.push(RowBuffer::new(
- shared.channel_info[0][chan].ty.unwrap(),
+ shared.channel_info[0][chan].ty.unwrap_or(DataTypeTag::U8),
next_border_and_cur_downsample[0][chan].0 as usize,
0,
+ 0,
shared.chunk_size >> shared.channel_info[0][chan].downsample.0,
)?);
}
@@ -261,6 +120,7 @@ impl RenderPipeline for LowMemoryRenderPipeline {
stage.output_type().unwrap(),
*next_y_border as usize,
stage.shift().1 as usize,
+ stage.shift().0 as usize,
shared.chunk_size >> *dsx,
)?);
}
@@ -385,6 +245,24 @@ impl RenderPipeline for LowMemoryRenderPipeline {
})
.collect();
+ let mut border_size = (0, 0);
+ for c in 0..nc {
+ border_size.0 = border_size
+ .0
+ .max(border_pixels[c].0 << shared.channel_info[0][c].downsample.0);
+ border_size.1 = border_size
+ .1
+ .max(border_pixels[c].1 << shared.channel_info[0][c].downsample.1);
+ }
+ for s in 0..shared.stages.len() {
+ border_size.0 = border_size
+ .0
+ .max(border_pixels_per_stage[s].0 << downsampling_for_stage[s].0);
+ border_size.1 = border_size
+ .1
+ .max(border_pixels_per_stage[s].1 << downsampling_for_stage[s].1);
+ }
+
Ok(Self {
input_buffers,
stage_input_buffer_index,
@@ -392,7 +270,7 @@ impl RenderPipeline for LowMemoryRenderPipeline {
padding_was_rendered: false,
save_buffer_info,
stage_output_border_pixels: border_pixels_per_stage,
- has_nontrivial_border: border_pixels.iter().any(|x| *x != (0, 0)),
+ border_size,
input_border_pixels: border_pixels,
local_states: shared
.stages
@@ -403,13 +281,13 @@ impl RenderPipeline for LowMemoryRenderPipeline {
downsampling_for_stage,
opaque_alpha_buffers,
sorted_buffer_indices,
- scratch_channel_buffers: (0..nc).map(|_| vec![]).collect(),
+ scratch_channel_buffers: (0..nc * 3).map(|_| vec![]).collect(),
})
}
#[instrument(skip_all, err)]
fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>> {
- if let Some(b) = self.scratch_channel_buffers[channel].pop() {
+ if let Some(b) = self.maybe_get_scratch_buffer(channel, 0) {
return Ok(Image::from_raw(b));
}
let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
@@ -420,20 +298,23 @@ impl RenderPipeline for LowMemoryRenderPipeline {
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
- debug!(
- "filling data for group {}, channel {}, using type {:?}",
- group_id,
- channel,
- T::DATA_TYPE_ID,
- );
- self.input_buffers[group_id].data[channel] = Some(buf.into_raw());
- self.shared.group_chan_ready_passes[group_id][channel] += num_passes;
+ if self.shared.channel_is_used[channel] {
+ debug!(
+ "filling data for group {}, channel {}, using type {:?}",
+ group_id,
+ channel,
+ T::DATA_TYPE_ID,
+ );
+ self.input_buffers[group_id].set_buffer(channel, buf.into_raw());
+ self.shared.group_chan_complete[group_id][channel] = complete;
- self.render_with_new_group(group_id, buffer_splitter)
+ self.render_with_new_group(group_id, buffer_splitter)?;
+ }
+ Ok(())
}
fn check_buffer_sizes(&self, buffers: &mut [Option<JxlOutputBuffer>]) -> Result<()> {
@@ -535,6 +416,10 @@ impl RenderPipeline for LowMemoryRenderPipeline {
Ok(())
}
+ fn mark_group_to_rerender(&mut self, g: usize) {
+ self.input_buffers[g].is_ready = false;
+ }
+
fn box_inout_stage<S: super::RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn RunInOutStage<Self::Buffer>> {
@@ -546,4 +431,8 @@ impl RenderPipeline for LowMemoryRenderPipeline {
) -> Box<dyn RunInPlaceStage<Self::Buffer>> {
Box::new(stage)
}
+
+ fn used_channel_mask(&self) -> &[bool] {
+ &self.shared.channel_is_used
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
index 6d4ded09003bd..6f9b65b67e1fe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs
@@ -8,15 +8,12 @@ use std::ops::Range;
use crate::{
api::JxlOutputBuffer,
error::Result,
- image::DataTypeTag,
+ image::{DataTypeTag, Rect},
render::{
- internal::Stage,
- low_memory_pipeline::{
- helpers::{get_distinct_indices, mirror},
- run_stage::ExtraInfo,
- },
+ internal::{ChannelInfo, Stage},
+ low_memory_pipeline::{helpers::get_distinct_indices, run_stage::ExtraInfo},
},
- util::{ShiftRightCeil, SmallVec, tracing_wrappers::*},
+ util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*},
};
use super::{LowMemoryRenderPipeline, row_buffers::RowBuffer};
@@ -70,79 +67,134 @@ fn apply_x_padding(
}
impl LowMemoryRenderPipeline {
- fn fill_initial_buffers(&mut self, c: usize, y: usize, y0: usize, (gx, gy): (usize, usize)) {
- let ty = self.shared.channel_info[0][c]
- .ty
- .expect("Channel info should be populated at this point");
- let gys = 1
- << (self.shared.log_group_size - self.shared.channel_info[0][c].downsample.1 as usize);
+ fn fill_initial_buffers(
+ &mut self,
+ c: usize,
+ y: usize,
+ (x0, xsize): (usize, usize),
+ (gx, gy): (usize, usize),
+ ) {
+ if !self.shared.channel_is_used[c] {
+ return;
+ }
+ let ChannelInfo {
+ ty,
+ downsample: (dx, dy),
+ } = self.shared.channel_info[0][c];
+ let ty = ty.expect("Channel info should be populated at this point");
+ let group_ysize = 1 << (self.shared.log_group_size - dy as usize);
+ let group_xsize = 1 << (self.shared.log_group_size - dx as usize);
+
+ let (bx, by) = self.border_size;
- let (input_y, igy) = if y < y0 {
- (y + gys - y0, gy - 1)
- } else if y >= y0 + gys {
- (y - y0 - gys, gy + 1)
+ let group_y0 = gy * group_ysize;
+ let group_x0 = gx << (self.shared.log_group_size - dx as usize);
+ let group_x1 = group_x0 + group_xsize;
+
+ let (input_y, igy, is_topbottom) = if y < group_y0 {
+ (y + (by >> dy) * 4 - group_y0, gy - 1, true)
+ } else if y >= group_y0 + group_ysize {
+ (y - group_y0 - group_ysize, gy + 1, true)
} else {
- (y - y0, gy)
+ (y - group_y0, gy, false)
};
let output_row = self.row_buffers[0][c].get_row_mut::<u8>(y);
- // Both are in units of bytes.
- let x0_offset = RowBuffer::x0_byte_offset();
- let extrax = self.input_border_pixels[c].0 * ty.size();
+
+ let copy_x0 = x0.saturating_sub(self.input_border_pixels[c].0);
+ let copy_x1 =
+ (x0 + xsize + self.input_border_pixels[c].0).min(self.shared.input_size.0.shrc(dx));
+
+ debug_assert!(copy_x1 >= group_x0);
+
+ let mut copy_byte_offset = RowBuffer::x0_byte_offset() - (x0 - copy_x0) * ty.size();
let base_gid = igy * self.shared.group_count.0 + gx;
- // Previous group horizontally, if any.
- if gx > 0 && extrax != 0 {
- let input_buf = self.input_buffers[base_gid - 1].data[c].as_ref().unwrap();
+ // Previous group horizontally, if needed.
+ if copy_x0 < group_x0 {
+ let (input_buf, xs) = if is_topbottom {
+ (
+ self.input_buffers[base_gid - 1].topbottom[c]
+ .as_ref()
+ .unwrap(),
+ group_xsize,
+ )
+ } else {
+ (
+ self.input_buffers[base_gid - 1].leftright[c]
+ .as_ref()
+ .unwrap(),
+ 4 * (bx >> dx),
+ )
+ };
let input_row = input_buf.row(input_y);
- output_row[x0_offset - extrax..x0_offset]
- .copy_from_slice(&input_row[input_buf.byte_size().0 - extrax..]);
+
+ let to_copy = (group_x0 - copy_x0) * ty.size();
+ let src_byte_offset = xs * ty.size() - to_copy;
+
+ output_row[copy_byte_offset..copy_byte_offset + to_copy]
+ .copy_from_slice(&input_row[src_byte_offset..src_byte_offset + to_copy]);
+ copy_byte_offset += to_copy;
}
- let input_buf = self.input_buffers[base_gid].data[c].as_ref().unwrap();
+ let input_buf = if is_topbottom {
+ self.input_buffers[base_gid].topbottom[c].as_ref().unwrap()
+ } else {
+ self.input_buffers[base_gid].data[c].as_ref().unwrap()
+ };
let input_row = input_buf.row(input_y);
- let gxs = input_buf.byte_size().0; // bytes
- output_row[x0_offset..x0_offset + gxs].copy_from_slice(input_row);
+ let copy_start = copy_x0.saturating_sub(group_x0) * ty.size();
+ let copy_end = (copy_x1.min(group_x1) - group_x0) * ty.size();
+ let to_copy = copy_end - copy_start;
+ output_row[copy_byte_offset..copy_byte_offset + to_copy]
+ .copy_from_slice(&input_row[copy_start..copy_end]);
+ copy_byte_offset += to_copy;
// Next group horizontally, if any.
- if gx + 1 < self.shared.group_count.0 && extrax != 0 {
- let input_buf = self.input_buffers[base_gid + 1].data[c].as_ref().unwrap();
+ if copy_x1 > group_x1 {
+ let input_buf = if is_topbottom {
+ self.input_buffers[base_gid + 1].topbottom[c]
+ .as_ref()
+ .unwrap()
+ } else {
+ self.input_buffers[base_gid + 1].leftright[c]
+ .as_ref()
+ .unwrap()
+ };
let input_row = input_buf.row(input_y);
let dx = self.shared.channel_info[0][c].downsample.0;
let gid = gy * self.shared.group_count.0 + gx;
let next_group_xsize = self.shared.group_size(gid + 1).0.shrc(dx);
- let border_x = extrax.min(next_group_xsize * ty.size());
- output_row[gxs + x0_offset..gxs + x0_offset + border_x]
- .copy_from_slice(&input_row[..border_x]);
- if border_x < extrax {
- let pad_from = ((gxs + border_x) / ty.size()) as isize;
- let pad_to = ((gxs + extrax) / ty.size()) as isize;
+ let border_x = (copy_x1 - group_x1).min(next_group_xsize);
+ output_row[copy_byte_offset..copy_byte_offset + border_x * ty.size()]
+ .copy_from_slice(&input_row[..border_x * ty.size()]);
+ if border_x + group_x1 < copy_x1 {
+ let pad_from = (xsize + border_x) as isize;
+ let pad_to = (xsize + copy_x1 - group_x1) as isize;
apply_x_padding(ty, output_row, pad_from..pad_to, 0..pad_from);
}
}
}
- // Renders a single group worth of data.
+    // Renders *parts* of a group's worth of data.
+ // In particular, renders the sub-rectangle given in `image_area`, where (1, 1) refers to
+ // the center of the group, and 0 and 2 include data from the neighbouring group (if any).
#[instrument(skip(self, buffers))]
pub(super) fn render_group(
&mut self,
(gx, gy): (usize, usize),
+ image_area: Rect,
buffers: &mut [Option<JxlOutputBuffer>],
) -> Result<()> {
- let gid = gy * self.shared.group_count.0 + gx;
- let (xsize, num_rows) = self.shared.group_size(gid);
- let (x0, y0) = self.shared.group_offset(gid);
+ let start_of_row = image_area.origin.0 == 0;
+ let end_of_row = image_area.end().0 == self.shared.input_size.0;
- let num_channels = self.shared.num_channels();
- let mut num_extra_rows = 0;
+ let Rect {
+ origin: (x0, y0),
+ size: (xsize, num_rows),
+ } = image_area;
- for c in 0..num_channels {
- num_extra_rows = num_extra_rows
- .max(self.input_border_pixels[c].1 << self.shared.channel_info[0][c].downsample.1);
- }
- for s in 0..self.shared.stages.len() {
- num_extra_rows = num_extra_rows
- .max(self.stage_output_border_pixels[s].1 << self.downsampling_for_stage[s].1);
- }
+ let num_channels = self.shared.num_channels();
+ let num_extra_rows = self.border_size.1;
// This follows the same implementation strategy as the C++ code in libjxl.
// We pretend that every stage has a vertical shift of 0, i.e. it is as tall
@@ -152,7 +204,7 @@ impl LowMemoryRenderPipeline {
// when vy % (1<<vshift) == 0.
let vy0 = y0.saturating_sub(num_extra_rows);
- let vy1 = y0 + num_rows + num_extra_rows;
+ let vy1 = image_area.end().1 + num_extra_rows;
for vy in vy0..vy1 {
let mut current_origin = (0, 0);
@@ -161,7 +213,7 @@ impl LowMemoryRenderPipeline {
// Step 1: read input channels.
for c in 0..num_channels {
// Same logic as below, but adapted to the input stage.
- let dy = self.shared.channel_info[0][c].downsample.1;
+ let (dx, dy) = self.shared.channel_info[0][c].downsample;
let scaled_y_border = self.input_border_pixels[c].1 << dy;
let stage_vy = vy as isize - num_extra_rows as isize + scaled_y_border as isize;
if stage_vy % (1 << dy) != 0 {
@@ -176,7 +228,7 @@ impl LowMemoryRenderPipeline {
continue;
}
let y = y as usize;
- self.fill_initial_buffers(c, y, y0 >> dy, (gx, gy));
+ self.fill_initial_buffers(c, y, (x0 >> dx, xsize >> dx), (gx, gy));
}
// Step 2: go through stages one by one.
for (i, stage) in self.shared.stages.iter().enumerate() {
@@ -215,8 +267,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
- is_first_xgroup: gx == 0,
- is_last_xgroup: gx + 1 == self.shared.group_count.0,
+ start_of_row,
+ end_of_row,
image_height: shifted_ysize,
},
&mut buffers,
@@ -294,8 +346,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0 >> dx,
out_extra_x,
- is_first_xgroup: gx == 0,
- is_last_xgroup: gx + 1 == self.shared.group_count.0,
+ start_of_row,
+ end_of_row,
image_height: shifted_ysize,
},
&input_data,
@@ -351,8 +403,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0,
out_extra_x: 0,
- is_first_xgroup: false,
- is_last_xgroup: false,
+ start_of_row: false,
+ end_of_row: false,
image_height: self.shared.input_size.1,
},
&mut buffers,
@@ -397,8 +449,8 @@ impl LowMemoryRenderPipeline {
current_row: y,
group_x0: x0,
out_extra_x: 0,
- is_first_xgroup: false,
- is_last_xgroup: false,
+ start_of_row: false,
+ end_of_row: false,
image_height: self.shared.input_size.1,
},
&input_data,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
index 43c4617c41e92..4cf01155da2a4 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs
@@ -33,13 +33,15 @@ impl RowBuffer {
data_type: DataTypeTag,
next_y_border: usize,
y_shift: usize,
+ x_shift: usize,
row_len: usize,
) -> Result<Self> {
let num_rows = (1 << y_shift) + 2 * next_y_border;
let num_rows = num_rows.next_power_of_two();
// Input offset is at *one* cacheline, and we need up to *two* cachelines on the other
// side as the data might exceed xsize slightly.
- let row_stride = (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + 3;
+ let row_stride =
+ (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + (3 << x_shift);
let mut buffer = Vec::<CacheLine>::new();
buffer.try_reserve_exact(row_stride * num_rows)?;
buffer.resize(row_stride * num_rows, CacheLine::default());
@@ -54,13 +56,15 @@ impl RowBuffer {
/// Creates a new row buffer with a single row filled with a repeating pattern.
/// Used for constant values like opaque alpha.
pub fn new_filled(data_type: DataTypeTag, row_len: usize, fill_pattern: &[u8]) -> Result<Self> {
- let mut result = Self::new(data_type, 0, 0, row_len)?;
+ let mut result = Self::new(data_type, 0, 0, 0, row_len)?;
let row_bytes: &mut [u8] = result.get_row_mut(0);
- let start = Self::x0_offset::<u8>();
- let end = start + row_len * fill_pattern.len();
- for (i, byte) in row_bytes[start..end].iter_mut().enumerate() {
+
+ // Fill the *entire* allocated row, including the padding on both sides,
+ // so cross-group border sampling doesn't read zeros (transparent alpha).
+ for (i, byte) in row_bytes.iter_mut().enumerate() {
*byte = fill_pattern[i % fill_pattern.len()];
}
+
Ok(result)
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
index 704b5a5bdc079..5acced8b34dda 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs
@@ -9,9 +9,9 @@ use crate::{
render::{
Channels, ChannelsMut, RunInPlaceStage,
internal::{PipelineBuffer, RunInOutStage},
- low_memory_pipeline::{helpers::mirror, render_group::ChannelVec},
+ low_memory_pipeline::render_group::ChannelVec,
},
- util::{ShiftRightCeil, SmallVec, tracing_wrappers::*},
+ util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*},
};
use super::{
@@ -26,8 +26,8 @@ pub struct ExtraInfo {
pub(super) out_extra_x: usize,
pub(super) current_row: usize,
pub(super) group_x0: usize,
- pub(super) is_first_xgroup: bool,
- pub(super) is_last_xgroup: bool,
+ pub(super) start_of_row: bool,
+ pub(super) end_of_row: bool,
pub(super) image_height: usize,
}
@@ -46,16 +46,16 @@ impl<T: RenderPipelineInPlaceStage> RunInPlaceStage<RowBuffer> for T {
group_x0,
out_extra_x,
image_height: _,
- is_first_xgroup,
- is_last_xgroup,
+ start_of_row,
+ end_of_row,
}: ExtraInfo,
buffers: &mut [&mut RowBuffer],
state: Option<&mut dyn Any>,
) {
let x0 = RowBuffer::x0_offset::<T::Type>();
- let xpre = if is_first_xgroup { 0 } else { out_extra_x };
+ let xpre = if start_of_row { 0 } else { out_extra_x };
let xstart = x0 - xpre;
- let xend = x0 + xsize + if is_last_xgroup { 0 } else { out_extra_x };
+ let xend = x0 + xsize + if end_of_row { 0 } else { out_extra_x };
let mut rows: ChannelVec<_> = buffers
.iter_mut()
.map(|x| &mut x.get_row_mut::<T::Type>(current_row)[xstart..])
@@ -80,8 +80,8 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
group_x0,
out_extra_x,
image_height,
- is_first_xgroup,
- is_last_xgroup,
+ start_of_row,
+ end_of_row,
}: ExtraInfo,
input_buffers: &[&RowBuffer],
output_buffers: &mut [RowBuffer],
@@ -89,7 +89,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
) {
let ibordery = Self::BORDER.1 as isize;
let x0 = RowBuffer::x0_offset::<T::InputT>();
- let xpre = if is_first_xgroup {
+ let xpre = if start_of_row {
0
} else {
out_extra_x.shrc(T::SHIFT.0)
@@ -97,7 +97,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T {
let xstart = x0 - xpre;
let xend = x0
+ xsize
- + if is_last_xgroup {
+ + if end_of_row {
0
} else {
out_extra_x.shrc(T::SHIFT.0)
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
index b586e9d8dd00d..a5024c175cc7c 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs
@@ -8,105 +8,200 @@
use std::mem::MaybeUninit;
use std::ops::Range;
-use jxl_simd::{F32SimdVec, SimdDescriptor, simd_function};
+use jxl_simd::{F32SimdVec, SimdDescriptor, U8SimdVec, U16SimdVec, simd_function};
use crate::{
api::{Endianness, JxlDataFormat, JxlOutputBuffer},
render::low_memory_pipeline::row_buffers::RowBuffer,
};
-#[inline(always)]
-fn run_interleaved_2<D: SimdDescriptor>(
- d: D,
- a: &[f32],
- b: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for ((chunk_a, chunk_b), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 2))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- D::F32Vec::store_interleaved_2_uninit(va, vb, chunk_out);
- n += len;
- }
+macro_rules! define_run_interleaved {
+ ($fn_name:ident, $ty:ty, $vec_trait:ident, $store_fn:ident, $cnt:expr, $($arg:ident),+) => {
+ #[inline(always)]
+ fn $fn_name<D: SimdDescriptor>(
+ d: D,
+ $($arg: &[$ty]),+,
+ out: &mut [MaybeUninit<$ty>],
+ ) -> usize {
+ let len = D::$vec_trait::LEN;
+ let mut n = 0;
+ let limit = [$($arg.len()),+][0];
+
+ {
+ let out_chunks = out[..limit * $cnt].chunks_exact_mut(len * $cnt);
+ $(let mut $arg = $arg.chunks_exact(len);)+
+ for out_chunk in out_chunks {
+ $(let $arg = D::$vec_trait::load(d, $arg.next().unwrap());)+
+ D::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len;
+ }
+ }
- n
+ let d256 = d.maybe_downgrade_256bit();
+ let len256 = <D::Descriptor256 as SimdDescriptor>::$vec_trait::LEN;
+ if len256 < len {
+ let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len256 * $cnt);
+ $(let mut $arg = $arg[n..limit].chunks_exact(len256);)+
+ for out_chunk in out_chunks {
+ $(let $arg = <D::Descriptor256 as SimdDescriptor>::$vec_trait::load(d256, $arg.next().unwrap());)+
+ <D::Descriptor256 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len256;
+ }
+ }
+
+ let d128 = d.maybe_downgrade_128bit();
+ let len128 = <D::Descriptor128 as SimdDescriptor>::$vec_trait::LEN;
+ if len128 < len {
+ let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len128 * $cnt);
+ $(let mut $arg = $arg[n..limit].chunks_exact(len128);)+
+ for out_chunk in out_chunks {
+ $(let $arg = <D::Descriptor128 as SimdDescriptor>::$vec_trait::load(d128, $arg.next().unwrap());)+
+ <D::Descriptor128 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk);
+ n += len128;
+ }
+ }
+
+ n
+ }
+ };
}
-#[inline(always)]
-fn run_interleaved_3<D: SimdDescriptor>(
+define_run_interleaved!(
+ run_interleaved_2_f32,
+ f32,
+ F32Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_f32,
+ f32,
+ F32Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_f32,
+ f32,
+ F32Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
+
+simd_function!(
+ store_interleaved_f32,
d: D,
- a: &[f32],
- b: &[f32],
- c: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for (((chunk_a, chunk_b), chunk_c), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(c.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 3))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- let vc = D::F32Vec::load(d, chunk_c);
- D::F32Vec::store_interleaved_3_uninit(va, vb, vc, chunk_out);
- n += len;
+ fn store_interleaved_impl_f32(
+ inputs: &[&[f32]],
+ output: &mut [MaybeUninit<f32>]
+ ) -> usize {
+ match inputs.len() {
+ 2 => run_interleaved_2_f32(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_f32(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_f32(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ _ => 0,
+ }
}
+);
- n
-}
+define_run_interleaved!(
+ run_interleaved_2_u8,
+ u8,
+ U8Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_u8,
+ u8,
+ U8Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_u8,
+ u8,
+ U8Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
-#[inline(always)]
-fn run_interleaved_4<D: SimdDescriptor>(
+simd_function!(
+ store_interleaved_u8,
d: D,
- a: &[f32],
- b: &[f32],
- c: &[f32],
- e: &[f32],
- out: &mut [MaybeUninit<f32>],
-) -> usize {
- let len = D::F32Vec::LEN;
- let mut n = 0;
-
- for ((((chunk_a, chunk_b), chunk_c), chunk_e), chunk_out) in a
- .chunks_exact(len)
- .zip(b.chunks_exact(len))
- .zip(c.chunks_exact(len))
- .zip(e.chunks_exact(len))
- .zip(out.chunks_exact_mut(len * 4))
- {
- let va = D::F32Vec::load(d, chunk_a);
- let vb = D::F32Vec::load(d, chunk_b);
- let vc = D::F32Vec::load(d, chunk_c);
- let ve = D::F32Vec::load(d, chunk_e);
- D::F32Vec::store_interleaved_4_uninit(va, vb, vc, ve, chunk_out);
- n += len;
+ fn store_interleaved_impl_u8(
+ inputs: &[&[u8]],
+ output: &mut [MaybeUninit<u8>]
+ ) -> usize {
+ match inputs.len() {
+ 2 => run_interleaved_2_u8(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_u8(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_u8(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ _ => 0,
+ }
}
+);
- n
-}
+define_run_interleaved!(
+ run_interleaved_2_u16,
+ u16,
+ U16Vec,
+ store_interleaved_2_uninit,
+ 2,
+ a,
+ b
+);
+define_run_interleaved!(
+ run_interleaved_3_u16,
+ u16,
+ U16Vec,
+ store_interleaved_3_uninit,
+ 3,
+ a,
+ b,
+ c
+);
+define_run_interleaved!(
+ run_interleaved_4_u16,
+ u16,
+ U16Vec,
+ store_interleaved_4_uninit,
+ 4,
+ a,
+ b,
+ c,
+ e
+);
simd_function!(
- store_interleaved,
+ store_interleaved_u16,
d: D,
- fn store_interleaved_impl(
- inputs: &[&[f32]],
- output: &mut [MaybeUninit<f32>]
+ fn store_interleaved_impl_u16(
+ inputs: &[&[u16]],
+ output: &mut [MaybeUninit<u16>]
) -> usize {
match inputs.len() {
- 2 => run_interleaved_2(d, inputs[0], inputs[1], output),
- 3 => run_interleaved_3(d, inputs[0], inputs[1], inputs[2], output),
- 4 => run_interleaved_4(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
+ 2 => run_interleaved_2_u16(d, inputs[0], inputs[1], output),
+ 3 => run_interleaved_3_u16(d, inputs[0], inputs[1], inputs[2], output),
+ 4 => run_interleaved_4_u16(d, inputs[0], inputs[1], inputs[2], inputs[3], output),
_ => 0,
}
}
@@ -153,6 +248,43 @@ pub(super) fn store(
}
input_buf.len() / data_format.bytes_per_sample()
}
+ (channels, 1, true) if (2..=4).contains(&channels) => {
+ let start_u8 = byte_start;
+ let end_u8 = byte_end;
+ let mut slices = [&[] as &[u8]; 4];
+ for (i, buf) in input_buf.iter().enumerate() {
+ slices[i] = &buf.get_row::<u8>(input_y)[start_u8..end_u8];
+ }
+ // Note that, by the conditions on the *_uninit methods on U8Vec, this function
+ // never writes uninitialized memory.
+ store_interleaved_u8(&slices[..channels], output_buf)
+ }
+ (channels, 2, true) if (2..=4).contains(&channels) => {
+ let ptr = output_buf.as_mut_ptr();
+ if ptr.align_offset(std::mem::align_of::<u16>()) == 0 {
+ let len_u16 = output_buf.len() / 2;
+ // SAFETY: we checked alignment above, and the size is correct by definition
+ // (note that it is guaranteed that MaybeUninit<T> has the same size and align
+ // of T for any T).
+ let output_u16 = unsafe {
+ std::slice::from_raw_parts_mut(
+ output_buf.as_mut_ptr().cast::<MaybeUninit<u16>>(),
+ len_u16,
+ )
+ };
+ let start_u16 = byte_start / 2;
+ let end_u16 = byte_end / 2;
+ let mut slices = [&[] as &[u16]; 4];
+ for (i, buf) in input_buf.iter().enumerate() {
+ slices[i] = &buf.get_row::<u16>(input_y)[start_u16..end_u16];
+ }
+ // Note that, by the conditions on the *_uninit methods on U16Vec, this function
+ // never writes uninitialized memory.
+ store_interleaved_u16(&slices[..channels], output_u16)
+ } else {
+ 0
+ }
+ }
(channels, 4, true) if (2..=4).contains(&channels) => {
let ptr = output_buf.as_mut_ptr();
if ptr.align_offset(std::mem::align_of::<f32>()) == 0 {
@@ -177,7 +309,7 @@ pub(super) fn store(
// Note that, by the conditions on the *_uninit methods on F32Vec, this function
// never writes uninitialized memory.
- store_interleaved(&slices[..channels], output_f32)
+ store_interleaved_f32(&slices[..channels], output_f32)
} else {
0
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
index d5d13699ec3f7..98c2975535536 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs
@@ -18,7 +18,7 @@ mod identity;
impl SaveStage {
// Takes as input only those channels that are *actually* saved.
#[allow(clippy::too_many_arguments)]
- pub(super) fn save_lowmem(
+ pub(crate) fn save_lowmem(
&self,
data: &[&RowBuffer],
buffers: &mut [Option<JxlOutputBuffer>],
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
index 4270f22eb3af7..5748513ba7ad9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs
@@ -17,8 +17,8 @@ pub mod buffer_splitter;
mod builder;
mod channels;
mod internal;
-mod low_memory_pipeline;
-mod save;
+pub mod low_memory_pipeline;
+pub mod save;
mod simd_utils;
#[cfg(test)]
mod simple_pipeline;
@@ -42,6 +42,11 @@ pub(crate) use low_memory_pipeline::LowMemoryRenderPipeline;
#[cfg(test)]
pub(crate) use simple_pipeline::SimpleRenderPipeline;
+pub enum StageSpecialCase {
+ F32ToU8 { channel: usize, bit_depth: u8 },
+ ModularToF32 { channel: usize, bit_depth: u8 },
+}
+
/// Modifies channels in-place.
pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display {
type Type: ImageDataType;
@@ -60,6 +65,10 @@ pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display {
}
fn uses_channel(&self, c: usize) -> bool;
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ None
+ }
}
/// Modifies data and writes it to a new buffer, of possibly different type.
@@ -97,6 +106,10 @@ pub trait RenderPipelineInOutStage: Any + std::fmt::Display {
}
fn uses_channel(&self, c: usize) -> bool;
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ None
+ }
}
// TODO(veluca): find a way to reduce the generated code due to having two builders, to integrate
@@ -111,14 +124,13 @@ pub(crate) trait RenderPipeline: Sized {
/// pass, a new buffer, or a re-used buffer from i.e. previously decoded frames.
fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>>;
- /// Gives back the buffer for a channel and group to the render pipeline, marking that
- /// `num_passes` additional passes (wrt. the previous call to this method for the same channel
- /// and group, or 0 if no previous call happend) were rendered into the input buffer.
+ /// Gives back the buffer for a channel and group to the render pipeline, marking whether
+ /// this will be the last time that this function is called for this group.
fn set_buffer_for_group<T: ImageDataType>(
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()>;
@@ -131,6 +143,9 @@ pub(crate) trait RenderPipeline: Sized {
/// implementation to ensure rendering only happens once.
fn render_outside_frame(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()>;
+ /// Marks a group for being re-rendered later.
+ fn mark_group_to_rerender(&mut self, g: usize);
+
fn box_inout_stage<S: RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn RunInOutStage<Self::Buffer>>;
@@ -138,4 +153,6 @@ pub(crate) trait RenderPipeline: Sized {
fn box_inplace_stage<S: RenderPipelineInPlaceStage>(
stage: S,
) -> Box<dyn RunInPlaceStage<Self::Buffer>>;
+
+ fn used_channel_mask(&self) -> &[bool];
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
index df09a8fd1c462..227003bdfe4f7 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs
@@ -20,37 +20,28 @@ mod extend;
mod run_stage;
mod save;
-/// A RenderPipeline that waits for all input of a pass to be ready before doing any rendering, and
+/// A RenderPipeline that waits for all input to be ready before doing any rendering, and
/// prioritizes simplicity over memory usage and computational efficiency.
/// Eventually meant to be used only for verification purposes.
pub struct SimpleRenderPipeline {
shared: RenderPipelineShared<Image<f64>>,
input_buffers: Vec<Image<f64>>,
- completed_passes: usize,
}
impl SimpleRenderPipeline {
#[instrument(skip_all, err)]
fn do_render(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()> {
- let ready_passes = self
+ let ready = self
.shared
- .group_chan_ready_passes
+ .group_chan_complete
.iter()
.flat_map(|x| x.iter())
- .copied()
- .min()
- .unwrap();
- if ready_passes <= self.completed_passes {
- debug!(
- "no more ready passes ({} completed, {ready_passes} ready)",
- self.completed_passes
- );
+ .all(|x| *x);
+ if !ready {
+ debug!("not yet ready");
return Ok(());
}
- debug!(
- "new ready passes ({} completed, {ready_passes} ready)",
- self.completed_passes
- );
+ debug!("ready to render");
let mut current_buffers = clone_images(&self.input_buffers)?;
@@ -129,7 +120,6 @@ impl SimpleRenderPipeline {
current_buffers = output_buffers;
}
- self.completed_passes = ready_passes;
Ok(())
}
}
@@ -154,7 +144,6 @@ impl RenderPipeline for SimpleRenderPipeline {
Ok(Self {
shared,
input_buffers,
- completed_passes: 0,
})
}
@@ -168,7 +157,7 @@ impl RenderPipeline for SimpleRenderPipeline {
&mut self,
channel: usize,
group_id: usize,
- num_passes: usize,
+ complete: bool,
buf: Image<T>,
buffer_splitter: &mut BufferSplitter,
) -> Result<()> {
@@ -178,22 +167,24 @@ impl RenderPipeline for SimpleRenderPipeline {
channel,
T::DATA_TYPE_ID,
);
- let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
- let goffset = self.shared.group_offset(group_id);
- let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel];
- let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1);
- debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset);
- let ty = ty.unwrap();
- assert_eq!(ty, T::DATA_TYPE_ID);
- let total_sz = self.input_buffers[channel].size();
- for y in 0..sz.1.min(total_sz.1 - off.1) {
- let row_in = buf.row(y);
- let row_out = self.input_buffers[channel].row_mut(y + off.1);
- for x in 0..sz.0.min(total_sz.0 - off.0) {
- row_out[x + off.0] = row_in[x].to_f64();
+ if self.shared.channel_is_used[channel] {
+ let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID);
+ let goffset = self.shared.group_offset(group_id);
+ let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel];
+ let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1);
+ debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset);
+ let ty = ty.unwrap();
+ assert_eq!(ty, T::DATA_TYPE_ID);
+ let total_sz = self.input_buffers[channel].size();
+ for y in 0..sz.1.min(total_sz.1 - off.1) {
+ let row_in = buf.row(y);
+ let row_out = self.input_buffers[channel].row_mut(y + off.1);
+ for x in 0..sz.0.min(total_sz.0 - off.0) {
+ row_out[x + off.0] = row_in[x].to_f64();
+ }
}
+ self.shared.group_chan_complete[group_id][channel] = complete;
}
- self.shared.group_chan_ready_passes[group_id][channel] += num_passes;
self.do_render(buffer_splitter)
}
@@ -208,6 +199,8 @@ impl RenderPipeline for SimpleRenderPipeline {
Ok(())
}
+ fn mark_group_to_rerender(&mut self, _g: usize) {}
+
fn box_inout_stage<S: RenderPipelineInOutStage>(
stage: S,
) -> Box<dyn super::RunInOutStage<Self::Buffer>> {
@@ -219,4 +212,8 @@ impl RenderPipeline for SimpleRenderPipeline {
) -> Box<dyn super::RunInPlaceStage<Self::Buffer>> {
Box::new(stage)
}
+
+ fn used_channel_mask(&self) -> &[bool] {
+ &self.shared.channel_is_used
+ }
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
index 24a0ee6a59065..bfaea994305a2 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs
@@ -13,7 +13,7 @@ use crate::{
RenderPipelineInOutStage, RenderPipelineInPlaceStage, RunInOutStage, RunInPlaceStage,
internal::PipelineBuffer,
},
- util::{SmallVec, round_up_size_to_cache_line, tracing_wrappers::*},
+ util::{SmallVec, mirror, round_up_size_to_cache_line, tracing_wrappers::*},
};
impl PipelineBuffer for Image<f64> {
@@ -122,31 +122,20 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<Image<f64>> for T {
numc
];
- let mirror = |mut v: i64, size: i64| {
- while v < 0 || v >= size {
- if v < 0 {
- v = -v - 1;
- }
- if v >= size {
- v = size + (size - v) - 1;
- }
- }
- v as usize
- };
for y in 0..input_size.1 {
for x in (0..input_size.0).step_by(chunk_size) {
- let border_x = Self::BORDER.0 as i64;
- let border_y = Self::BORDER.1 as i64;
+ let border_x = Self::BORDER.0 as isize;
+ let border_y = Self::BORDER.1 as isize;
let xsize = input_size.0.min(x + chunk_size) - x;
- let xs = xsize as i64;
+ let xs = xsize as isize;
debug!("position: {x}x{y} xsize: {xsize}");
for c in 0..numc {
for iy in -border_y..=border_y {
- let imgy = mirror(y as i64 + iy, input_size.1 as i64);
+ let imgy = mirror(y as isize + iy, input_size.1);
let in_row = input_buffers[c].row(imgy);
let buf_in_row = &mut buffer_in[c][(iy + border_y) as usize];
for ix in (-border_x..0).chain(xs..xs + border_x) {
- let imgx = mirror(x as i64 + ix, input_size.0 as i64);
+ let imgx = mirror(x as isize + ix, input_size.0);
buf_in_row[(ix + border_x) as usize] =
T::InputT::from_f64(in_row[imgx]);
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
index 1ddd46f593538..b9a941106c39b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs
@@ -3,64 +3,29 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
frame::quantizer::LfQuantFactors,
headers::bit_depth::BitDepth,
- render::{Channels, ChannelsMut, RenderPipelineInOutStage},
+ render::{Channels, ChannelsMut, RenderPipelineInOutStage, StageSpecialCase},
+ util::AtomicRefCell,
};
-use jxl_simd::{F32SimdVec, I32SimdVec, simd_function};
-
-pub struct ConvertU8F32Stage {
- channel: usize,
-}
-
-impl ConvertU8F32Stage {
- pub fn new(channel: usize) -> ConvertU8F32Stage {
- ConvertU8F32Stage { channel }
- }
-}
-
-impl std::fmt::Display for ConvertU8F32Stage {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "convert U8 data to F32 in channel {}", self.channel)
- }
-}
-
-impl RenderPipelineInOutStage for ConvertU8F32Stage {
- type InputT = u8;
- type OutputT = f32;
- const SHIFT: (u8, u8) = (0, 0);
- const BORDER: (u8, u8) = (0, 0);
-
- fn uses_channel(&self, c: usize) -> bool {
- c == self.channel
- }
-
- fn process_row_chunk(
- &self,
- _position: (usize, usize),
- xsize: usize,
- input_rows: &Channels<u8>,
- output_rows: &mut ChannelsMut<f32>,
- _state: Option<&mut dyn std::any::Any>,
- ) {
- let input = &input_rows[0];
- for i in 0..xsize {
- output_rows[0][0][i] = input[0][i] as f32 * (1.0 / 255.0);
- }
- }
-}
+use jxl_simd::{F32SimdVec, I32SimdVec, SimdMask, simd_function};
pub struct ConvertModularXYBToF32Stage {
first_channel: usize,
- scale: [f32; 3],
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
}
impl ConvertModularXYBToF32Stage {
- pub fn new(first_channel: usize, lf_quant: &LfQuantFactors) -> ConvertModularXYBToF32Stage {
+ pub fn new(
+ first_channel: usize,
+ lf_quant: Arc<AtomicRefCell<LfQuantFactors>>,
+ ) -> ConvertModularXYBToF32Stage {
ConvertModularXYBToF32Stage {
first_channel,
- scale: lf_quant.quant_factors,
+ lf_quant,
}
}
}
@@ -69,10 +34,9 @@ impl std::fmt::Display for ConvertModularXYBToF32Stage {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
- "convert modular xyb data to F32 in channels {}..{} with scales {:?}",
+ "convert modular xyb data to F32 in channels {}..{}",
self.first_channel,
self.first_channel + 2,
- self.scale
)
}
}
@@ -95,7 +59,8 @@ impl RenderPipelineInOutStage for ConvertModularXYBToF32Stage {
output_rows: &mut ChannelsMut<f32>,
_state: Option<&mut dyn std::any::Any>,
) {
- let [scale_x, scale_y, scale_b] = self.scale;
+ let lf_quant = self.lf_quant.borrow();
+ let [scale_x, scale_y, scale_b] = lf_quant.quant_factors;
assert_eq!(
input_rows.len(),
3,
@@ -257,6 +222,27 @@ fn int_to_float_generic(input: &[i32], output: &mut [f32], bits: u32, exp_bits:
}
}
+// SIMD modular to 32 bit float conversion
+simd_function!(
+ modular_to_float_32bit_simd_dispatch,
+ d: D,
+ fn modular_to_float_32bit_simd(input: &[i32], output: &mut [f32], scale: f32, xsize: usize) {
+ let simd_width = D::I32Vec::LEN;
+
+ let scale = D::F32Vec::splat(d, scale);
+
+ // Process complete SIMD vectors
+ for (in_chunk, out_chunk) in input
+ .chunks_exact(simd_width)
+ .zip(output.chunks_exact_mut(simd_width))
+ .take(xsize.div_ceil(simd_width))
+ {
+ let val = D::I32Vec::load(d, in_chunk);
+ (val.as_f32() * scale).store(out_chunk);
+ }
+ }
+);
+
impl RenderPipelineInOutStage for ConvertModularToF32Stage {
type InputT = i32;
type OutputT = f32;
@@ -279,11 +265,19 @@ impl RenderPipelineInOutStage for ConvertModularToF32Stage {
if self.bit_depth.floating_point_sample() {
int_to_float(input[0], output_rows[0][0], &self.bit_depth, xsize);
} else {
- // TODO(veluca): SIMDfy this code.
let scale = 1.0 / ((1u64 << self.bit_depth.bits_per_sample()) - 1) as f32;
- for i in 0..xsize {
- output_rows[0][0][i] = input[0][i] as f32 * scale;
- }
+ modular_to_float_32bit_simd_dispatch(input[0], output_rows[0][0], scale, xsize);
+ }
+ }
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ if self.bit_depth.floating_point_sample() {
+ None
+ } else {
+ Some(StageSpecialCase::ModularToF32 {
+ channel: self.channel,
+ bit_depth: self.bit_depth.bits_per_sample() as u8,
+ })
}
}
}
@@ -358,6 +352,89 @@ impl RenderPipelineInOutStage for ConvertF32ToU8Stage {
let max = ((1u32 << self.bit_depth) - 1) as f32;
f32_to_u8_simd_dispatch(input, output, max, xsize);
}
+
+ fn is_special_case(&self) -> Option<StageSpecialCase> {
+ Some(StageSpecialCase::F32ToU8 {
+ channel: self.channel,
+ bit_depth: self.bit_depth,
+ })
+ }
+}
+
+/// Stage that converts i32 values to u8 values, applying a multiplier.
+pub struct ConvertI32ToU8Stage {
+ channel: usize,
+ multiplier: i32,
+ max: i32,
+}
+
+impl ConvertI32ToU8Stage {
+ pub fn new(channel: usize, multiplier: i32, max: i32) -> ConvertI32ToU8Stage {
+ ConvertI32ToU8Stage {
+ channel,
+ multiplier,
+ max,
+ }
+ }
+}
+
+impl std::fmt::Display for ConvertI32ToU8Stage {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(
+ f,
+ "convert I32 to U8 in channel {} with multiplier {}",
+ self.channel, self.multiplier
+ )
+ }
+}
+
+// SIMD I32 to U8 conversion
+simd_function!(
+ i32_to_u8_simd_dispatch,
+ d: D,
+ fn i32_to_u8_simd(input: &[i32], output: &mut [u8], scale: i32, max: i32, xsize: usize) {
+ let simd_width = D::F32Vec::LEN;
+ let scale = D::I32Vec::splat(d, scale);
+ let max = D::I32Vec::splat(d, max);
+ let zero = D::I32Vec::splat(d, 0);
+
+ // Process SIMD vectors using div_ceil (buffers are padded)
+ for (input_chunk, output_chunk) in input
+ .chunks_exact(simd_width)
+ .zip(output.chunks_exact_mut(simd_width))
+ .take(xsize.div_ceil(simd_width))
+ {
+ let val = D::I32Vec::load(d, input_chunk);
+ let scaled = val * scale;
+ let zeroclip = scaled.lt_zero().if_then_else_i32(zero, scaled);
+ let clip = scaled.gt(max).if_then_else_i32(max, zeroclip);
+ clip.store_u8(output_chunk);
+ }
+ }
+);
+
+impl RenderPipelineInOutStage for ConvertI32ToU8Stage {
+ type InputT = i32;
+ type OutputT = u8;
+ const SHIFT: (u8, u8) = (0, 0);
+ const BORDER: (u8, u8) = (0, 0);
+
+ fn uses_channel(&self, c: usize) -> bool {
+ c == self.channel
+ }
+
+ fn process_row_chunk(
+ &self,
+ _position: (usize, usize),
+ xsize: usize,
+ input_rows: &Channels<i32>,
+ output_rows: &mut ChannelsMut<u8>,
+ _state: Option<&mut dyn std::any::Any>,
+ ) {
+ let input = input_rows[0][0];
+ let output = &mut output_rows[0][0];
+ i32_to_u8_simd_dispatch(input, output, self.multiplier, self.max, xsize);
+ }
}
/// Stage that converts f32 values in [0, 1] range to u16 values.
@@ -481,11 +558,6 @@ mod test {
use crate::headers::bit_depth::BitDepth;
use test_log::test;
- #[test]
- fn u8_consistency() -> Result<()> {
- crate::render::test::test_stage_consistency(|| ConvertU8F32Stage::new(0), (500, 500), 1)
- }
-
#[test]
fn f32_to_u8_consistency() -> Result<()> {
crate::render::test::test_stage_consistency(
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
index bbeff0ddab3c9..ca83a7b6623b8 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf0Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf0Stage {
@@ -39,7 +42,7 @@ impl Epf0Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -64,7 +67,8 @@ simd_function!(
assert_eq!(input_rows.len(), 3);
assert_eq!(output_rows.len(), 3);
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
const { assert!(D::F32Vec::LEN <= 16) };
@@ -76,7 +80,8 @@ simd_function!(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) {
D::F32Vec::load(d, &input_c[3][3 + x..]).store(&mut output_c[0][x..]);
}
@@ -204,7 +209,10 @@ simd_function!(
] {
out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out);
}
- (out * inv_w).store(&mut output_c[0][x..]);
+ out *= inv_w;
+ let p33 = D::F32Vec::load(d, &input_c[3][3 + x..]);
+ let out = sigma_mask.if_then_else_f32(p33, out);
+ out.store(&mut output_c[0][x..]);
}
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
index 53570b34c9b0b..5a1807050f60b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf1Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf1Stage {
@@ -39,7 +42,7 @@ impl Epf1Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -64,7 +67,8 @@ fn epf1_process_row_chunk(
assert_eq!(input_rows.len(), 3);
assert_eq!(output_rows.len(), 3);
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
let sm = stage.sigma_scale * 1.65;
let bsm = sm * stage.border_sad_mul;
@@ -74,7 +78,8 @@ fn epf1_process_row_chunk(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) {
D::F32Vec::load(d, &input_c[2][2 + x..]).store(&mut output_c[0][x..]);
}
@@ -140,7 +145,10 @@ fn epf1_process_row_chunk(
] {
out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out);
}
- (out * inv_w).store(&mut output_c[0][x..]);
+ out *= inv_w;
+ let p22 = D::F32Vec::load(d, &input_c[2][2 + x..]);
+ let out = sigma_mask.if_then_else_f32(p22, out);
+ out.store(&mut output_c[0][x..]);
}
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
index 867b36b07c9f7..6c3cb920a62f5 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs
@@ -3,6 +3,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::sync::Arc;
+
use crate::{
BLOCK_DIM, MIN_SIGMA,
features::epf::SigmaSource,
@@ -10,6 +12,7 @@ use crate::{
Channels, ChannelsMut, RenderPipelineInOutStage,
stages::epf::common::{get_sigma, prepare_sad_mul_storage},
},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, SimdMask, simd_function};
@@ -21,7 +24,7 @@ pub struct Epf2Stage {
/// (inverse) multiplier for sigma on borders
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
}
impl std::fmt::Display for Epf2Stage {
@@ -39,7 +42,7 @@ impl Epf2Stage {
sigma_scale: f32,
border_sad_mul: f32,
channel_scale: [f32; 3],
- sigma: SigmaSource,
+ sigma: Arc<AtomicRefCell<SigmaSource>>,
) -> Self {
Self {
sigma,
@@ -65,7 +68,8 @@ fn epf2_process_row_chunk(
let (input_x, input_y, input_b) = (&input_rows[0], &input_rows[1], &input_rows[2]);
let (output_x, output_y, output_b) = output_rows.split_first_3_mut();
- let row_sigma = stage.sigma.row(ypos / BLOCK_DIM);
+ let sigma = stage.sigma.borrow();
+ let row_sigma = sigma.row(ypos / BLOCK_DIM);
const { assert!(D::F32Vec::LEN <= 16) };
@@ -77,7 +81,8 @@ fn epf2_process_row_chunk(
let sigma = get_sigma(d, x + xpos, row_sigma);
let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]);
- if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() {
+ let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma);
+ if sigma_mask.all() {
D::F32Vec::load(d, &input_x[1][1 + x..]).store(&mut output_x[0][x..]);
D::F32Vec::load(d, &input_y[1][1 + x..]).store(&mut output_y[0][x..]);
D::F32Vec::load(d, &input_b[1][1 + x..]).store(&mut output_b[0][x..]);
@@ -119,9 +124,15 @@ fn epf2_process_row_chunk(
let inv_w = D::F32Vec::splat(d, 1.0) / w_acc;
- (x_acc * inv_w).store(&mut output_x[0][x..]);
- (y_acc * inv_w).store(&mut output_y[0][x..]);
- (b_acc * inv_w).store(&mut output_b[0][x..]);
+ x_acc *= inv_w;
+ y_acc *= inv_w;
+ b_acc *= inv_w;
+ x_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_x[1][1+x..]), x_acc);
+ y_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_y[1][1+x..]), y_acc);
+ b_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_b[1][1+x..]), b_acc);
+ x_acc.store(&mut output_x[0][x..]);
+ y_acc.store(&mut output_y[0][x..]);
+ b_acc.store(&mut output_b[0][x..]);
}
});
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
index ae3ada234812c..90cccf901f064 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs
@@ -9,12 +9,13 @@ use rand::SeedableRng;
use test_log::test;
use super::*;
-use crate::{error::Result, features::epf::SigmaSource, image::Image};
+use crate::{error::Result, features::epf::SigmaSource, image::Image, util::AtomicRefCell};
#[test]
fn epf0_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf0Stage::new(0.9, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
@@ -26,6 +27,7 @@ fn epf0_consistency() -> Result<()> {
fn epf1_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf1Stage::new(1.0, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
@@ -37,6 +39,7 @@ fn epf1_consistency() -> Result<()> {
fn epf2_consistency() -> Result<()> {
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap()));
+ let sigma = Arc::new(AtomicRefCell::new(sigma));
crate::render::test::test_stage_consistency(
|| Epf2Stage::new(6.5, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()),
(512, 512),
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
index 3002776c9e2c9..a3fc4bb043027 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs
@@ -17,7 +17,7 @@ mod premultiply_alpha;
mod splines;
mod spot;
mod to_linear;
-mod upsample;
+pub mod upsample;
mod xyb;
mod ycbcr;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
index 88dc6395f0421..fd717f345b5fe 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs
@@ -5,10 +5,13 @@
#![allow(clippy::needless_range_loop)]
+use std::{any::Any, sync::Arc};
+
use crate::{
features::noise::Noise,
frame::color_correlation_map::ColorCorrelationParams,
render::{Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage},
+ util::AtomicRefCell,
};
use jxl_simd::{F32SimdVec, simd_function};
@@ -103,16 +106,16 @@ impl RenderPipelineInOutStage for ConvolveNoiseStage {
}
pub struct AddNoiseStage {
- noise: Noise,
+ noise: Arc<AtomicRefCell<Noise>>,
first_channel: usize,
- color_correlation: ColorCorrelationParams,
+ color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>,
}
impl AddNoiseStage {
#[allow(dead_code)]
pub fn new(
- noise: Noise,
- color_correlation: ColorCorrelationParams,
+ noise: Arc<AtomicRefCell<Noise>>,
+ color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>,
first_channel: usize,
) -> AddNoiseStage {
assert!(first_channel > 2);
@@ -148,11 +151,16 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
_position: (usize, usize),
xsize: usize,
row: &mut [&mut [f32]],
- _state: Option<&mut dyn std::any::Any>,
+ _state: Option<&mut dyn Any>,
) {
+ let noise = self.noise.borrow();
+ if noise.lut == [0.0; 8] {
+ return;
+ }
+ let color_correlation = self.color_correlation.borrow();
let norm_const = 0.22;
- let ytox = self.color_correlation.y_to_x_lf();
- let ytob = self.color_correlation.y_to_b_lf();
+ let ytox = color_correlation.y_to_x_lf();
+ let ytob = color_correlation.y_to_b_lf();
for x in 0..xsize {
let row_rnd_r = row[3][x];
let row_rnd_g = row[4][x];
@@ -161,8 +169,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
let vy = row[1][x];
let in_g = vy - vx;
let in_r = vy + vx;
- let noise_strength_g = self.noise.strength(in_g * 0.5);
- let noise_strength_r = self.noise.strength(in_r * 0.5);
+ let noise_strength_g = noise.strength(in_g * 0.5);
+ let noise_strength_r = noise.strength(in_r * 0.5);
let addit_rnd_noise_red = row_rnd_r * norm_const;
let addit_rnd_noise_green = row_rnd_g * norm_const;
let addit_rnd_noise_correlated = row_rnd_c * norm_const;
@@ -182,6 +190,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage {
#[cfg(test)]
mod test {
+ use std::sync::Arc;
+
use crate::{
error::Result,
features::noise::Noise,
@@ -191,7 +201,7 @@ mod test {
stages::noise::{AddNoiseStage, ConvolveNoiseStage},
test::make_and_run_simple_pipeline,
},
- util::test::assert_almost_abs_eq,
+ util::{AtomicRefCell, test::assert_almost_abs_eq},
};
use test_log::test;
@@ -228,10 +238,10 @@ mod test {
let input_c4: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?;
let input_c5: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?;
let stage = AddNoiseStage::new(
- Noise {
+ Arc::new(AtomicRefCell::new(Noise {
lut: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
- },
- ColorCorrelationParams::default(),
+ })),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
3,
);
let output = make_and_run_simple_pipeline(
@@ -325,10 +335,10 @@ mod test {
crate::render::test::test_stage_consistency(
|| {
AddNoiseStage::new(
- Noise {
+ Arc::new(AtomicRefCell::new(Noise {
lut: [0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.1, 2.3],
- },
- ColorCorrelationParams::default(),
+ })),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
3,
)
},
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
index 225302a3955ee..38ac4e3d34085 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs
@@ -6,15 +6,31 @@
use std::{any::Any, sync::Arc};
use crate::{
- features::patches::PatchesDictionary, frame::ReferenceFrame,
- headers::extra_channels::ExtraChannelInfo, render::RenderPipelineInPlaceStage,
- util::NewWithCapacity as _,
+ features::patches::PatchesDictionary,
+ frame::ReferenceFrame,
+ headers::extra_channels::ExtraChannelInfo,
+ render::RenderPipelineInPlaceStage,
+ util::{AtomicRefCell, NewWithCapacity as _},
};
pub struct PatchesStage {
- pub patches: Arc<PatchesDictionary>,
- pub extra_channels: Vec<ExtraChannelInfo>,
- pub decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ extra_channels: Vec<ExtraChannelInfo>,
+ decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+}
+
+impl PatchesStage {
+ pub fn new(
+ patches: Arc<AtomicRefCell<PatchesDictionary>>,
+ extra_channels: Vec<ExtraChannelInfo>,
+ decoder_state: Arc<[Option<ReferenceFrame>; 4]>,
+ ) -> Self {
+ Self {
+ patches,
+ extra_channels,
+ decoder_state,
+ }
+ }
}
impl std::fmt::Display for PatchesStage {
@@ -37,8 +53,15 @@ impl RenderPipelineInPlaceStage for PatchesStage {
row: &mut [&mut [f32]],
state: Option<&mut dyn Any>,
) {
+ let patches = self.patches.borrow();
+ if patches.positions.is_empty() {
+ return;
+ }
let state: &mut Vec<usize> = state.unwrap().downcast_mut().unwrap();
- self.patches.add_one_row(
+ if state.capacity() < patches.positions.len() {
+ state.reserve(patches.positions.len() - state.len());
+ }
+ patches.add_one_row(
row,
position,
xsize,
@@ -49,7 +72,10 @@ impl RenderPipelineInPlaceStage for PatchesStage {
}
fn init_local_state(&self, _thread_index: usize) -> crate::error::Result<Option<Box<dyn Any>>> {
- let patches_for_row_result = Vec::<usize>::new_with_capacity(self.patches.positions.len())?;
+ // TODO(veluca): I think this is wrong, check that.
+ let patches = self.patches.borrow();
+ let len = patches.positions.len();
+ let patches_for_row_result = Vec::<usize>::new_with_capacity(len)?;
Ok(Some(Box::new(patches_for_row_result) as Box<dyn Any>))
}
}
@@ -70,13 +96,13 @@ mod test {
let (file_header, _, _) =
read_headers_and_toc(include_bytes!("../../../resources/test/basic.jxl")).unwrap();
let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0);
- let patch_dict = Arc::new(PatchesDictionary::random(
+ let patch_dict = PatchesDictionary::random(
(500, 500),
file_header.image_metadata.extra_channel_info.len(),
0,
4,
&mut rng,
- ));
+ );
let reference_frames = Arc::new([
Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?),
Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?),
@@ -85,7 +111,7 @@ mod test {
]);
crate::render::test::test_stage_consistency(
|| PatchesStage {
- patches: patch_dict.clone(),
+ patches: Arc::new(AtomicRefCell::new(patch_dict.clone())),
extra_channels: file_header.image_metadata.extra_channel_info.clone(),
decoder_state: reference_frames.clone(),
},
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
index b2ebd88461e85..295c33c663ad2 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs
@@ -3,29 +3,33 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+use std::{any::Any, sync::Arc};
+
use crate::{
- error::Result, features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams,
- render::RenderPipelineInPlaceStage,
+ features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams,
+ render::RenderPipelineInPlaceStage, util::AtomicRefCell,
};
pub struct SplinesStage {
- splines: Splines,
+ splines: Arc<AtomicRefCell<Splines>>,
+ image_size: (usize, usize),
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
+ high_precision: bool,
}
impl SplinesStage {
pub fn new(
- mut splines: Splines,
- frame_size: (usize, usize),
- color_correlation_params: &ColorCorrelationParams,
+ splines: Arc<AtomicRefCell<Splines>>,
+ image_size: (usize, usize),
+ color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>,
high_precision: bool,
- ) -> Result<Self> {
- splines.initialize_draw_cache(
- frame_size.0 as u64,
- frame_size.1 as u64,
+ ) -> Self {
+ SplinesStage {
+ splines,
+ image_size,
color_correlation_params,
high_precision,
- )?;
- Ok(SplinesStage { splines })
+ }
}
}
@@ -47,17 +51,36 @@ impl RenderPipelineInPlaceStage for SplinesStage {
position: (usize, usize),
xsize: usize,
row: &mut [&mut [f32]],
- _state: Option<&mut dyn std::any::Any>,
+ _state: Option<&mut dyn Any>,
) {
- self.splines.draw_segments(row, position, xsize);
+ // TODO(veluca): this is wrong!! Race condition in MT.
+ let mut splines = self.splines.borrow_mut();
+ if splines.splines.is_empty() {
+ return;
+ }
+ if !splines.is_initialized() {
+ let color_correlation_params = self.color_correlation_params.borrow();
+ splines
+ .initialize_draw_cache(
+ self.image_size.0 as u64,
+ self.image_size.1 as u64,
+ &color_correlation_params,
+ self.high_precision,
+ )
+ .unwrap();
+ }
+ splines.draw_segments(row, position, xsize);
}
}
#[cfg(test)]
mod test {
+ use std::sync::Arc;
+
use crate::features::spline::{Point, QuantizedSpline, Splines};
use crate::frame::color_correlation_map::ColorCorrelationParams;
use crate::render::test::make_and_run_simple_pipeline;
+ use crate::util::AtomicRefCell;
use crate::util::test::{self, assert_all_almost_abs_eq, read_pfm};
use crate::{error::Result, image::Image, render::stages::splines::SplinesStage};
use test_log::test;
@@ -104,12 +127,11 @@ mod test {
);
let output: Vec<Image<f32>> = make_and_run_simple_pipeline(
SplinesStage::new(
- splines.clone(),
+ Arc::new(AtomicRefCell::new(splines.clone())),
size,
- &ColorCorrelationParams::default(),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
true,
- )
- .unwrap(),
+ ),
&target_images,
size,
0,
@@ -123,6 +145,7 @@ mod test {
Ok(())
}
+ #[ignore = "spline rendering is not fully consistent due to sqrt precision differences"]
#[test]
fn splines_consistency() -> Result<()> {
let splines = Splines::create(
@@ -160,12 +183,11 @@ mod test {
crate::render::test::test_stage_consistency(
|| {
SplinesStage::new(
- splines.clone(),
+ Arc::new(AtomicRefCell::new(splines.clone())),
(500, 500),
- &ColorCorrelationParams::default(),
+ Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())),
false,
)
- .unwrap()
},
(500, 500),
6,
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
index 4e1f80a5a3f90..77797bc4d2b23 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs
@@ -103,10 +103,9 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
image_size,
downsampling_shift,
LOG_GROUP_SIZE,
- 1,
chunk_size,
)
- .add_stage_internal(stage)?;
+ .add_stage_internal(stage);
let jxl_data_type = match OutputT::DATA_TYPE_ID {
DataTypeTag::U8 | DataTypeTag::I8 => JxlDataFormat::U8 { bit_depth: 8 },
@@ -129,7 +128,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
JxlColorType::Grayscale,
jxl_data_type,
false,
- )?;
+ );
}
let mut pipeline = pipeline.build()?;
@@ -168,7 +167,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy
pipeline.set_buffer_for_group(
c,
g,
- 1,
+ true,
extract_group_rect(&input_images[c], g, log_group_size)?,
&mut buffer_splitter,
)?;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
index 74cb6784bf85c..6fa51988ba393 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs
@@ -58,6 +58,22 @@ pub fn fast_erff(x: f32) -> f32 {
result.copysign(x)
}
+#[inline(always)]
+pub fn fast_erff_simd<D: SimdDescriptor>(d: D, x: D::F32Vec) -> D::F32Vec {
+ let absx = x.abs();
+ let denom1 = absx.mul_add(
+ D::F32Vec::splat(d, 7.77394369e-02),
+ D::F32Vec::splat(d, 2.05260015e-04),
+ );
+ let denom2 = denom1.mul_add(absx, D::F32Vec::splat(d, 2.32120216e-01));
+ let denom3 = denom2.mul_add(absx, D::F32Vec::splat(d, 2.77820801e-01));
+ let denom4 = denom3.mul_add(absx, D::F32Vec::splat(d, 1.0));
+ let denom5 = denom4 * denom4;
+ let inv_denom5 = D::F32Vec::splat(d, 1.0) / denom5;
+ let result = D::F32Vec::splat(d, 1.0) - inv_denom5 * inv_denom5;
+ result.copysign(x)
+}
+
#[inline]
pub fn fast_pow2f(x: f32) -> f32 {
let x_floor = x.floor();
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs
new file mode 100644
index 0000000000000..4d7e6c2fcd8e5
--- /dev/null
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// Mirror-reflects a value v to fit in a [0; s) range.
+pub fn mirror(mut v: isize, s: usize) -> usize {
+ // TODO(veluca): consider speeding this up if needed.
+ loop {
+ if v < 0 {
+ v = -v - 1;
+ } else if v >= s as isize {
+ v = s as isize * 2 - v - 1;
+ } else {
+ return v as usize;
+ }
+ }
+}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
index ac8db74e3a558..514820bcd24ef 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs
@@ -14,6 +14,7 @@ mod fast_math;
mod float16;
mod linalg;
mod log2;
+mod mirror;
pub mod ndarray;
mod rational_poly;
mod shift_right_ceil;
@@ -30,6 +31,7 @@ pub use fast_math::*;
pub use float16::f16;
pub use linalg::*;
pub use log2::*;
+pub use mirror::*;
pub(crate) use ndarray::*;
pub use rational_poly::*;
pub use shift_right_ceil::*;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
index a9b3ca24d4cd6..8e47c7bb1ce68 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs
@@ -222,6 +222,39 @@ pub fn check_equal_images<T: ImageDataType>(a: &Image<T>, b: &Image<T>) {
}
}
+/// Encode a u64 value as a LEB128 varint. Useful for building test data for
+/// frame index boxes and other container structures.
+pub fn encode_varint(mut value: u64) -> Vec<u8> {
+ let mut result = Vec::new();
+ loop {
+ let mut byte = (value & 0x7f) as u8;
+ value >>= 7;
+ if value > 0 {
+ byte |= 0x80;
+ }
+ result.push(byte);
+ if value == 0 {
+ break;
+ }
+ }
+ result
+}
+
+/// Build raw jxli frame index box content bytes from tnum, tden, and
+/// delta-coded entries `(OFF_delta, T, F)`.
+pub fn build_frame_index_content(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> {
+ let mut buf = Vec::new();
+ buf.extend(encode_varint(entries.len() as u64));
+ buf.extend(tnum.to_be_bytes());
+ buf.extend(tden.to_be_bytes());
+ for &(off, t, f) in entries {
+ buf.extend(encode_varint(off));
+ buf.extend(encode_varint(t));
+ buf.extend(encode_varint(f));
+ }
+ buf
+}
+
pub fn read_headers_and_toc(image: &[u8]) -> Result<(FileHeader, FrameHeader, Toc), JXLError> {
let codestream = ContainerParser::collect_codestream(image).unwrap();
let mut br = BitReader::new(&codestream);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
index c0d6499398b2c..5dbd975587f40 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs
@@ -14,7 +14,7 @@ use std::{
use crate::U32SimdVec;
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
// Safety invariant: this type is only ever constructed if neon is available.
#[derive(Clone, Copy, Debug)]
@@ -41,6 +41,10 @@ impl SimdDescriptor for NeonDescriptor {
type U32Vec = U32VecNeon;
+ type U16Vec = U16VecNeon;
+
+ type U8Vec = U8VecNeon;
+
type Mask = MaskNeon;
type Bf16Table8 = Bf16Table8Neon;
@@ -122,7 +126,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load(d: Self::Descriptor, mem: &[f32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld1q_f32 supports unaligned loads.
Self(unsafe { vld1q_f32(mem.as_ptr()) }, d)
}
@@ -130,7 +134,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn store(&self, mem: &mut [f32]) {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vst1q_f32 supports unaligned stores.
unsafe { vst1q_f32(mem.as_mut_ptr(), self.0) }
}
@@ -138,9 +142,9 @@ unsafe impl F32SimdVec for F32VecNeon {
fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<f32>]) {
assert!(dest.len() >= 2 * Self::LEN);
// SAFETY: we just checked that `dest` has enough space, and neon is available
- // from the safety invariant on the descriptor stored in `a`.
+ // from the safety invariant on the descriptor stored in `a`. vst2q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst2q_f32(dest_ptr, float32x4x2_t(a.0, b.0));
}
}
@@ -148,9 +152,9 @@ unsafe impl F32SimdVec for F32VecNeon {
#[inline(always)]
fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<f32>]) {
assert!(dest.len() >= 3 * Self::LEN);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. vst3q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst3q_f32(dest_ptr, float32x4x3_t(a.0, b.0, c.0));
}
}
@@ -165,9 +169,9 @@ unsafe impl F32SimdVec for F32VecNeon {
) {
assert!(dest.len() >= 4 * Self::LEN);
// SAFETY: we just checked that `dest` has enough space, and neon is available
- // from the safety invariant on the descriptor stored in `a`.
+ // from the safety invariant on the descriptor stored in `a`. vst4q_f32 supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
vst4q_f32(dest_ptr, float32x4x4_t(a.0, b.0, c.0, d.0));
}
}
@@ -277,7 +281,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self) {
assert!(src.len() >= 2 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld2q_f32 supports unaligned loads.
let float32x4x2_t(a, b) = unsafe { vld2q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d))
}
@@ -286,7 +290,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_3(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self) {
assert!(src.len() >= 3 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld3q_f32 supports unaligned loads.
let float32x4x3_t(a, b, c) = unsafe { vld3q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d), Self(c, d))
}
@@ -295,7 +299,7 @@ unsafe impl F32SimdVec for F32VecNeon {
fn load_deinterleaved_4(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self, Self) {
assert!(src.len() >= 4 * Self::LEN);
// SAFETY: we just checked that `src` has enough space, and neon is available
- // from the safety invariant on `d`.
+ // from the safety invariant on `d`. vld4q_f32 supports unaligned loads.
let float32x4x4_t(a, b, c, e) = unsafe { vld4q_f32(src.as_ptr()) };
(Self(a, d), Self(b, d), Self(c, d), Self(e, d))
}
@@ -337,7 +341,7 @@ unsafe impl F32SimdVec for F32VecNeon {
assert!(data.len() > 3);
// Transposed load
- // SAFETY: input is verified to be large enough for this pointer.
+ // SAFETY: input is verified to be large enough for this pointer. vld4q_f32 supports unaligned loads.
let float32x4x4_t(p0, p1, p2, p3) = unsafe { vld4q_f32(data.as_ptr().cast()) };
F32VecNeon(p0, d).store_array(&mut data[0]);
@@ -422,9 +426,9 @@ unsafe impl F32SimdVec for F32VecNeon {
let u16s = vqmovun_s32(i32s);
let u8s = vqmovn_u16(vcombine_u16(u16s, u16s));
// Store lower 4 bytes
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. vst1_lane_u32 supports unaligned stores.
unsafe {
- vst1_lane_u32::<0>(dest.as_mut_ptr() as *mut u32, vreinterpret_u32_u8(u8s));
+ vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_u8(u8s));
}
}
@@ -436,7 +440,7 @@ unsafe impl F32SimdVec for F32VecNeon {
let i32s = vcvtq_s32_f32(rounded);
let u16s = vqmovun_s32(i32s);
// Store 4 u16s (8 bytes)
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. vst1_u16 supports unaligned stores.
unsafe {
vst1_u16(dest.as_mut_ptr(), u16s);
}
@@ -447,7 +451,8 @@ unsafe impl F32SimdVec for F32VecNeon {
// Use inline asm because Rust stdarch incorrectly requires fp16 target feature
// for vcvt_f16_f32 (fixed in https://github.com/rust-lang/stdarch/pull/1978)
let f16_bits: uint16x4_t;
- // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space
+ // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space,
+ // vst1_u16 supports unaligned stores.
unsafe {
std::arch::asm!(
"fcvtn {out:v}.4h, {inp:v}.4s",
@@ -466,7 +471,8 @@ unsafe impl F32SimdVec for F32VecNeon {
// Use inline asm because Rust stdarch incorrectly requires fp16 target feature
// for vcvt_f32_f16 (fixed in https://github.com/rust-lang/stdarch/pull/1978)
let result: float32x4_t;
- // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space
+ // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space.
+ // vld1_u16 supports unaligned loads.
unsafe {
let f16_bits = vld1_u16(mem.as_ptr());
std::arch::asm!(
@@ -487,7 +493,7 @@ unsafe impl F32SimdVec for F32VecNeon {
// Convert f32 table to BF16 packed in 128 bits (16 bytes for 8 entries)
// BF16 is the high 16 bits of f32
// SAFETY: neon is available from target_feature, and `table` is large
- // enough for the loads.
+ // enough for the loads. vld1q_f32 supports unaligned loads.
let (table_lo, table_hi) =
unsafe { (vld1q_f32(table.as_ptr()), vld1q_f32(table.as_ptr().add(4))) };
@@ -695,13 +701,28 @@ impl I32SimdVec for I32VecNeon {
fn store_u16(self, dest: &mut [u16]) {
assert!(dest.len() >= Self::LEN);
// SAFETY: We know neon is available from the safety invariant on `self.1`,
- // and we just checked that `dest` has enough space.
+ // and we just checked that `dest` has enough space. vst1_u16 supports unaligned
+ // stores.
unsafe {
// vmovn narrows i32 to i16 by taking the lower 16 bits
let narrowed = vmovn_s32(self.0);
vst1_u16(dest.as_mut_ptr(), vreinterpret_u16_s16(narrowed));
}
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ assert!(dest.len() >= Self::LEN);
+ // SAFETY: We know neon is available from the safety invariant on `self.1`,
+ // and we just checked that `dest` has enough space. vst1_lane_u32 supports unaligned stores.
+ unsafe {
+ // vmovn narrows i32 -> i16 -> i8
+ let narrowed_i16 = vmovn_s32(self.0);
+ let combined_i16 = vcombine_s16(narrowed_i16, narrowed_i16);
+ let narrowed_i8 = vmovn_s16(combined_i16);
+ vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_s8(narrowed_i8));
+ }
+ }
}
impl Add<I32VecNeon> for I32VecNeon {
@@ -837,6 +858,150 @@ impl U32SimdVec for U32VecNeon {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecNeon(uint8x16_t, NeonDescriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecNeon {
+ type Descriptor = NeonDescriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vld1q_u8 supports unaligned loads.
+ Self(unsafe { vld1q_u8(mem.as_ptr()) }, d)
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know neon is available from the safety invariant on `d`.
+ Self(unsafe { vdupq_n_u8(v) }, d)
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vst1q_u8 supports unaligned stores.
+ unsafe { vst1q_u8(mem.as_mut_ptr(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst2q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst2q_u8(dest_ptr, uint8x16x2_t(a.0, b.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 3 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst3q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst3q_u8(dest_ptr, uint8x16x3_t(a.0, b.0, c.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst4q_u8 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u8>();
+ vst4q_u8(dest_ptr, uint8x16x4_t(a.0, b.0, c.0, d.0));
+ }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecNeon(uint16x8_t, NeonDescriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecNeon {
+ type Descriptor = NeonDescriptor;
+ const LEN: usize = 8;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vld1q_u16 supports unaligned loads.
+ Self(unsafe { vld1q_u16(mem.as_ptr().cast()) }, d)
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: We know neon is available from the safety invariant on `d`.
+ Self(unsafe { vdupq_n_u16(v) }, d)
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available
+ // from the safety invariant on `d`. vst1q_u16 supports unaligned stores.
+ unsafe { vst1q_u16(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst2q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst2q_u16(dest_ptr, uint16x8x2_t(a.0, b.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 3 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst3q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst3q_u16(dest_ptr, uint16x8x3_t(a.0, b.0, c.0));
+ }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * Self::LEN);
+ // SAFETY: we just checked that `dest` has enough space, and neon is available
+ // from the safety invariant on the descriptor stored in `a`. vst4q_u16 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<u16>();
+ vst4q_u16(dest_ptr, uint16x8x4_t(a.0, b.0, c.0, d.0));
+ }
+ }
+}
+
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
pub struct MaskNeon(uint32x4_t, NeonDescriptor);
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
index 4f06dbddcd7b8..0129db2a1fa96 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs
@@ -44,6 +44,10 @@ pub trait SimdDescriptor: Sized + Copy + Debug + Send + Sync {
type U32Vec: U32SimdVec<Descriptor = Self>;
+ type U16Vec: U16SimdVec<Descriptor = Self>;
+
+ type U8Vec: U8SimdVec<Descriptor = Self>;
+
type Mask: SimdMask<Descriptor = Self>;
/// Prepared 8-entry BF16 lookup table for fast approximate lookups.
@@ -124,7 +128,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_2_uninit(a, b, dest);
}
@@ -136,7 +140,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_3_uninit(a, b, c, dest);
}
@@ -148,7 +152,7 @@ pub unsafe trait F32SimdVec:
// SAFETY: f32 and MaybeUninit<f32> have the same layout.
// We are writing to initialized memory, so treating it as uninit for writing is fine.
let dest = unsafe {
- std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len())
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len())
};
Self::store_interleaved_4_uninit(a, b, c, d, dest);
}
@@ -344,6 +348,10 @@ pub trait I32SimdVec:
/// Stores the lower 16 bits of each i32 lane as u16 values.
/// Requires `dest.len() >= Self::LEN` or it will panic.
fn store_u16(self, dest: &mut [u16]);
+
+ /// Stores the lower 8 bits of each i32 lane as u8 values.
+ /// Requires `dest.len() >= Self::LEN` or it will panic.
+ fn store_u8(self, dest: &mut [u8]);
}
pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync {
@@ -357,6 +365,150 @@ pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync {
fn shr<const AMOUNT_U: u32, const AMOUNT_I: i32>(self) -> Self;
}
+/// # Safety
+///
+/// Implementors are required to respect the safety promises of the methods in this trait.
+/// Specifically, this applies to the store_*_uninit methods.
+pub unsafe trait U8SimdVec: Sized + Copy + Debug + Send + Sync {
+ type Descriptor: SimdDescriptor;
+
+ const LEN: usize;
+
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self;
+ fn splat(d: Self::Descriptor, v: u8) -> Self;
+ fn store(&self, mem: &mut [u8]);
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_2(a: Self, b: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_2_uninit(a, b, dest);
+ }
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_3_uninit(a, b, c, dest);
+ }
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u8]) {
+ // SAFETY: u8 and MaybeUninit<u8> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len())
+ };
+ Self::store_interleaved_4_uninit(a, b, c, d, dest);
+ }
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ ///
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]);
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]);
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_4_uninit(a: Self, b: Self, c: Self, d: Self, dest: &mut [MaybeUninit<u8>]);
+}
+
+/// # Safety
+///
+/// Implementors are required to respect the safety promises of the methods in this trait.
+/// Specifically, this applies to the store_*_uninit methods.
+pub unsafe trait U16SimdVec: Sized + Copy + Debug + Send + Sync {
+ type Descriptor: SimdDescriptor;
+
+ const LEN: usize;
+
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self;
+ fn splat(d: Self::Descriptor, v: u16) -> Self;
+ fn store(&self, mem: &mut [u16]);
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_2(a: Self, b: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_2_uninit(a, b, dest);
+ }
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_3_uninit(a, b, c, dest);
+ }
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ #[inline(always)]
+ fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u16]) {
+ // SAFETY: u16 and MaybeUninit<u16> have the same layout.
+ // We are writing to initialized memory, so treating it as uninit for writing is fine.
+ let dest = unsafe {
+ std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len())
+ };
+ Self::store_interleaved_4_uninit(a, b, c, d, dest);
+ }
+
+ /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...].
+ /// Requires `dest.len() >= 2 * Self::LEN` or it will panic.
+ ///
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]);
+
+ /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...].
+ /// Requires `dest.len() >= 3 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]);
+
+ /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...].
+ /// Requires `dest.len() >= 4 * Self::LEN` or it will panic.
+ /// Safety note:
+ /// Does not write uninitialized data into `dest`.
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ );
+}
+
#[macro_export]
macro_rules! shl {
($val: expr, $amount: literal) => {
@@ -436,7 +588,8 @@ mod test {
use arbtest::arbitrary::Unstructured;
use crate::{
- F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, test_all_instruction_sets,
+ F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, U8SimdVec, U16SimdVec,
+ test_all_instruction_sets,
};
enum Distribution {
@@ -1215,4 +1368,160 @@ mod test {
}
}
test_all_instruction_sets!(test_store_u16);
+
+ fn test_store_interleaved_2_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let mut output = vec![0u8; 2 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ D::U8Vec::store_interleaved_2(a_vec, b_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[2 * i], a[i]);
+ assert_eq!(output[2 * i + 1], b[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_2_u8);
+
+ fn test_store_interleaved_3_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect();
+ let mut output = vec![0u8; 3 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ let c_vec = D::U8Vec::load(d, &c);
+ D::U8Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[3 * i], a[i]);
+ assert_eq!(output[3 * i + 1], b[i]);
+ assert_eq!(output[3 * i + 2], c[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_3_u8);
+
+ fn test_store_interleaved_4_u8<D: SimdDescriptor>(d: D) {
+ let len = D::U8Vec::LEN;
+ let a: Vec<u8> = (0..len).map(|i| i as u8).collect();
+ let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect();
+ let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect();
+ let e: Vec<u8> = (0..len).map(|i| (i + 200) as u8).collect();
+ let mut output = vec![0u8; 4 * len];
+
+ let a_vec = D::U8Vec::load(d, &a);
+ let b_vec = D::U8Vec::load(d, &b);
+ let c_vec = D::U8Vec::load(d, &c);
+ let d_vec = D::U8Vec::load(d, &e);
+ D::U8Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[4 * i], a[i]);
+ assert_eq!(output[4 * i + 1], b[i]);
+ assert_eq!(output[4 * i + 2], c[i]);
+ assert_eq!(output[4 * i + 3], e[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_4_u8);
+
+ fn test_store_interleaved_2_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let mut output = vec![0u16; 2 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ D::U16Vec::store_interleaved_2(a_vec, b_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[2 * i], a[i]);
+ assert_eq!(output[2 * i + 1], b[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_2_u16);
+
+ fn test_store_interleaved_3_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect();
+ let mut output = vec![0u16; 3 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ let c_vec = D::U16Vec::load(d, &c);
+ D::U16Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[3 * i], a[i]);
+ assert_eq!(output[3 * i + 1], b[i]);
+ assert_eq!(output[3 * i + 2], c[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_3_u16);
+
+ fn test_store_interleaved_4_u16<D: SimdDescriptor>(d: D) {
+ let len = D::U16Vec::LEN;
+ let a: Vec<u16> = (0..len).map(|i| i as u16).collect();
+ let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect();
+ let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect();
+ let e: Vec<u16> = (0..len).map(|i| (i + 3000) as u16).collect();
+ let mut output = vec![0u16; 4 * len];
+
+ let a_vec = D::U16Vec::load(d, &a);
+ let b_vec = D::U16Vec::load(d, &b);
+ let c_vec = D::U16Vec::load(d, &c);
+ let d_vec = D::U16Vec::load(d, &e);
+ D::U16Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output);
+
+ for i in 0..len {
+ assert_eq!(output[4 * i], a[i]);
+ assert_eq!(output[4 * i + 1], b[i]);
+ assert_eq!(output[4 * i + 2], c[i]);
+ assert_eq!(output[4 * i + 3], e[i]);
+ }
+ }
+ test_all_instruction_sets!(test_store_interleaved_4_u16);
+
+ fn test_store_u8<D: SimdDescriptor>(d: D) {
+ let data = [
+ 0xba_i32,
+ 0x12345678_i32,
+ 0xdeadbabeu32 as i32,
+ 0x76543210_i32,
+ 0x11111111_i32,
+ 0x00000000_i32,
+ 0xffffffffu32 as i32,
+ 0x12345678_i32,
+ 0x87654321u32 as i32,
+ 0xabcdef01u32 as i32,
+ 0x10203040_i32,
+ 0x50607080_i32,
+ 0x01020304_i32,
+ 0x05060708_i32,
+ 0x090a0b0c_i32,
+ 0x0d0e0f00_i32,
+ ];
+ let mut output = [0u8; 16];
+ for i in (0..16).step_by(D::I32Vec::LEN) {
+ let vec = D::I32Vec::load(d, &data[i..]);
+ vec.store_u8(&mut output[i..]);
+ }
+
+ for i in 0..16 {
+ let expected = (data[i] & 0xff) as u8;
+ assert_eq!(
+ output[i], expected,
+ "store_u8 failed at index {}: expected 0x{:02x}, got 0x{:02x}",
+ i, expected, output[i]
+ );
+ }
+ }
+ test_all_instruction_sets!(test_store_u8);
}
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
index f0444c34bf4f8..a423db2f1d56b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs
@@ -8,7 +8,7 @@ use std::num::Wrapping;
use crate::{U32SimdVec, f16, impl_f32_array_interface};
-use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
#[derive(Clone, Copy, Debug)]
pub struct ScalarDescriptor;
@@ -17,6 +17,8 @@ impl SimdDescriptor for ScalarDescriptor {
type F32Vec = f32;
type I32Vec = Wrapping<i32>;
type U32Vec = Wrapping<u32>;
+ type U8Vec = u8;
+ type U16Vec = u16;
type Mask = bool;
type Bf16Table8 = [f32; 8];
@@ -310,6 +312,11 @@ impl I32SimdVec for Wrapping<i32> {
fn store_u16(self, dest: &mut [u16]) {
dest[0] = self.0 as u16;
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ dest[0] = self.0 as u8;
+ }
}
impl U32SimdVec for Wrapping<u32> {
@@ -328,6 +335,104 @@ impl U32SimdVec for Wrapping<u32> {
}
}
+// SAFETY: This implementation only writes initialized data into the
+// `&mut [MaybeUninit<u8>]` arguments to *_uninit methods.
+unsafe impl U8SimdVec for u8 {
+ type Descriptor = ScalarDescriptor;
+ const LEN: usize = 1;
+
+ #[inline(always)]
+ fn load(_d: Self::Descriptor, mem: &[u8]) -> Self {
+ mem[0]
+ }
+
+ #[inline(always)]
+ fn splat(_d: Self::Descriptor, v: u8) -> Self {
+ v
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ mem[0] = *self;
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ dest[3].write(d);
+ }
+}
+
+// SAFETY: This implementation only writes initialized data into the
+// `&mut [MaybeUninit<u16>]` arguments to *_uninit methods.
+unsafe impl U16SimdVec for u16 {
+ type Descriptor = ScalarDescriptor;
+ const LEN: usize = 1;
+
+ #[inline(always)]
+ fn load(_d: Self::Descriptor, mem: &[u16]) -> Self {
+ mem[0]
+ }
+
+ #[inline(always)]
+ fn splat(_d: Self::Descriptor, v: u16) -> Self {
+ v
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ mem[0] = *self;
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ dest[0].write(a);
+ dest[1].write(b);
+ dest[2].write(c);
+ dest[3].write(d);
+ }
+}
+
impl SimdMask for bool {
type Descriptor = ScalarDescriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
index 0da8ec9f0da4d..0ab752a656478 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs
@@ -5,7 +5,7 @@
use crate::{U32SimdVec, impl_f32_array_interface, x86_64::sse42::Sse42Descriptor};
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
use std::{
arch::x86_64::*,
mem::MaybeUninit,
@@ -124,6 +124,8 @@ impl SimdDescriptor for AvxDescriptor {
type F32Vec = F32VecAvx;
type I32Vec = I32VecAvx;
type U32Vec = U32VecAvx;
+ type U8Vec = U8VecAvx;
+ type U16Vec = U16VecAvx;
type Mask = MaskAvx;
type Bf16Table8 = Bf16Table8Avx;
@@ -198,16 +200,16 @@ unsafe impl F32SimdVec for F32VecAvx {
fn load(d: Self::Descriptor, mem: &[f32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `d`.
- Self(unsafe { _mm256_loadu_ps(mem.as_ptr()) }, d)
+ // from the safety invariant on `d`. _mm256_loadu_ps supports unaligned loads.
+ Self(unsafe { _mm256_loadu_ps(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
fn store(&self, mem: &mut [f32]) {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `self.1`.
- unsafe { _mm256_storeu_ps(mem.as_mut_ptr(), self.0) }
+ // from the safety invariant on `self.1`. _mm256_storeu_ps supports unaligned stores.
+ unsafe { _mm256_storeu_ps(mem.as_mut_ptr().cast(), self.0) }
}
#[inline(always)]
@@ -223,9 +225,9 @@ unsafe impl F32SimdVec for F32VecAvx {
// Need to permute to get correct order
let out0 = _mm256_permute2f128_ps::<0x20>(lo, hi); // lower halves: [a0,b0,a1,b1, a2,b2,a3,b3]
let out1 = _mm256_permute2f128_ps::<0x31>(lo, hi); // upper halves: [a4,b4,a5,b5, a6,b6,a7,b7]
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
}
@@ -274,9 +276,9 @@ unsafe impl F32SimdVec for F32VecAvx {
let out2 = _mm256_blend_ps::<0b01001001>(a2, b2);
let out2 = _mm256_blend_ps::<0b10010010>(out2, c2);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
_mm256_storeu_ps(dest_ptr.add(16), out2);
@@ -335,9 +337,9 @@ unsafe impl F32SimdVec for F32VecAvx {
let out2 = _mm256_permute2f128_ps::<0x31>(abcd_0, abcd_1);
let out3 = _mm256_permute2f128_ps::<0x31>(abcd_2, abcd_3);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm256_storeu_ps(dest_ptr, out0);
_mm256_storeu_ps(dest_ptr.add(8), out1);
_mm256_storeu_ps(dest_ptr.add(16), out2);
@@ -636,9 +638,15 @@ unsafe impl F32SimdVec for F32VecAvx {
// Pack 8 u16s to 8 u8s (use same vector twice, take lower half)
let u8s = _mm_packus_epi16(u16s, u16s);
// Store lower 8 bytes
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si64(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u8s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: avx2 is available from the safety invariant on the descriptor.
@@ -661,9 +669,9 @@ unsafe impl F32SimdVec for F32VecAvx {
// Pack 4+4 i32s to 8 u16s
let u16s = _mm_packus_epi32(lo, hi);
// Store 8 u16s (16 bytes)
- // SAFETY: we checked dest has enough space
+ // SAFETY: we checked dest has enough space. _mm_storeu_si128 supports unaligned stores.
unsafe {
- _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u16s);
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), u16s);
}
}
// SAFETY: avx2 is available from the safety invariant on the descriptor.
@@ -678,8 +686,8 @@ unsafe impl F32SimdVec for F32VecAvx {
#[inline]
fn load_f16_impl(d: AvxDescriptor, mem: &[u16]) -> F32VecAvx {
assert!(mem.len() >= F32VecAvx::LEN);
- // SAFETY: mem.len() >= 8 is checked above
- let bits = unsafe { _mm_loadu_si128(mem.as_ptr() as *const __m128i) };
+ // SAFETY: mem.len() >= 8 is checked above. _mm_loadu_si128 supports unaligned loads.
+ let bits = unsafe { _mm_loadu_si128(mem.as_ptr().cast()) };
F32VecAvx(_mm256_cvtph_ps(bits), d)
}
// SAFETY: avx2 and f16c are available from the safety invariant on the descriptor
@@ -693,8 +701,8 @@ unsafe impl F32SimdVec for F32VecAvx {
fn store_f16_bits_impl(v: __m256, dest: &mut [u16]) {
assert!(dest.len() >= F32VecAvx::LEN);
let bits = _mm256_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v);
- // SAFETY: dest.len() >= 8 is checked above
- unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, bits) };
+ // SAFETY: dest.len() >= 8 is checked above. _mm_storeu_si128 supports unaligned stores.
+ unsafe { _mm_storeu_si128(dest.as_mut_ptr().cast(), bits) };
}
// SAFETY: avx2 and f16c are available from the safety invariant on the descriptor
unsafe { store_f16_bits_impl(self.0, dest) }
@@ -800,8 +808,8 @@ impl I32SimdVec for I32VecAvx {
fn load(d: Self::Descriptor, mem: &[i32]) -> Self {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available
- // from the safety invariant on `d`.
- Self(unsafe { _mm256_loadu_si256(mem.as_ptr() as *const _) }, d)
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ Self(unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
@@ -893,7 +901,7 @@ impl I32SimdVec for I32VecAvx {
),
);
let tmp = _mm256_permute4x64_epi64(tmp, 0xD8);
- // SAFETY: we just checked that `dest` has enough space.
+ // SAFETY: we just checked that `dest` has enough space. _mm_storeu_si128 supports unaligned stores.
unsafe {
_mm_storeu_si128(dest.as_mut_ptr().cast(), _mm256_extracti128_si256::<0>(tmp))
};
@@ -901,6 +909,38 @@ impl I32SimdVec for I32VecAvx {
// SAFETY: avx2 is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_u8_impl(v: __m256i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecAvx::LEN);
+ let tmp = _mm256_shuffle_epi8(
+ v,
+ _mm256_setr_epi8(
+ 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, //
+ 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ ),
+ );
+ let lo = _mm256_castsi256_si128(tmp);
+ let hi = _mm256_extracti128_si256::<1>(tmp);
+ let packed = _mm_unpacklo_epi32(lo, hi);
+ let val = _mm_cvtsi128_si64(packed);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. we just checked that `dest` has enough space (dest.len() >= 8).
+ // 2. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 3. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8.
+ // 4. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 5. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecAvx> for I32VecAvx {
@@ -1035,6 +1075,414 @@ impl U32SimdVec for U32VecAvx {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecAvx(__m256i, AvxDescriptor);
+
+// SAFETY: The `store_interleaved_*_uninit` methods of this implementation
+// only write initialized data into the `&mut [MaybeUninit<u8>]` output slices.
+unsafe impl U8SimdVec for U8VecAvx {
+ type Descriptor = AvxDescriptor;
+ const LEN: usize = 32;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= U8VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know avx2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm256_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= U8VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `self.1`. _mm256_storeu_si256 supports unaligned stores.
+ unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecAvx::LEN);
+ // a = [A0..A15 | A16..A31]
+ // b = [B0..B15 | B16..B31]
+ let lo = _mm256_unpacklo_epi8(a, b); // [A0 B0..A7 B7 | A16 B16..A23 B23]
+ let hi = _mm256_unpackhi_epi8(a, b); // [A8 B8..A15 B15 | A24 B24..A31 B31]
+
+ // R0 = [A0 B0..A7 B7 | A8 B8..A15 B15]
+ let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi);
+ // R1 = [A16 B16..A23 B23 | A24 B24..A31 B31]
+ let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 3 * U8VecAvx::LEN);
+
+ // U8 Masks
+ let mask_a0 = _mm256_setr_epi8(
+ 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1,
+ -1, 8, -1, -1, 9, -1, -1, 10, -1,
+ );
+ let mask_a1 = _mm256_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1,
+ -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5,
+ );
+ let mask_a2 = _mm256_setr_epi8(
+ -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1,
+ -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ );
+ let mask_b0 = _mm256_setr_epi8(
+ -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7,
+ -1, -1, 8, -1, -1, 9, -1, -1, 10,
+ );
+ let mask_b1 = _mm256_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1,
+ -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1,
+ );
+ let mask_b2 = _mm256_setr_epi8(
+ 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12,
+ -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ );
+ let mask_c0 = _mm256_setr_epi8(
+ -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1,
+ -1, 7, -1, -1, 8, -1, -1, 9, -1, -1,
+ );
+ let mask_c1 = _mm256_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1,
+ 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1,
+ );
+ let mask_c2 = _mm256_setr_epi8(
+ -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1,
+ 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ );
+
+ // Create duplicated vectors for lane swizzling
+ let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a);
+ let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b);
+ let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c);
+
+ let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a);
+ let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b);
+ let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c);
+
+ let out0 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_lo, mask_a0),
+ _mm256_shuffle_epi8(b_dup_lo, mask_b0),
+ ),
+ _mm256_shuffle_epi8(c_dup_lo, mask_c0),
+ );
+
+ let out1 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a, mask_a1),
+ _mm256_shuffle_epi8(b, mask_b1),
+ ),
+ _mm256_shuffle_epi8(c, mask_c1),
+ );
+
+ let out2 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_hi, mask_a2),
+ _mm256_shuffle_epi8(b_dup_hi, mask_b2),
+ ),
+ _mm256_shuffle_epi8(c_dup_hi, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ d: __m256i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * U8VecAvx::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm256_unpacklo_epi8(a, b);
+ let ab_hi = _mm256_unpackhi_epi8(a, b);
+ let cd_lo = _mm256_unpacklo_epi8(c, d);
+ let cd_hi = _mm256_unpackhi_epi8(c, d);
+
+ // Then interleave the pairs to get 4-byte chunks
+ let out0_p = _mm256_unpacklo_epi16(ab_lo, cd_lo);
+ let out1_p = _mm256_unpackhi_epi16(ab_lo, cd_lo);
+ let out2_p = _mm256_unpacklo_epi16(ab_hi, cd_hi);
+ let out3_p = _mm256_unpackhi_epi16(ab_hi, cd_hi);
+
+ // Reorder lanes
+ let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p);
+ let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p);
+ let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p);
+ let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ _mm256_storeu_si256(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecAvx(__m256i, AvxDescriptor);
+
+// SAFETY: The `store_interleaved_*_uninit` methods of this implementation
+// only write initialized data into the `&mut [MaybeUninit<u16>]` output slices.
+unsafe impl U16SimdVec for U16VecAvx {
+ type Descriptor = AvxDescriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= U16VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads.
+ unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { Self(_mm256_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= U16VecAvx::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available
+ // from the safety invariant on `self.1`. _mm256_storeu_si256 supports unaligned stores.
+ unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecAvx::LEN);
+ // a = [A0..A7 | A8..A15]
+ // b = [B0..B7 | B8..B15]
+ let lo = _mm256_unpacklo_epi16(a, b); // [A0 B0..A3 B3 | A8 B8..A11 B11]
+ let hi = _mm256_unpackhi_epi16(a, b); // [A4 B4..A7 B7 | A12 B12..A15 B15]
+
+ // R0 = [A0 B0..A7 B7]
+ let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi);
+ // R1 = [A8 B8..A15 B15]
+ let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 3 * U16VecAvx::LEN);
+
+ // U16 Masks
+ let mask_a0 = _mm256_setr_epi8(
+ 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1,
+ -1, 8, 9, -1, -1, -1, -1, 10, 11,
+ );
+ let mask_a1 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1, -1, -1,
+ -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1,
+ );
+ let mask_a2 = _mm256_setr_epi8(
+ -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1, 12, 13,
+ -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ );
+ let mask_b0 = _mm256_setr_epi8(
+ -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1,
+ -1, -1, -1, 8, 9, -1, -1, -1, -1,
+ );
+ let mask_b1 = _mm256_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1,
+ -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5,
+ );
+ let mask_b2 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1,
+ 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ );
+ let mask_c0 = _mm256_setr_epi8(
+ -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1,
+ 6, 7, -1, -1, -1, -1, 8, 9, -1, -1,
+ );
+ let mask_c1 = _mm256_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0,
+ 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1,
+ );
+ let mask_c2 = _mm256_setr_epi8(
+ 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1,
+ -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ );
+
+ // Create duplicated vectors for lane swizzling
+ let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a);
+ let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b);
+ let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c);
+
+ let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a);
+ let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b);
+ let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c);
+
+ let out0 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_lo, mask_a0),
+ _mm256_shuffle_epi8(b_dup_lo, mask_b0),
+ ),
+ _mm256_shuffle_epi8(c_dup_lo, mask_c0),
+ );
+
+ let out1 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a, mask_a1),
+ _mm256_shuffle_epi8(b, mask_b1),
+ ),
+ _mm256_shuffle_epi8(c, mask_c1),
+ );
+
+ let out2 = _mm256_or_si256(
+ _mm256_or_si256(
+ _mm256_shuffle_epi8(a_dup_hi, mask_a2),
+ _mm256_shuffle_epi8(b_dup_hi, mask_b2),
+ ),
+ _mm256_shuffle_epi8(c_dup_hi, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "avx2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m256i,
+ b: __m256i,
+ c: __m256i,
+ d: __m256i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecAvx::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm256_unpacklo_epi16(a, b);
+ let ab_hi = _mm256_unpackhi_epi16(a, b);
+ let cd_lo = _mm256_unpacklo_epi16(c, d);
+ let cd_hi = _mm256_unpackhi_epi16(c, d);
+
+ // Then interleave the pairs to get 4-u16 chunks (8 bytes)
+ let out0_p = _mm256_unpacklo_epi32(ab_lo, cd_lo);
+ let out1_p = _mm256_unpackhi_epi32(ab_lo, cd_lo);
+ let out2_p = _mm256_unpacklo_epi32(ab_hi, cd_hi);
+ let out3_p = _mm256_unpackhi_epi32(ab_hi, cd_hi);
+
+ // Reorder lanes
+ let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p);
+ let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p);
+ let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p);
+ let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m256i>();
+ _mm256_storeu_si256(dest_ptr, out0);
+ _mm256_storeu_si256(dest_ptr.add(1), out1);
+ _mm256_storeu_si256(dest_ptr.add(2), out2);
+ _mm256_storeu_si256(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: avx2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskAvx {
type Descriptor = AvxDescriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
index 89086c50c9715..48bc32a61032b 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs
@@ -3,7 +3,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-use super::super::{AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{
+ AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec,
+};
use crate::{Sse42Descriptor, U32SimdVec, impl_f32_array_interface};
use std::{
arch::x86_64::*,
@@ -43,6 +45,8 @@ impl SimdDescriptor for Avx512Descriptor {
type F32Vec = F32VecAvx512;
type I32Vec = I32VecAvx512;
type U32Vec = U32VecAvx512;
+ type U8Vec = U8VecAvx512;
+ type U16Vec = U16VecAvx512;
type Mask = MaskAvx512;
type Bf16Table8 = Bf16Table8Avx512;
@@ -149,9 +153,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out0 = _mm512_permutex2var_ps(lo, idx_lo, hi);
let out1 = _mm512_permutex2var_ps(lo, idx_hi, hi);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
}
@@ -192,9 +196,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out2 = _mm512_permutex2var_ps(a, idx_ab2, b);
let out2 = _mm512_mask_permutexvar_ps(out2, 0b1001001001001001, idx_c2, c);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
_mm512_storeu_ps(dest_ptr.add(32), out2);
@@ -291,9 +295,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out1 = _mm512_permutex2var_ps(pair01_13, idx_0, pair23_13);
let out3 = _mm512_permutex2var_ps(pair01_13, idx_1, pair23_13);
- // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(dest_ptr, out0);
_mm512_storeu_ps(dest_ptr.add(16), out1);
_mm512_storeu_ps(dest_ptr.add(32), out2);
@@ -428,9 +432,9 @@ unsafe impl F32SimdVec for F32VecAvx512 {
let out6 = _mm512_permutex2var_ps(full_0_13, idx_hi, full_1_13);
let out7 = _mm512_permutex2var_ps(full_2_13, idx_hi, full_3_13);
- // SAFETY: we just checked that dest has enough space.
+ // SAFETY: we just checked that dest has enough space. _mm512_storeu_ps supports unaligned stores.
unsafe {
- let ptr = dest.as_mut_ptr();
+ let ptr = dest.as_mut_ptr().cast::<f32>();
_mm512_storeu_ps(ptr, out0);
_mm512_storeu_ps(ptr.add(16), out1);
_mm512_storeu_ps(ptr.add(32), out2);
@@ -454,7 +458,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
assert!(src.len() >= 2 * F32VecAvx512::LEN);
// Input: [a0,b0,a1,b1,...,a15,b15]
// Output: a = [a0..a15], b = [b0..b15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -491,7 +495,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// in2: [c10,a11,b11,c11,a12,b12,c12,a13,b13,c13,a14,b14,c14,a15,b15,c15]
// Output: a = [a0..a15], b = [b0..b15], c = [c0..c15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1, in2) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -544,7 +548,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
assert!(src.len() >= 4 * F32VecAvx512::LEN);
// Input: [a0,b0,c0,d0,a1,b1,c1,d1,...] (64 floats)
// Output: a = [a0..a15], b = [b0..b15], c = [c0..c15], d = [d0..d15]
- // SAFETY: we just checked that src has enough space.
+ // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads.
let (in0, in1, in2, in3) = unsafe {
(
_mm512_loadu_ps(src.as_ptr()),
@@ -700,7 +704,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// Store 16 bytes
// SAFETY: we checked dest has enough space
unsafe {
- _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u8s);
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), u8s);
}
}
// SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor.
@@ -722,7 +726,7 @@ unsafe impl F32SimdVec for F32VecAvx512 {
// Store 16 u16s (32 bytes)
// SAFETY: we checked dest has enough space
unsafe {
- _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, u16s);
+ _mm256_storeu_si256(dest.as_mut_ptr().cast(), u16s);
}
}
// SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor.
@@ -738,8 +742,8 @@ unsafe impl F32SimdVec for F32VecAvx512 {
#[inline]
fn load_f16_impl(d: Avx512Descriptor, mem: &[u16]) -> F32VecAvx512 {
assert!(mem.len() >= F32VecAvx512::LEN);
- // SAFETY: mem.len() >= 16 is checked above
- let bits = unsafe { _mm256_loadu_si256(mem.as_ptr() as *const __m256i) };
+ // SAFETY: mem.len() >= 16 is checked above.
+ let bits = unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) };
F32VecAvx512(_mm512_cvtph_ps(bits), d)
}
// SAFETY: avx512f is available from the safety invariant on the descriptor
@@ -754,8 +758,8 @@ unsafe impl F32SimdVec for F32VecAvx512 {
fn store_f16_bits_impl(v: __m512, dest: &mut [u16]) {
assert!(dest.len() >= F32VecAvx512::LEN);
let bits = _mm512_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v);
- // SAFETY: dest.len() >= 16 is checked above
- unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, bits) };
+ // SAFETY: dest.len() >= 16 is checked above.
+ unsafe { _mm256_storeu_si256(dest.as_mut_ptr().cast(), bits) };
}
// SAFETY: avx512f is available from the safety invariant on the descriptor
unsafe { store_f16_bits_impl(self.0, dest) }
@@ -1070,6 +1074,22 @@ impl I32SimdVec for I32VecAvx512 {
// SAFETY: avx512f is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "avx512f")]
+ #[inline]
+ fn store_u8_impl(v: __m512i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecAvx512::LEN);
+ let tmp_vec = _mm512_cvtepi32_epi8(v);
+            // SAFETY: We just checked `dest` has enough space.
+ unsafe {
+ _mm_storeu_si128(dest.as_mut_ptr().cast(), tmp_vec);
+ }
+ }
+ // SAFETY: avx512f is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecAvx512> for I32VecAvx512 {
@@ -1204,6 +1224,398 @@ impl U32SimdVec for U32VecAvx512 {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecAvx512(__m512i, Avx512Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecAvx512 {
+ type Descriptor = Avx512Descriptor;
+ const LEN: usize = 64;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+ // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads.
+ unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know avx512f is available from the safety invariant on `d`.
+ unsafe { Self(_mm512_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+        // from the safety invariant on `self.1`. _mm512_storeu_si512 supports unaligned stores.
+ unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecAvx512::LEN);
+ let lo = _mm512_unpacklo_epi8(a, b);
+ let hi = _mm512_unpackhi_epi8(a, b);
+ let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+ let out0 = _mm512_permutex2var_epi64(lo, idx0, hi);
+ let out1 = _mm512_permutex2var_epi64(lo, idx1, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_2(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 3 * U8VecAvx512::LEN);
+
+ let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5,
+ ));
+ let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1,
+ ));
+ let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1,
+ ));
+
+ let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1,
+ ));
+ let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10,
+ ));
+ let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1,
+ ));
+
+ let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ ));
+ let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ ));
+ let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ ));
+
+ let res0 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a0),
+ _mm512_shuffle_epi8(b, mask_b0),
+ ),
+ _mm512_shuffle_epi8(c, mask_c0),
+ );
+ let res1 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a1),
+ _mm512_shuffle_epi8(b, mask_b1),
+ ),
+ _mm512_shuffle_epi8(c, mask_c1),
+ );
+ let res2 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a2),
+ _mm512_shuffle_epi8(b, mask_b2),
+ ),
+ _mm512_shuffle_epi8(c, mask_c2),
+ );
+ let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1);
+ let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1);
+ let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5);
+ let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2);
+ let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1);
+ let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2);
+ let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5);
+ let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0);
+ let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1);
+ let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0);
+ let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5);
+ let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, final0);
+ _mm512_storeu_si512(ptr.add(1), final1);
+ _mm512_storeu_si512(ptr.add(2), final2);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_3(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u8_4(a: __m512i, b: __m512i, c: __m512i, d: __m512i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 4 * U8VecAvx512::LEN);
+ let ab_lo = _mm512_unpacklo_epi8(a, b);
+ let ab_hi = _mm512_unpackhi_epi8(a, b);
+ let cd_lo = _mm512_unpacklo_epi8(c, d);
+ let cd_hi = _mm512_unpackhi_epi8(c, d);
+
+ let abcd_0 = _mm512_unpacklo_epi16(ab_lo, cd_lo);
+ let abcd_1 = _mm512_unpackhi_epi16(ab_lo, cd_lo);
+ let abcd_2 = _mm512_unpacklo_epi16(ab_hi, cd_hi);
+ let abcd_3 = _mm512_unpackhi_epi16(ab_hi, cd_hi);
+
+ let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+
+ let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1);
+ let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1);
+ let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3);
+ let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3);
+
+ let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11);
+ let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15);
+
+ let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02);
+ let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02);
+ let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13);
+ let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ _mm512_storeu_si512(ptr.add(2), out2);
+ _mm512_storeu_si512(ptr.add(3), out3);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u8_4(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecAvx512(__m512i, Avx512Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecAvx512 {
+ type Descriptor = Avx512Descriptor;
+ const LEN: usize = 32;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+ // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads.
+ unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+        // SAFETY: We know avx512f is available from the safety invariant on `d`.
+ unsafe { Self(_mm512_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available
+        // from the safety invariant on `self.1`. _mm512_storeu_si512 supports unaligned stores.
+ unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecAvx512::LEN);
+ let lo = _mm512_unpacklo_epi16(a, b);
+ let hi = _mm512_unpackhi_epi16(a, b);
+ let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+ let out0 = _mm512_permutex2var_epi64(lo, idx0, hi);
+ let out1 = _mm512_permutex2var_epi64(lo, idx1, hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_2(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 3 * U16VecAvx512::LEN);
+
+ let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1,
+ ));
+ let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5,
+ ));
+ let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1,
+ ));
+
+ let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11,
+ ));
+ let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1,
+ ));
+ let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1,
+ ));
+
+ let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ ));
+ let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ ));
+ let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ ));
+
+ let res0 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a0),
+ _mm512_shuffle_epi8(b, mask_b0),
+ ),
+ _mm512_shuffle_epi8(c, mask_c0),
+ );
+ let res1 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a1),
+ _mm512_shuffle_epi8(b, mask_b1),
+ ),
+ _mm512_shuffle_epi8(c, mask_c1),
+ );
+ let res2 = _mm512_or_si512(
+ _mm512_or_si512(
+ _mm512_shuffle_epi8(a, mask_a2),
+ _mm512_shuffle_epi8(b, mask_b2),
+ ),
+ _mm512_shuffle_epi8(c, mask_c2),
+ );
+
+ let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1);
+ let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1);
+ let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5);
+ let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2);
+
+ let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1);
+ let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2);
+ let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5);
+ let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0);
+
+ let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1);
+ let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0);
+ let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5);
+ let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, final0);
+ _mm512_storeu_si512(ptr.add(1), final1);
+ _mm512_storeu_si512(ptr.add(2), final2);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_3(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "avx512f,avx512bw")]
+ #[inline]
+ fn impl_u16_4(
+ a: __m512i,
+ b: __m512i,
+ c: __m512i,
+ d: __m512i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecAvx512::LEN);
+ let ab_lo = _mm512_unpacklo_epi16(a, b);
+ let ab_hi = _mm512_unpackhi_epi16(a, b);
+ let cd_lo = _mm512_unpacklo_epi16(c, d);
+ let cd_hi = _mm512_unpackhi_epi16(c, d);
+
+ let abcd_0 = _mm512_unpacklo_epi32(ab_lo, cd_lo);
+ let abcd_1 = _mm512_unpackhi_epi32(ab_lo, cd_lo);
+ let abcd_2 = _mm512_unpacklo_epi32(ab_hi, cd_hi);
+ let abcd_3 = _mm512_unpackhi_epi32(ab_hi, cd_hi);
+
+ // Transpose 4x4 of 128-bit lanes (same as u8)
+ let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11);
+ let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15);
+
+ let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1);
+ let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1);
+ let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3);
+ let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3);
+
+ let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11);
+ let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15);
+
+ let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02);
+ let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02);
+ let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13);
+ let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m512i>();
+ _mm512_storeu_si512(ptr, out0);
+ _mm512_storeu_si512(ptr.add(1), out1);
+ _mm512_storeu_si512(ptr.add(2), out2);
+ _mm512_storeu_si512(ptr.add(3), out3);
+ }
+ }
+ // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`.
+ unsafe { impl_u16_4(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskAvx512 {
type Descriptor = Avx512Descriptor;
diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
index b4021570c3f6d..5a4f52f4c30f9 100644
--- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
+++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs
@@ -5,7 +5,7 @@
use crate::{U32SimdVec, impl_f32_array_interface};
-use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask};
+use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec};
use std::{
arch::x86_64::*,
mem::MaybeUninit,
@@ -31,6 +31,8 @@ impl SimdDescriptor for Sse42Descriptor {
type F32Vec = F32VecSse42;
type I32Vec = I32VecSse42;
type U32Vec = U32VecSse42;
+ type U16Vec = U16VecSse42;
+ type U8Vec = U8VecSse42;
type Mask = MaskSse42;
type Bf16Table8 = Bf16Table8Sse42;
@@ -131,7 +133,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
let hi = _mm_unpackhi_ps(a, b);
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, lo);
_mm_storeu_ps(dest_ptr.add(4), hi);
}
@@ -184,7 +186,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
// Store the results
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, out0);
_mm_storeu_ps(dest_ptr.add(4), out1);
_mm_storeu_ps(dest_ptr.add(8), out2);
@@ -227,7 +229,7 @@ unsafe impl F32SimdVec for F32VecSse42 {
// SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid.
unsafe {
- let dest_ptr = dest.as_mut_ptr() as *mut f32;
+ let dest_ptr = dest.as_mut_ptr().cast::<f32>();
_mm_storeu_ps(dest_ptr, out0);
_mm_storeu_ps(dest_ptr.add(4), out1);
_mm_storeu_ps(dest_ptr.add(8), out2);
@@ -575,10 +577,15 @@ unsafe impl F32SimdVec for F32VecSse42 {
let u16s = _mm_packus_epi32(i32s, i32s);
let u8s = _mm_packus_epi16(u16s, u16s);
// Store lower 4 bytes
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si32(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- let ptr = dest.as_mut_ptr() as *mut i32;
- *ptr = _mm_cvtsi128_si32(u8s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
@@ -598,9 +605,15 @@ unsafe impl F32SimdVec for F32VecSse42 {
// Pack i32 -> u16 (use same vector twice, take lower half)
let u16s = _mm_packus_epi32(i32s, i32s);
// Store lower 8 bytes (4 u16s)
- // SAFETY: we checked dest has enough space
+ let val = _mm_cvtsi128_si64(u16s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
unsafe {
- _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u16s);
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
@@ -732,7 +745,7 @@ impl I32SimdVec for I32VecSse42 {
assert!(mem.len() >= Self::LEN);
// SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
// from the safety invariant on `d`.
- Self(unsafe { _mm_loadu_si128(mem.as_ptr() as *const _) }, d)
+ Self(unsafe { _mm_loadu_si128(mem.as_ptr().cast()) }, d)
}
#[inline(always)]
@@ -820,17 +833,50 @@ impl I32SimdVec for I32VecSse42 {
#[inline]
fn store_u16_impl(v: __m128i, dest: &mut [u16]) {
assert!(dest.len() >= I32VecSse42::LEN);
- // Use scalar loop since _mm_packs_epi32 would saturate incorrectly for unsigned values
- let mut tmp = [0i32; 4];
- // SAFETY: tmp has 4 elements, matching LEN
- unsafe { _mm_storeu_si128(tmp.as_mut_ptr() as *mut __m128i, v) };
- for i in 0..4 {
- dest[i] = tmp[i] as u16;
+ // Truncate i32 -> u16 using shuffle
+ let shuffle_mask =
+ _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1);
+ let u16s = _mm_shuffle_epi8(v, shuffle_mask);
+ let val = _mm_cvtsi128_si64(u16s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8);
}
}
// SAFETY: sse4.2 is available from the safety invariant on the descriptor.
unsafe { store_u16_impl(self.0, dest) }
}
+
+ #[inline(always)]
+ fn store_u8(self, dest: &mut [u8]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_u8_impl(v: __m128i, dest: &mut [u8]) {
+ assert!(dest.len() >= I32VecSse42::LEN);
+ // Truncate i32 -> u8 using shuffle
+ let shuffle_mask =
+ _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+ let u8s = _mm_shuffle_epi8(v, shuffle_mask);
+ // Store lower 4 bytes
+ let val = _mm_cvtsi128_si32(u8s);
+ let bytes = val.to_ne_bytes();
+ // SAFETY:
+ // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4].
+ // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4.
+ // 3. `src` and `dst` are properly aligned for u8 (alignment 1).
+ // 4. `src` and `dst` do not overlap as `src` is a local stack array.
+ unsafe {
+ std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_u8_impl(self.0, dest) }
+ }
}
impl Add<I32VecSse42> for I32VecSse42 {
@@ -939,6 +985,312 @@ impl U32SimdVec for U32VecSse42 {
}
}
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U8VecSse42(__m128i, Sse42Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U8SimdVec for U8VecSse42 {
+ type Descriptor = Sse42Descriptor;
+ const LEN: usize = 16;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u8]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `d`.
+ unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u8) -> Self {
+ // SAFETY: We know sse4.2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm_set1_epi8(v as i8), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u8]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `self.1`.
+ unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u8>]) {
+ assert!(dest.len() >= 2 * U8VecSse42::LEN);
+ let lo = _mm_unpacklo_epi8(a, b);
+ let hi = _mm_unpackhi_epi8(a, b);
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, lo);
+ _mm_storeu_si128(dest_ptr.add(1), hi);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 3 * U8VecSse42::LEN);
+
+ // Masks for out0
+ let mask_a0 = _mm_setr_epi8(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5);
+ let mask_b0 = _mm_setr_epi8(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1);
+ let mask_c0 = _mm_setr_epi8(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1);
+
+ // Masks for out1
+ let mask_a1 = _mm_setr_epi8(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1);
+ let mask_b1 = _mm_setr_epi8(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10);
+ let mask_c1 = _mm_setr_epi8(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1);
+
+ // Masks for out2
+ let mask_a2 = _mm_setr_epi8(
+ -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1,
+ );
+ let mask_b2 = _mm_setr_epi8(
+ -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1,
+ );
+ let mask_c2 = _mm_setr_epi8(
+ 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15,
+ );
+
+ let out0 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)),
+ _mm_shuffle_epi8(c, mask_c0),
+ );
+ let out1 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)),
+ _mm_shuffle_epi8(c, mask_c1),
+ );
+ let out2 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)),
+ _mm_shuffle_epi8(c, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(ptr, out0);
+ _mm_storeu_si128(ptr.add(1), out1);
+ _mm_storeu_si128(ptr.add(2), out2);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ d: __m128i,
+ dest: &mut [MaybeUninit<u8>],
+ ) {
+ assert!(dest.len() >= 4 * U8VecSse42::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm_unpacklo_epi8(a, b);
+ let ab_hi = _mm_unpackhi_epi8(a, b);
+ let cd_lo = _mm_unpacklo_epi8(c, d);
+ let cd_hi = _mm_unpackhi_epi8(c, d);
+
+ // Then interleave the pairs to get final layout
+ let out0 = _mm_unpacklo_epi16(ab_lo, cd_lo);
+ let out1 = _mm_unpackhi_epi16(ab_lo, cd_lo);
+ let out2 = _mm_unpacklo_epi16(ab_hi, cd_hi);
+ let out3 = _mm_unpackhi_epi16(ab_hi, cd_hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, out0);
+ _mm_storeu_si128(dest_ptr.add(1), out1);
+ _mm_storeu_si128(dest_ptr.add(2), out2);
+ _mm_storeu_si128(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct U16VecSse42(__m128i, Sse42Descriptor);
+
+// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*)
+// ensure that they write valid data to the output slice without reading uninitialized memory.
+unsafe impl U16SimdVec for U16VecSse42 {
+ type Descriptor = Sse42Descriptor;
+ const LEN: usize = 8;
+
+ #[inline(always)]
+ fn load(d: Self::Descriptor, mem: &[u16]) -> Self {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `d`.
+ unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) }
+ }
+
+ #[inline(always)]
+ fn splat(d: Self::Descriptor, v: u16) -> Self {
+ // SAFETY: We know sse4.2 is available from the safety invariant on `d`.
+ unsafe { Self(_mm_set1_epi16(v as i16), d) }
+ }
+
+ #[inline(always)]
+ fn store(&self, mem: &mut [u16]) {
+ assert!(mem.len() >= Self::LEN);
+ // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available
+ // from the safety invariant on `self.1`.
+ unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u16>]) {
+ assert!(dest.len() >= 2 * U16VecSse42::LEN);
+ let lo = _mm_unpacklo_epi16(a, b);
+ let hi = _mm_unpackhi_epi16(a, b);
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, lo);
+ _mm_storeu_si128(dest_ptr.add(1), hi);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_2_impl(a.0, b.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_3_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 3 * U16VecSse42::LEN);
+
+ // Masks for out0
+ let mask_a0 = _mm_setr_epi8(0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1);
+ let mask_b0 = _mm_setr_epi8(-1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5);
+ let mask_c0 = _mm_setr_epi8(-1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1);
+
+ // Masks for out1
+ let mask_a1 = _mm_setr_epi8(-1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11);
+ let mask_b1 = _mm_setr_epi8(-1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1);
+ let mask_c1 = _mm_setr_epi8(4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1);
+
+ // Masks for out2
+ let mask_a2 = _mm_setr_epi8(
+ -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1,
+ );
+ let mask_b2 = _mm_setr_epi8(
+ 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1,
+ );
+ let mask_c2 = _mm_setr_epi8(
+ -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15,
+ );
+
+ let out0 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)),
+ _mm_shuffle_epi8(c, mask_c0),
+ );
+ let out1 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)),
+ _mm_shuffle_epi8(c, mask_c1),
+ );
+ let out2 = _mm_or_si128(
+ _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)),
+ _mm_shuffle_epi8(c, mask_c2),
+ );
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(ptr, out0);
+ _mm_storeu_si128(ptr.add(1), out1);
+ _mm_storeu_si128(ptr.add(2), out2);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) }
+ }
+
+ #[inline(always)]
+ fn store_interleaved_4_uninit(
+ a: Self,
+ b: Self,
+ c: Self,
+ d: Self,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ #[target_feature(enable = "sse4.2")]
+ #[inline]
+ fn store_interleaved_4_impl(
+ a: __m128i,
+ b: __m128i,
+ c: __m128i,
+ d: __m128i,
+ dest: &mut [MaybeUninit<u16>],
+ ) {
+ assert!(dest.len() >= 4 * U16VecSse42::LEN);
+ // First interleave pairs: ab and cd
+ let ab_lo = _mm_unpacklo_epi16(a, b);
+ let ab_hi = _mm_unpackhi_epi16(a, b);
+ let cd_lo = _mm_unpacklo_epi16(c, d);
+ let cd_hi = _mm_unpackhi_epi16(c, d);
+
+ // Then interleave the pairs to get final layout
+ let out0 = _mm_unpacklo_epi32(ab_lo, cd_lo);
+ let out1 = _mm_unpackhi_epi32(ab_lo, cd_lo);
+ let out2 = _mm_unpacklo_epi32(ab_hi, cd_hi);
+ let out3 = _mm_unpackhi_epi32(ab_hi, cd_hi);
+
+ // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid.
+ unsafe {
+ let dest_ptr = dest.as_mut_ptr().cast::<__m128i>();
+ _mm_storeu_si128(dest_ptr, out0);
+ _mm_storeu_si128(dest_ptr.add(1), out1);
+ _mm_storeu_si128(dest_ptr.add(2), out2);
+ _mm_storeu_si128(dest_ptr.add(3), out3);
+ }
+ }
+ // SAFETY: sse4.2 is available from the safety invariant on the descriptor.
+ unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) }
+ }
+}
+
impl SimdMask for MaskSse42 {
type Descriptor = Sse42Descriptor;
diff --git a/third_party/rust/jxl/v0_3/BUILD.gn b/third_party/rust/jxl/v0_3/BUILD.gn
index 4407346d077db..87612e3a76cfa 100644
--- a/third_party/rust/jxl/v0_3/BUILD.gn
+++ b/third_party/rust/jxl/v0_3/BUILD.gn
@@ -28,11 +28,13 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/signature.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/bit_reader.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/tf.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/box_header.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/parse.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/context_map.rs",
@@ -53,6 +55,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/color_correlation_map.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs",
@@ -101,6 +104,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/channels.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs",
@@ -148,6 +152,7 @@ cargo_crate("lib") {
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/float16.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/linalg.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/log2.rs",
+ "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/ndarray.rs",
"//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/rational_poly.rs",
diff --git a/third_party/rust/jxl/v0_3/wrapper/lib.rs b/third_party/rust/jxl/v0_3/wrapper/lib.rs
index 0e7e83dc6f1d5..66696c1235d80 100644
--- a/third_party/rust/jxl/v0_3/wrapper/lib.rs
+++ b/third_party/rust/jxl/v0_3/wrapper/lib.rs
@@ -4,8 +4,12 @@
//! Minimal C++ wrapper for jxl-rs decoder.
//!
-//! This thin wrapper provides C++-compatible types for the jxl-rs decoder.
-//! State tracking is handled by the C++ caller (JXLImageDecoder).
+//! Two decoder types are exposed:
+//!
+//! - `JxlRsFrameScanner`: lightweight frame-header-only scanner that discovers
+//! frame count, durations, and seek offsets without decoding any pixels.
+//! - `JxlRsDecoder`: full pixel decoder with the original state-machine API,
+//! plus new seeking and progressive flush support.
use jxl::api::{
check_signature, Endianness, JxlBasicInfo, JxlColorEncoding, JxlColorProfile, JxlColorType,
@@ -62,15 +66,60 @@ mod ffi {
bytes_consumed: usize,
}
+ /// Information about a single visible frame discovered by the scanner.
+ #[derive(Debug, Clone)]
+ struct JxlRsVisibleFrameInfo {
+ /// Duration in milliseconds.
+ duration_ms: f64,
+ /// Whether this frame can be decoded independently (no dependencies).
+ is_keyframe: bool,
+ /// Whether this is the last frame in the codestream.
+ is_last: bool,
+ /// File byte offset to start feeding input from when seeking.
+ decode_start_file_offset: usize,
+ /// Box parser state at seek point (for container-wrapped files).
+ remaining_in_box: u64,
+ /// Number of visible frames to skip after seeking before decoding
+ /// the target.
+ visible_frames_to_skip: usize,
+ }
+
extern "Rust" {
+ // ---- Frame scanner (lightweight, no pixel decoding) ----
+ type JxlRsFrameScanner;
+
+ fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner>;
+
+ /// Feed data to the scanner. Returns Success when all frames have been
+ /// scanned (is_last seen), NeedMoreInput if more data is needed, or
+ /// Error on failure.
+ fn feed(
+ self: &mut JxlRsFrameScanner,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult;
+
+ /// Get basic info (valid after first successful feed).
+ fn get_basic_info(self: &JxlRsFrameScanner) -> JxlRsBasicInfo;
+
+ /// Get ICC profile data.
+ fn get_icc_profile(self: &JxlRsFrameScanner) -> &[u8];
+
+ /// Number of visible frames discovered so far.
+ fn frame_count(self: &JxlRsFrameScanner) -> usize;
+
+ /// Get info for a specific frame index.
+ fn get_frame_info(self: &JxlRsFrameScanner, index: usize) -> JxlRsVisibleFrameInfo;
+
+ /// Whether basic info has been parsed.
+ fn has_basic_info(self: &JxlRsFrameScanner) -> bool;
+
+ // ---- Full pixel decoder ----
type JxlRsDecoder;
fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder>;
fn jxl_rs_signature_check(data: &[u8]) -> bool;
- /// Rewind decoder for animation loop replay.
- fn rewind(self: &mut JxlRsDecoder);
-
/// Set the output pixel format. Must be called after getting basic info.
fn set_pixel_format(
self: &mut JxlRsDecoder,
@@ -85,53 +134,198 @@ mod ffi {
all_input: bool,
) -> JxlRsProcessResult;
- /// Parse until next frame header is available. Returns Success if no more frames.
+ /// Parse until next frame header is available.
fn parse_frame_header(
self: &mut JxlRsDecoder,
data: &[u8],
all_input: bool,
) -> JxlRsProcessResult;
- /// Decode frame pixels into the provided buffer.
- fn decode_frame(
+ /// Decode frame pixels with custom stride (for direct frame buffer
+ /// decoding).
+ fn decode_frame_with_stride(
self: &mut JxlRsDecoder,
data: &[u8],
all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
+ row_stride: usize,
) -> JxlRsProcessResult;
- /// Decode frame pixels with custom stride (for direct frame buffer decoding).
- fn decode_frame_with_stride(
+ /// Flush whatever pixels have been decoded so far into the buffer.
+ /// Use for progressive rendering.
+ fn flush_pixels(
self: &mut JxlRsDecoder,
- data: &[u8],
- all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
row_stride: usize,
) -> JxlRsProcessResult;
- /// Get basic info (valid after parse_basic_info succeeds, or a decode
- /// call that yields BasicInfo).
+ /// Get basic info (valid after parse_basic_info succeeds).
fn get_basic_info(self: &JxlRsDecoder) -> JxlRsBasicInfo;
/// Get frame header (valid after parse_frame_header succeeds).
fn get_frame_header(self: &JxlRsDecoder) -> JxlRsFrameHeader;
/// Get ICC profile data (valid after parse_basic_info succeeds).
- /// Returns an empty slice if no embedded ICC profile exists.
fn get_icc_profile(self: &JxlRsDecoder) -> &[u8];
/// Check if more frames are available.
fn has_more_frames(self: &JxlRsDecoder) -> bool;
+
+ /// Seek the decoder to a specific frame using offsets from the scanner.
+ /// After calling this, provide input starting from
+ /// decode_start_file_offset. The decoder must have basic info parsed.
+ fn seek_to_frame(
+ self: &mut JxlRsDecoder,
+ remaining_in_box: u64,
+ );
+
+    /// Skip a single visible frame without decoding pixels (call repeatedly to skip N).
+ /// Use after seek_to_frame when visible_frames_to_skip > 0.
+ /// Returns Success when one frame has been skipped, NeedMoreInput or
+ /// Error otherwise.
+ fn skip_visible_frame(
+ self: &mut JxlRsDecoder,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult;
}
}
use ffi::*;
-/// Thin wrapper around JxlDecoderInner.
+// ---------------------------------------------------------------------------
+// Frame Scanner
+// ---------------------------------------------------------------------------
+
+/// Lightweight scanner that discovers frame info without decoding pixels.
+pub struct JxlRsFrameScanner {
+ decoder: JxlDecoderInner,
+ icc_profile: Vec<u8>,
+ has_basic_info: bool,
+}
+
+fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner> {
+ let mut opts = JxlDecoderOptions::default();
+ opts.scan_frames_only = true;
+ if pixel_limit > 0 {
+ opts.pixel_limit = Some(pixel_limit as usize);
+ }
+
+ Box::new(JxlRsFrameScanner {
+ decoder: JxlDecoderInner::new(opts),
+ icc_profile: Vec::new(),
+ has_basic_info: false,
+ })
+}
+
+impl JxlRsFrameScanner {
+ fn feed(&mut self, data: &[u8], all_input: bool) -> JxlRsProcessResult {
+ let mut input = data;
+ let len_before = input.len();
+
+ loop {
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => {
+ if !self.has_basic_info && self.decoder.basic_info().is_some() {
+ self.has_basic_info = true;
+ if let Some(profile) = self.decoder.output_color_profile() {
+ if let Some(icc) = profile.try_as_icc() {
+ if !icc.is_empty() {
+ self.icc_profile = icc.into_owned();
+ }
+ }
+ }
+ }
+
+ if !self.decoder.has_more_frames() {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Success,
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ }
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => {
+ return JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ Err(_) => {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ };
+ }
+ }
+ }
+ }
+
+ fn get_basic_info(&self) -> JxlRsBasicInfo {
+ let mut info = self
+ .decoder
+ .basic_info()
+ .map(JxlRsBasicInfo::from)
+ .unwrap_or_default();
+
+ if let Some(profile) = self.decoder.embedded_color_profile() {
+ info.is_grayscale = matches!(
+ profile,
+ JxlColorProfile::Simple(JxlColorEncoding::GrayscaleColorSpace { .. })
+ );
+ }
+
+ info
+ }
+
+ fn get_icc_profile(&self) -> &[u8] {
+ &self.icc_profile
+ }
+
+ fn frame_count(&self) -> usize {
+ self.decoder.scanned_frames().len()
+ }
+
+ fn get_frame_info(&self, index: usize) -> JxlRsVisibleFrameInfo {
+ let frames = self.decoder.scanned_frames();
+ if index >= frames.len() {
+ return JxlRsVisibleFrameInfo {
+ duration_ms: 0.0,
+ is_keyframe: false,
+ is_last: false,
+ decode_start_file_offset: 0,
+ remaining_in_box: 0,
+ visible_frames_to_skip: 0,
+ };
+ }
+ let f = &frames[index];
+ JxlRsVisibleFrameInfo {
+ duration_ms: f.duration_ms,
+ is_keyframe: f.is_keyframe,
+ is_last: f.is_last,
+ decode_start_file_offset: f.seek_target.decode_start_file_offset,
+ remaining_in_box: f.seek_target.remaining_in_box,
+ visible_frames_to_skip: f.seek_target.visible_frames_to_skip,
+ }
+ }
+
+ fn has_basic_info(&self) -> bool {
+ self.has_basic_info
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Full Pixel Decoder
+// ---------------------------------------------------------------------------
+
+/// Full pixel decoder with seeking and progressive flush support.
pub struct JxlRsDecoder {
decoder: JxlDecoderInner,
pixel_format: Option<JxlPixelFormat>,
@@ -140,7 +334,7 @@ pub struct JxlRsDecoder {
fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder> {
let mut opts = JxlDecoderOptions::default();
- opts.progressive_mode = JxlProgressiveMode::FullFrame;
+ opts.progressive_mode = JxlProgressiveMode::Pass;
opts.premultiply_output = premultiply_alpha;
if pixel_limit > 0 {
opts.pixel_limit = Some(pixel_limit as usize);
@@ -162,10 +356,6 @@ fn jxl_rs_signature_check(data: &[u8]) -> bool {
}
impl JxlRsDecoder {
- fn rewind(&mut self) {
- let _ = self.decoder.rewind();
- }
-
fn set_pixel_format(&mut self, format: JxlRsPixelFormat, num_extra_channels: u32) {
let pixel_format = match format {
JxlRsPixelFormat::Rgba8 => JxlPixelFormat {
@@ -216,9 +406,6 @@ impl JxlRsDecoder {
match self.decoder.process(&mut input, None) {
Ok(ProcessingResult::Complete { .. }) => {
- // Extract ICC profile on first successful parse.
- // Use try_as_icc() which returns None on error instead of
- // as_icc() which panics on malformed color profiles.
if self.icc_profile.is_empty() {
if let Some(profile) = self.decoder.output_color_profile() {
if let Some(icc) = profile.try_as_icc() {
@@ -282,22 +469,17 @@ impl JxlRsDecoder {
}
}
- fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> {
- let fh = self.decoder.frame_header()?;
- Some(JxlRsFrameHeader {
- duration_ms: fh.duration.unwrap_or(0.0),
- name_length: fh.name.len() as u32,
- })
- }
-
- fn decode_frame(
+ fn decode_frame_with_stride(
&mut self,
data: &[u8],
all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
+ row_stride: usize,
) -> JxlRsProcessResult {
+ use std::mem::MaybeUninit;
+
let mut input = data;
let len_before = input.len();
@@ -308,8 +490,8 @@ impl JxlRsDecoder {
.map(|d| d.bytes_per_sample() * 4)
.unwrap_or(4);
let bytes_per_row = width as usize * bytes_per_pixel;
- let expected_size = bytes_per_row * height as usize;
+ let expected_size = row_stride * (height as usize - 1) + bytes_per_row;
if buffer.len() < expected_size {
return JxlRsProcessResult {
status: JxlRsStatus::Error,
@@ -317,7 +499,16 @@ impl JxlRsDecoder {
};
}
- let output = JxlOutputBuffer::new(buffer, height as usize, bytes_per_row);
+ // SAFETY: The buffer is valid for writes, and we've verified it has
+ // enough space.
+ let output = unsafe {
+ JxlOutputBuffer::new_from_ptr(
+ buffer.as_mut_ptr() as *mut MaybeUninit<u8>,
+ height as usize,
+ bytes_per_row,
+ row_stride,
+ )
+ };
match self.decoder.process(&mut input, Some(&mut [output])) {
Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
@@ -344,10 +535,8 @@ impl JxlRsDecoder {
}
}
- fn decode_frame_with_stride(
+ fn flush_pixels(
&mut self,
- data: &[u8],
- all_input: bool,
buffer: &mut [u8],
width: u32,
height: u32,
@@ -355,9 +544,6 @@ impl JxlRsDecoder {
) -> JxlRsProcessResult {
use std::mem::MaybeUninit;
- let mut input = data;
- let len_before = input.len();
-
let bytes_per_pixel = self
.pixel_format
.as_ref()
@@ -366,7 +552,6 @@ impl JxlRsDecoder {
.unwrap_or(4);
let bytes_per_row = width as usize * bytes_per_pixel;
- // Validate buffer size with custom stride
let expected_size = row_stride * (height as usize - 1) + bytes_per_row;
if buffer.len() < expected_size {
return JxlRsProcessResult {
@@ -375,8 +560,6 @@ impl JxlRsDecoder {
};
}
- // SAFETY: The buffer is valid for writes, and we've verified it has enough space.
- // new_from_ptr allows custom stride (bytes_between_rows).
let output = unsafe {
JxlOutputBuffer::new_from_ptr(
buffer.as_mut_ptr() as *mut MaybeUninit<u8>,
@@ -386,24 +569,11 @@ impl JxlRsDecoder {
)
};
- match self.decoder.process(&mut input, Some(&mut [output])) {
- Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
+ match self.decoder.flush_pixels(&mut [output]) {
+ Ok(()) => JxlRsProcessResult {
status: JxlRsStatus::Success,
- bytes_consumed: len_before - input.len(),
+ bytes_consumed: 0,
},
- Ok(ProcessingResult::NeedsMoreInput { .. }) => {
- if all_input {
- JxlRsProcessResult {
- status: JxlRsStatus::Error,
- bytes_consumed: 0,
- }
- } else {
- JxlRsProcessResult {
- status: JxlRsStatus::NeedMoreInput,
- bytes_consumed: len_before - input.len(),
- }
- }
- }
Err(_) => JxlRsProcessResult {
status: JxlRsStatus::Error,
bytes_consumed: 0,
@@ -418,7 +588,6 @@ impl JxlRsDecoder {
.map(JxlRsBasicInfo::from)
.unwrap_or_default();
- // Check if the image is grayscale based on the embedded color profile.
if let Some(profile) = self.decoder.embedded_color_profile() {
info.is_grayscale = matches!(
profile,
@@ -429,6 +598,14 @@ impl JxlRsDecoder {
info
}
+ fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> {
+ let fh = self.decoder.frame_header()?;
+ Some(JxlRsFrameHeader {
+ duration_ms: fh.duration.unwrap_or(0.0),
+ name_length: fh.name.len() as u32,
+ })
+ }
+
fn get_frame_header(&self) -> JxlRsFrameHeader {
self.extract_frame_header().unwrap_or_default()
}
@@ -440,6 +617,60 @@ impl JxlRsDecoder {
fn has_more_frames(&self) -> bool {
self.decoder.has_more_frames()
}
+
+ fn seek_to_frame(&mut self, remaining_in_box: u64) {
+ self.decoder.start_new_frame(remaining_in_box);
+ }
+
+ fn skip_visible_frame(
+ &mut self,
+ data: &[u8],
+ all_input: bool,
+ ) -> JxlRsProcessResult {
+ let mut input = data;
+ let len_before = input.len();
+
+ // Phase 1: process to get frame header (WithImageInfo -> WithFrameInfo)
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => {}
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => {
+ return JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ };
+ }
+ Err(_) => {
+ return JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ };
+ }
+ }
+
+ // Phase 2: skip frame (WithFrameInfo -> WithImageInfo)
+ match self.decoder.process(&mut input, None) {
+ Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult {
+ status: JxlRsStatus::Success,
+ bytes_consumed: len_before - input.len(),
+ },
+ Ok(ProcessingResult::NeedsMoreInput { .. }) => JxlRsProcessResult {
+ status: if all_input {
+ JxlRsStatus::Error
+ } else {
+ JxlRsStatus::NeedMoreInput
+ },
+ bytes_consumed: len_before - input.len(),
+ },
+ Err(_) => JxlRsProcessResult {
+ status: JxlRsStatus::Error,
+ bytes_consumed: 0,
+ },
+ }
+ }
}
impl Default for JxlRsBasicInfo {
@@ -486,8 +717,6 @@ impl From<&JxlBasicInfo> for JxlRsBasicInfo {
animation_tps_denominator: tps_den,
uses_original_profile: info.uses_original_profile,
orientation: info.orientation as u32,
- // Note: is_grayscale is set by get_basic_info() after checking the
- // color profile, since JxlBasicInfo doesn't contain color info.
is_grayscale: false,
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment