Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions fuzz/fuzz_targets/buf_independent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ fn assert_same_info(lhs: &png::Info, rhs: &png::Info) {
trait IteratorExtensionsForFuzzing: Iterator + Sized {
/// Verifies that either 1) all items in the iterator are `Ok(_)` or 2) all items in the
/// iterator are `Err(_)`. Passes through unmodified iterator items.
#[track_caller]
fn assert_all_results_are_consistent<T>(self) -> impl Iterator<Item = Self::Item>
where
Self: Iterator<Item = Result<T, png::DecodingError>>,
Expand Down Expand Up @@ -321,6 +322,7 @@ trait IteratorExtensionsForFuzzing: Iterator + Sized {

/// Verifies that all items in the iterator are the same (according to their `Eq`
/// implementation). Returns one of the items.
#[track_caller]
fn assert_all_items_are_equal(self) -> Self::Item
where
Self::Item: Debug + Eq,
Expand All @@ -330,6 +332,7 @@ trait IteratorExtensionsForFuzzing: Iterator + Sized {

/// Verifies that all items in the iterator are the same (according to the `assert_same`
/// function). Returns one of the items.
#[track_caller]
fn assert_all_items_are_same<F>(self, mut assert_same: F) -> <Self as Iterator>::Item
where
F: for<'a, 'b> FnMut(&'a Self::Item, &'b Self::Item),
Expand Down
31 changes: 26 additions & 5 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ impl<R: BufRead + Seek> Reader<R> {
) -> Result<(), DecodingError> {
self.next_raw_interlaced_row(rowlen)?;
let row = self.unfiltering_buffer.prev_row();
assert_eq!(row.len(), rowlen - 1);
let row = &row[..rowlen - 1];

// Apply transformations and write resulting data to buffer.
let transform_fn = {
Expand Down Expand Up @@ -679,17 +679,38 @@ impl<R: BufRead + Seek> Reader<R> {
/// Unfilter the next raw interlaced row into `self.unfiltering_buffer`.
fn next_raw_interlaced_row(&mut self, rowlen: usize) -> Result<(), DecodingError> {
// Read image data until we have at least one full row (but possibly more than one).
while self.unfiltering_buffer.curr_row_len() < rowlen {
while self.unfiltering_buffer.curr_row_len_with_unfilter_ahead() < rowlen as isize {
if self.subframe.consumed_and_flushed {
return Err(DecodingError::Format(
FormatErrorInner::NoMoreImageData.into(),
));
}

let mut buffer = self.unfiltering_buffer.as_unfilled_buffer();
match self.decoder.decode_image_data(Some(&mut buffer))? {
ImageDataCompletionStatus::ExpectingMoreData => (),
ImageDataCompletionStatus::Done => self.mark_subframe_as_consumed_and_flushed(),
match self.decoder.decode_image_data(Some(&mut buffer)) {
Ok(ImageDataCompletionStatus::ExpectingMoreData) => (),
Ok(ImageDataCompletionStatus::Done) => self.mark_subframe_as_consumed_and_flushed(),
Err(DecodingError::IoError(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
// We only have partial data but we want to make as much data available as
// possible. The immediate problem is now that zlib requires a lookback buffer
// of prior data which must stay available while our filter code will want to
// modify the data in the rowlines (i.e. prediction). So what to do?
//
// A copy of the data is necessary unless all filters are no-op. We must decide
// how to do this copy without interfering with the main code paths. The
// resulting unfilter operation puts the indices in the buffer into a weird
// state at the cost of having to shuffle data around to make the actual data
// valid for the following rows.
return if self
.unfiltering_buffer
.unfilter_ahead_row(rowlen, self.bpp)?
{
Ok(())
} else {
Err(DecodingError::IoError(e))
};
}
Err(e) => return Err(e),
}
}

Expand Down
155 changes: 145 additions & 10 deletions src/decoder/unfiltering_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,16 @@ impl UnfilteringBuffer {
/// Asserts in debug builds that all the invariants hold. No-op in release
/// builds. Intended to be called after creating or mutating `self` to
/// ensure that the final state preserves the invariants.
#[track_caller]
fn debug_assert_invariants(&self) {
// The previous row pointer is always behind the current row pointer.
debug_assert!(self.prev_start <= self.current_start);
debug_assert!(self.current_start <= self.available);
// The current row pointer is always into filled bytes (but sometimes may point to
// read-only back reference buffer, see `curr_row_len_with_unfilter_ahead`).
debug_assert!(self.current_start <= self.filled);
// The mut-available region is always well-defined.
debug_assert!(self.available <= self.filled);
// The logically filled region is always within bounds of the data stream.
debug_assert!(self.filled <= self.data_stream.len());
}

Expand Down Expand Up @@ -111,6 +117,7 @@ impl UnfilteringBuffer {
self.current_start = 0;
self.filled = 0;
self.available = 0;
self.debug_assert_invariants();
}

/// Returns the previous (already `unfilter`-ed) row.
Expand All @@ -123,6 +130,20 @@ impl UnfilteringBuffer {
self.available - self.current_start
}

/// Returns how many bytes of the current row are available in the buffer.
///
/// Why negative? There's a backreference section in the buffer that is reserved for the zlib /
/// deflate decoder (or any other for that matter). We must not modify that part of the data
/// but if we only *read* the row then our `current_start` is allowed to run _into_ that
/// section. So, as a less efficient fallback where partial data is crucial, we can unfilter
/// outside the read area instead of in-place but still indicate how many bytes we have
/// consumed. In that case, this number is negative.
pub(crate) fn curr_row_len_with_unfilter_ahead(&self) -> isize {
// Both are indices into the buffer that is an allocation of bytes. Allocations are at most
// isize::MAX in size hence this is valid.
self.available as isize - self.current_start as isize
}

/// Returns a `&mut Vec<u8>` suitable for passing to
/// `ReadDecoder.decode_image_data` or `StreamingDecoder.update`.
///
Expand All @@ -132,7 +153,9 @@ impl UnfilteringBuffer {
/// invariants by returning an append-only view of the vector
/// (`FnMut(&[u8])`??? or maybe `std::io::Write`???).
pub fn as_unfilled_buffer(&mut self) -> UnfilterBuf<'_> {
if self.prev_start >= self.shift_back_limit
let shift_back = self.prev_start.min(self.available);

if shift_back >= self.shift_back_limit
// Avoid the shift back if the buffer is still very empty. Consider how we got here: a
// previous decompression filled the buffer, then we unfiltered, we're now refilling
// the buffer again. The condition implies, the previous decompression filled at most
Expand All @@ -147,18 +170,21 @@ impl UnfilteringBuffer {
// question if we could be a little smarter and avoid crossing page boundaries when
// that is not required. Alas, microbenchmarking TBD.
if let Some(16..) = self.data_stream.len().checked_sub(self.filled) {
self.data_stream
.copy_within(self.prev_start..self.filled, 0);
self.data_stream.copy_within(shift_back..self.filled, 0);
} else {
self.data_stream.copy_within(self.prev_start.., 0);
self.data_stream.copy_within(shift_back.., 0);
}

self.debug_assert_invariants();

// The data kept its relative position to `filled` which now lands exactly at
// the distance between prev_start and filled.
self.current_start -= self.prev_start;
self.available -= self.prev_start;
self.filled -= self.prev_start;
self.prev_start = 0;
self.current_start -= shift_back;
self.available -= shift_back;
self.filled -= shift_back;
self.prev_start -= shift_back;

self.debug_assert_invariants();
}

if self.filled + Self::GROWTH_BYTES > self.data_stream.len() {
Expand All @@ -175,17 +201,26 @@ impl UnfilteringBuffer {
/// Runs `unfilter` on the current row, and then shifts rows so that the current row becomes the previous row.
///
/// Will panic if `self.curr_row_len() < rowlen`.
///
/// For correctness this also assumes that `curr_row_len_with_unfilter_ahead` is at least
/// `rowlen`.
pub fn unfilter_curr_row(
&mut self,
rowlen: usize,
bpp: BytesPerPixel,
) -> Result<(), DecodingError> {
debug_assert!(rowlen >= 2); // 1 byte for `FilterType` and at least 1 byte of pixel data.
debug_assert_eq!(rowlen as isize as usize, rowlen);
debug_assert!(self.curr_row_len_with_unfilter_ahead() >= rowlen as isize);

let (prev, row) = self.data_stream.split_at_mut(self.current_start);
let prev: &[u8] = &prev[self.prev_start..];

debug_assert!(prev.is_empty() || prev.len() == (rowlen - 1));
let prev = if prev.is_empty() {
prev
} else {
&prev[..rowlen - 1]
};

// Get the filter type.
let filter = RowFilter::from_u8(row[0]).ok_or(DecodingError::Format(
Expand All @@ -202,6 +237,106 @@ impl UnfilteringBuffer {

Ok(())
}

/// Unfilter but allow the current_start to exceed `available` at the cost of some compute.
/// This will be called when we encounter an `UnexpectedEof` and want to push out all
/// interlaced rows that we can, i.e. it is not in the usual critical path of decoding.
pub(crate) fn unfilter_ahead_row(
&mut self,
rowlen: usize,
bpp: BytesPerPixel,
) -> Result<bool, DecodingError> {
if self.filled - self.current_start < rowlen {
return Ok(false);
}

// We can not really mutate the row data to unfilter it! So where do we put the unfiltered
// row then? In our interface it should occur at `self.prev_start` after this method is
// finished. So really simple, we just copy it back there.
//
// There is of course subtlety. First we need to make sure that the buffer of the previous
// row does not overlap with the data for the decoder as we will overwrite it. That is
// usually trivial, when it was previously unfiltered we had already mutated it so just
// reuse, except that at the first line of each interlace pass we start with an _empty_
// previous row and consequently need to potentially move all our data further back.

// The fixup discussed above: make space for a previous row. It does not matter where we put it
// so just put it right before the minimum of `current_start` and `available`. In this case
// however we should also pass an empty row to `unfilter`.
if self.prev_start == self.current_start {
let potential_end_of_new_prev = self.current_start.min(self.available);
// Insert free space between what we treat as the previous row and the current row.
// NOTE: this is because of the decoder's `commit` function which will take a fixed
// window of data back from its filled state before it is done. But we move that region
// upwards so it may erroneously point backwards by more than necessary. That just
// freezes data but it might also freeze a portion that we are using as the unfilter
// buffer / the space to put the 'previous row'.
let padding = rowlen - 1;

let start_of_new_prev = potential_end_of_new_prev.saturating_sub(padding);
let end_of_new_prev = start_of_new_prev + padding;

// Shift everything up as required.
let current_shift = end_of_new_prev - potential_end_of_new_prev;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if instead of reusing/shifting self.data_stream it may be a bit easier/clearer if a separate buffer (or two) would be introduced as a field of UnfilteringBuffer? Say copied_curr_row: Vec<u8> and copied_prev_row: Option<Vec<u8>> or something like that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and if we do that, then I guess the type of the current_start and prev_start fields would need to change from usize to something like Either<usize> (where Either::Left means the old behavior - data is in data_stream and where Either::Right means that the rows are in the new buffers).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type change in current_start was the main concern that drove me away from that concept. I wanted to keep influence on the main path as low as possible, having an Either would be an extra match / conditional read on every single path accessing current_start—even the happy one. I can benchmark it but intuitively, it will be quite visible.


self.data_stream.splice(
start_of_new_prev..start_of_new_prev,
core::iter::repeat(0u8).take(current_shift),
);

self.current_start += current_shift;
// Temporary in case of error.
self.prev_start = self.current_start;
self.available += current_shift;
self.filled += current_shift;

self.debug_assert_invariants();

let (prev, row) = self.data_stream.split_at_mut(self.current_start);
let prev = &mut prev[start_of_new_prev..][..rowlen - 1];

let filter = RowFilter::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;

prev.copy_from_slice(&row[1..rowlen]);
unfilter(filter, bpp, &[], prev);

self.prev_start = start_of_new_prev;
self.current_start += rowlen;
self.debug_assert_invariants();

Ok(true)
} else {
let (prev, row) = self.data_stream.split_at_mut(self.current_start);

assert!(
self.available - self.prev_start >= rowlen - 1,
"prev {prev}, cur {cur}, avail {avail}, fill {fill}, rowlen {rowlen}",
prev = self.prev_start,
cur = self.current_start,
avail = self.available,
fill = self.filled,
);

let prev = &mut prev[self.prev_start..][..rowlen - 1];

let filter = RowFilter::from_u8(row[0]).ok_or(DecodingError::Format(
FormatErrorInner::UnknownFilterMethod(row[0]).into(),
))?;

// Unfilter this in a temporary buffer.
let mut row = row[1..rowlen].to_vec();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: IIUC to_vec will always allocate here. Would it be possible to avoid doing such an allocation for ~every row in a partially read input? Can we allocate once and then reuse the allocation?

unfilter(filter, bpp, prev, &mut row);
prev.copy_from_slice(&row);

// Do NOT modify prev_start
self.current_start += rowlen;
self.debug_assert_invariants();

Ok(true)
}
}
}

fn checked_next_multiple_of(val: usize, factor: usize) -> Option<usize> {
Expand Down
2 changes: 1 addition & 1 deletion src/decoder/zlib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ impl UnfilterBuf<'_> {
}

pub(crate) fn commit(&mut self, howmany: usize) {
*self.available = howmany;
*self.available = howmany.max(*self.available);
}

pub(crate) fn flush_allocate(&mut self) {
Expand Down
34 changes: 34 additions & 0 deletions tests/partial_decode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use png::{Decoder, DecodingError};

#[test]
fn test_partial_decode() {
    // The first 0x8D bytes from the following test image from Skia:
    // resources/images/apng-test-suite--dispose-ops--none-basic.png
    let partial_png: &[u8] = &[
        0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44,
        0x52, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x40, 0x08, 0x06, 0x00, 0x00, 0x00, 0xd2,
        0xd6, 0x7f, 0x7f, 0x00, 0x00, 0x00, 0x08, 0x61, 0x63, 0x54, 0x4c, 0x00, 0x00, 0x00, 0x03,
        0x00, 0x00, 0x00, 0x01, 0xb9, 0xea, 0x8a, 0x56, 0x00, 0x00, 0x00, 0x1a, 0x66, 0x63, 0x54,
        0x4c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x64, 0x00, 0x01, 0x26, 0x12, 0x2f,
        0xe0, 0x00, 0x00, 0x00, 0x93, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9c, 0xed, 0xd2, 0xa1, 0x01,
        0x00, 0x30, 0x10, 0x84, 0xb0, 0xdb, 0x7f, 0xe9, 0xef, 0x18, 0x15, 0x44, 0xc4, 0x23, 0xd8,
        0x6d, 0x47, 0xd7, 0x7e, 0x07, 0x60, 0x00, 0x0c, 0x80, 0x01, 0x30, 0x00, 0x06, 0xc0, 0x00,
        0x18, 0x00, 0x03, 0x60, 0x00, 0x0c,
    ];

    // Decode from an in-memory cursor so the truncation surfaces as UnexpectedEof.
    let cursor = std::io::Cursor::new(partial_png);
    let mut reader = Decoder::new(cursor).read_info().unwrap();
    let mut row_buf = vec![0u8; reader.output_buffer_size().unwrap()];

    // Every row covered by the truncated IDAT payload must still decode successfully.
    for i in 0..10 {
        let result = reader.read_row(&mut row_buf);
        assert!(matches!(result, Ok(_)), "{result:?} at {i}");
    }

    // The next row runs past the available compressed data and must report EOF.
    let result = reader.read_row(&mut row_buf);
    let DecodingError::IoError(err) = result.unwrap_err() else {
        panic!("Unexpected error variant");
    };
    assert_eq!(err.kind(), std::io::ErrorKind::UnexpectedEof);
}
Loading