Skip to content

Commit

Permalink
Better handling of chunks when decoding streaming data. (#24)
Browse files Browse the repository at this point in the history
This PR adds a test generating lots of small chunks in the encoding.
The idea is to stress this part of the code to make bugs easier to spot.

- Stop reading past input in SnappyDecompressor on chunk boundary.
- Copy buffer into correct index of partially created scratch
  on chunk boundary.
- Reset _scratchLength after use on chunk boundary.
  • Loading branch information
rodo-r2r authored Nov 2, 2020
1 parent 66c8ac0 commit c9ddcf0
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 7 deletions.
52 changes: 51 additions & 1 deletion Snappier.Tests/SnappyStreamTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.IO;
using System;
using System.IO;
using System.IO.Compression;
using System.Text;
using System.Threading.Tasks;
Expand Down Expand Up @@ -101,5 +102,54 @@ public async Task CompressAndDecompressAsync(string filename)

Assert.Equal(sourceText, decompressedText);
}

[Theory]
[InlineData("alice29.txt")]
[InlineData("asyoulik.txt")]
[InlineData("fireworks.jpeg")]
[InlineData("geo.protodata")]
[InlineData("html")]
[InlineData("html_x_4")]
[InlineData("kppkn.gtb")]
[InlineData("lcet10.txt")]
[InlineData("paper-100k.pdf")]
[InlineData("plrabn12.txt")]
[InlineData("urls.10K")]
// Round-trips an embedded test file through SnappyStream while writing lots of
// small chunks, to catch errors where reading needs to break mid-chunk.
public void CompressAndDecompressChunkStressTest(string filename)
{
    // Load the whole embedded resource into memory; dispose the resource stream when done.
    using var resource = typeof(SnappyStreamTests).Assembly.GetManifestResourceStream($"Snappier.Tests.TestData.{filename}");
    // GetManifestResourceStream returns null when the resource name is not found;
    // fail with a clear assertion instead of a NullReferenceException below.
    Assert.NotNull(resource);
    using var resourceMem = new MemoryStream();
    resource.CopyTo(resourceMem);
    var originalBytes = resourceMem.ToArray();

    // Fixed seed keeps the sequence of chunk sizes reproducible between runs.
    var rand = new Random(123);

    using var compressed = new MemoryStream();
    using (var inputStream = new MemoryStream(originalBytes))
    // NOTE(review): the trailing `true` is presumably leaveOpen, since `compressed`
    // is rewound and read again after the compressor is disposed - confirm.
    using (var compressor = new SnappyStream(compressed, CompressionMode.Compress, true))
    {
        // Write lots of small randomly sized chunks to increase the chance of hitting error conditions.
        byte[] buffer = new byte[100];
        while (true)
        {
            // Pick a new size for every read so the chunk sizes actually vary
            // (Random.Next's upper bound is exclusive, so sizes are 1..buffer.Length - 1).
            int requestedSize = rand.Next(1, buffer.Length);
            int n = inputStream.Read(buffer.AsSpan(0, requestedSize));
            if (n == 0)
            {
                break;
            }

            compressor.Write(buffer.AsSpan(0, n));
            // Flush after every write so we get lots of small chunks in the compressed output.
            compressor.Flush();
        }
    }

    // Rewind so the decompressor reads from the start of the compressed data.
    compressed.Position = 0;

    using var decompressed = new MemoryStream();
    using (var decompressor = new SnappyStream(compressed, CompressionMode.Decompress, true))
    {
        decompressor.CopyTo(decompressed);
    }

    // Compare lengths first for a short failure message, then the full content.
    Assert.Equal(originalBytes.Length, decompressed.Length);
    Assert.Equal(originalBytes, decompressed.ToArray());
}
}
}
2 changes: 1 addition & 1 deletion Snappier/Internal/SnappyDecompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public void Reset()
bool foundEnd = false;

var i = 0;
while (input.Length > 0)
while (input.Length > i)
{
byte c = input[i];
i += 1;
Expand Down
12 changes: 7 additions & 5 deletions Snappier/Internal/SnappyStreamDecompressor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ private unsafe uint ReadChunkHeader(ref byte* buffer, byte* bufferEnd)
var bytesToCopyToScratch = 4 - _scratchLength;
fixed (byte* scratch = _scratch)
{
Buffer.MemoryCopy(buffer, scratch, ScratchBufferSize, bytesToCopyToScratch);
Buffer.MemoryCopy(buffer, scratch + _scratchLength, ScratchBufferSize, bytesToCopyToScratch);

buffer += bytesToCopyToScratch;
_scratchLength += bytesToCopyToScratch;
Expand Down Expand Up @@ -280,15 +280,17 @@ private unsafe bool ReadChunkCrc(ref byte* inputPtr, byte* inputEnd, ref int chu
}

// Copy to scratch
new ReadOnlySpan<byte>(inputPtr, bytesAvailable)
int crcBytesAvailable = Math.Min(bytesAvailable, 4 - chunkBytesProcessed);
new ReadOnlySpan<byte>(inputPtr, crcBytesAvailable)
.CopyTo(_scratch.AsSpan(_scratchLength));
_scratchLength += bytesAvailable;
inputPtr += bytesAvailable;
chunkBytesProcessed += bytesAvailable;
_scratchLength += crcBytesAvailable;
inputPtr += crcBytesAvailable;
chunkBytesProcessed += crcBytesAvailable;

if (_scratchLength >= 4)
{
_expectedChunkCrc = BinaryPrimitives.ReadUInt32LittleEndian(_scratch);
_scratchLength = 0;
return true;
}

Expand Down

0 comments on commit c9ddcf0

Please sign in to comment.