Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement WriteStringValueSegment defined in Issue 67337 #101356

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9122ef8
Implement WriteStringValueSegment defined in Issue 67337
ificator Apr 21, 2024
e044b13
Fix some review comments
ificator May 26, 2024
e7abe7f
merge upstream/main
ificator May 26, 2024
b8d578c
Handle split surrogate pair
ificator May 26, 2024
181cef2
Merge remote-tracking branch 'upstream/main' into user/ificator/write…
ificator Dec 6, 2024
65006ce
Commit old changes responding to comments
ificator Dec 6, 2024
1601af8
utf8 and utf16
PranavSenthilnathan Dec 11, 2024
d6b66be
fix build error
PranavSenthilnathan Dec 16, 2024
a46a1cc
Update src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf…
PranavSenthilnathan Dec 16, 2024
b5d0c17
Update src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf…
PranavSenthilnathan Dec 16, 2024
4a0d1c6
PR comments
PranavSenthilnathan Dec 16, 2024
09d321d
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 16, 2024
96ed922
add encoding flags
PranavSenthilnathan Dec 17, 2024
a078bfd
add test for switching encoding
PranavSenthilnathan Dec 17, 2024
93e6ee9
use CoreLib Rune for polyfill instead of having a separate copy
PranavSenthilnathan Dec 17, 2024
501813f
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 17, 2024
c3b1c3b
move warning disabling to top and fix up tests
PranavSenthilnathan Dec 18, 2024
c9c4884
add fuzzer
PranavSenthilnathan Dec 19, 2024
8482b1c
Fix some tests I missed
PranavSenthilnathan Dec 19, 2024
d50bbca
clean up and add another test to fuzzer
PranavSenthilnathan Dec 19, 2024
55827d9
comment typo
PranavSenthilnathan Dec 20, 2024
a5cd855
pr comments
PranavSenthilnathan Dec 20, 2024
c82b035
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 20, 2024
4f63907
Merge branch 'user/ificator/writestringvaluesegment' of https://githu…
PranavSenthilnathan Dec 20, 2024
b7fd4a5
throw when encodings are mixed
PranavSenthilnathan Dec 24, 2024
c0a700c
update fuzzer to assert that mixing encodings always throws
PranavSenthilnathan Dec 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
Expand Down Expand Up @@ -31,7 +31,6 @@
<Compile Include="Fuzzers\UTF8Fuzzer.cs" />
<Compile Include="Fuzzers\Utf8JsonWriterFuzzer.cs" />
<Compile Include="IFuzzer.cs" />
<Compile Include="MemoryBackedStream.cs" />
<Compile Include="PooledBoundedMemory.cs" />
<Compile Include="Program.cs" />
<Compile Include="$(TestUtilities)\System\Buffers\BoundedMemory.*" Link="TestUtilities\%(Filename)%(Extension)" />
Expand All @@ -44,10 +43,6 @@
</None>
</ItemGroup>

<ItemGroup>
<None Include="MemoryBackedStream.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\System.Formats.Nrbf\src\System.Formats.Nrbf.csproj" />
</ItemGroup>
Expand Down
117 changes: 82 additions & 35 deletions src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@ internal sealed class Utf8JsonWriterFuzzer : IFuzzer
// Options for choosing between UTF-8 and UTF-16 encoding
private const byte EncodingFlag = 1 << 5;

// Options for choosing whether to poison previous or next page
private const byte PoisonFlag = 1 << 6;

public void FuzzTarget(ReadOnlySpan<byte> bytes)
{
const int minLength = 10; // 2 ints, 1 byte, and 1 padding to align chars
Expand Down Expand Up @@ -75,67 +72,90 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)

// Compute the expected result by using the encoder directly and the input
int maxExpandedSizeBytes = 6 * bytes.Length + 2;
byte[] buffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
int written;
Span<byte> expected = utf8
? EncodeToUtf8(bytes, buffer, options.Encoder, out written)
: EncodeToUtf8(chars, buffer, options.Encoder, out written);
byte[] expectedBuffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
Span<byte> expected =
expectedBuffer.AsSpan(0, utf8
? EncodeToUtf8(bytes, expectedBuffer, options.Encoder)
: EncodeToUtf8(chars, expectedBuffer, options.Encoder));

// Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same.
byte[] actualBuffer = new byte[expected.Length];
foreach (ReadOnlySpan<Range> ranges in new[]
{
new[] { 0.. },
new[] { 0..slice1, slice1.. },
new[] { 0..slice1, slice1..slice2, slice2.. },
})
{
// Use a stream backed by bounded memory to detect out-of-bounds accesses
using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(expected.Length, (optionsByte & PoisonFlag) == 0 ? PoisonPagePlacement.After : PoisonPagePlacement.Before);
using MemoryBackedStream stream = new(memory.Memory);
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

if (utf8)
{
WriteStringValueSegments(writer, bytes, ranges);
writer.Flush();
}
else
{
WriteStringValueSegments(writer, chars, ranges);
writer.Flush();
}

ReadOnlySpan<byte> actual = memory.Span;
writer.Flush();

// Compare the expected and actual results
Assert.SequenceEqual(expected, actual);
Assert.SequenceEqual(expected, actualBuffer);
Assert.Equal(expected.Length, writer.BytesCommitted);
Assert.Equal(0, writer.BytesPending);

Array.Clear(actualBuffer);
}

// Additional test for mixing UTF-8 and UTF-16 encoding. The alignment math is easier in UTF-16 mode so just run it for that.
if (!utf8)
{
Array.Clear(expectedBuffer);

{
using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(maxExpandedSizeBytes, PoisonPagePlacement.Before);
using MemoryBackedStream stream = new(memory.Memory);
ReadOnlySpan<char> firstSegment = chars[slice1..];
ReadOnlySpan<byte> secondSegment = bytes[0..(2 * slice1)];

expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder));

actualBuffer = new byte[expected.Length];
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

writer.WriteStringValueSegment(chars[0..slice1], false);
writer.WriteStringValueSegment(bytes[(2 * slice1)..], true);
writer.WriteStringValueSegment(firstSegment, false);
writer.WriteStringValueSegment(secondSegment, true);
writer.Flush();

Assert.SequenceEqual(expected, actualBuffer);
Assert.Equal(expected.Length, writer.BytesCommitted);
Assert.Equal(0, writer.BytesPending);
}

Array.Clear(expectedBuffer);

{
using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(maxExpandedSizeBytes, PoisonPagePlacement.Before);
using MemoryBackedStream stream = new(memory.Memory);
ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
ReadOnlySpan<char> secondSegment = chars[slice1..];

expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder));

actualBuffer = new byte[expected.Length];
using MemoryStream stream = new(actualBuffer);
using Utf8JsonWriter writer = new(stream, options);

writer.WriteStringValueSegment(bytes[0..(2 * slice1)], false);
writer.WriteStringValueSegment(chars[slice1..], true);
writer.WriteStringValueSegment(firstSegment, false);
writer.WriteStringValueSegment(secondSegment, true);
writer.Flush();

Assert.SequenceEqual(expected, actualBuffer);
Assert.Equal(expected.Length, writer.BytesCommitted);
Assert.Equal(0, writer.BytesPending);
}
}

ArrayPool<byte>.Shared.Return(buffer);
ArrayPool<byte>.Shared.Return(expectedBuffer);
}

private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges)
Expand All @@ -154,23 +174,50 @@ private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan
}
}

private static Span<byte> EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder, out int written)
// Builds the expected JSON string token for UTF-8 input: an opening quote, the
// encoder's escaped form of `bytes`, and a closing quote, all written into `destBuffer`.
// Returns the total number of bytes written, including both quotes.
// The OperationStatus returned by EncodeUtf8 is not inspected here.
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
    const byte quote = (byte)'"';

    destBuffer[0] = quote;

    // Escape the payload directly after the opening quote.
    Span<byte> payload = destBuffer.Slice(1);
    encoder.EncodeUtf8(bytes, payload, out _, out int payloadLength, isFinalBlock: true);

    // The closing quote goes immediately after the escaped payload.
    int closingQuoteIndex = payloadLength + 1;
    destBuffer[closingQuoteIndex] = quote;

    return closingQuoteIndex + 1;
}

// Builds the expected JSON string token for UTF-16 input: an opening quote, the
// output of EncodeTranscode for `chars`, and a closing quote, all written into `destBuffer`.
// Returns the total number of bytes written, including both quotes.
private static int EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
    destBuffer[0] = (byte)'"';

    // EncodeTranscode reports how many bytes it placed after the opening quote.
    int closingQuoteIndex = 1 + EncodeTranscode(chars, destBuffer.Slice(1), encoder);
    destBuffer[closingQuoteIndex] = (byte)'"';

    return closingQuoteIndex + 1;
}

// Builds the expected JSON string token for a value whose first segment is UTF-8
// (`bytes`) and whose second segment is UTF-16 (`chars`): opening quote, escaped
// UTF-8 segment, EncodeTranscode output for the UTF-16 segment, closing quote.
// Returns the total number of bytes written into `destBuffer`, including both quotes.
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
    destBuffer[0] = (byte)'"';
    int position = 1;

    // First segment: escape the UTF-8 bytes directly after the opening quote.
    encoder.EncodeUtf8(bytes, destBuffer.Slice(position), out _, out int utf8Length, isFinalBlock: true);
    position += utf8Length;

    // Second segment: append the UTF-16 chars via EncodeTranscode.
    position += EncodeTranscode(chars, destBuffer.Slice(position), encoder, isFinalBlock: true);

    destBuffer[position] = (byte)'"';
    return position + 1;
}

private static int EncodeToUtf8(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
{
int written = 1;
destBuffer[0] = (byte)'"';
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
destBuffer[written + 1] = (byte)'"';
return destBuffer.Slice(0, written + 2);
written += EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true);
encoder.EncodeUtf8(bytes, destBuffer[written..], out _, out int writtenTemp, isFinalBlock: true);
written += writtenTemp;
destBuffer[written] = (byte)'"';
return written + 1;
}

private static Span<byte> EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, out int written)
private static int EncodeTranscode(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true)
{
var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length + 2);
utf16buffer[0] = '"';
encoder.Encode(chars, utf16buffer.AsSpan(1), out _, out written, isFinalBlock: true);
utf16buffer[written + 1] = '"';
var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length);
encoder.Encode(chars, utf16buffer, out _, out int written, isFinalBlock: true);

Utf8.FromUtf16(utf16buffer.AsSpan(0, written + 2), destBuffer, out _, out written, isFinalBlock: true);
Utf8.FromUtf16(utf16buffer.AsSpan(0, written), destBuffer, out _, out written, isFinalBlock);
ArrayPool<char>.Shared.Return(utf16buffer);
return destBuffer[0..written];
return written;
}
}
142 changes: 0 additions & 142 deletions src/libraries/Fuzzing/DotnetFuzzing/MemoryBackedStream.cs

This file was deleted.

Loading