Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement WriteStringValueSegment defined in Issue 67337 #101356

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9122ef8
Implement WriteStringValueSegment defined in Issue 67337
ificator Apr 21, 2024
e044b13
Fix some review comments
ificator May 26, 2024
e7abe7f
merge upstream/main
ificator May 26, 2024
b8d578c
Handle split surrogate pair
ificator May 26, 2024
181cef2
Merge remote-tracking branch 'upstream/main' into user/ificator/write…
ificator Dec 6, 2024
65006ce
Commit old changes responding to comments
ificator Dec 6, 2024
1601af8
utf8 and utf16
PranavSenthilnathan Dec 11, 2024
d6b66be
fix build error
PranavSenthilnathan Dec 16, 2024
a46a1cc
Update src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf…
PranavSenthilnathan Dec 16, 2024
b5d0c17
Update src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf…
PranavSenthilnathan Dec 16, 2024
4a0d1c6
PR comments
PranavSenthilnathan Dec 16, 2024
09d321d
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 16, 2024
96ed922
add encoding flags
PranavSenthilnathan Dec 17, 2024
a078bfd
add test for switching encoding
PranavSenthilnathan Dec 17, 2024
93e6ee9
use CoreLib Rune for polyfill instead of having a separate copy
PranavSenthilnathan Dec 17, 2024
501813f
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 17, 2024
c3b1c3b
move warning disabling to top and fix up tests
PranavSenthilnathan Dec 18, 2024
c9c4884
add fuzzer
PranavSenthilnathan Dec 19, 2024
8482b1c
Fix some tests I missed
PranavSenthilnathan Dec 19, 2024
d50bbca
clean up and add another test to fuzzer
PranavSenthilnathan Dec 19, 2024
55827d9
comment typo
PranavSenthilnathan Dec 20, 2024
a5cd855
pr comments
PranavSenthilnathan Dec 20, 2024
c82b035
Merge branch 'main' of https://github.com/dotnet/runtime into user/if…
PranavSenthilnathan Dec 20, 2024
4f63907
Merge branch 'user/ificator/writestringvaluesegment' of https://githu…
PranavSenthilnathan Dec 20, 2024
b7fd4a5
throw when encodings are mixed
PranavSenthilnathan Dec 24, 2024
c0a700c
update fuzzer to assert that mixing encodings always throws
PranavSenthilnathan Dec 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions eng/pipelines/libraries/fuzzing/deploy-to-onefuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,12 @@ extends:
onefuzzDropDirectory: $(fuzzerProject)/deployment/UTF8Fuzzer
SYSTEM_ACCESSTOKEN: $(System.AccessToken)
displayName: Send UTF8Fuzzer to OneFuzz

- task: onefuzz-task@0
inputs:
onefuzzOSes: 'Windows'
env:
onefuzzDropDirectory: $(fuzzerProject)/deployment/Utf8JsonWriterFuzzer
SYSTEM_ACCESSTOKEN: $(System.AccessToken)
displayName: Send Utf8JsonWriterFuzzer to OneFuzz
# ONEFUZZ_TASK_WORKAROUND_END
6 changes: 6 additions & 0 deletions src/libraries/Fuzzing/DotnetFuzzing/DotnetFuzzing.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
<Compile Include="Fuzzers\TextEncodingFuzzer.cs" />
<Compile Include="Fuzzers\TypeNameFuzzer.cs" />
<Compile Include="Fuzzers\UTF8Fuzzer.cs" />
<Compile Include="Fuzzers\Utf8JsonWriterFuzzer.cs" />
<Compile Include="IFuzzer.cs" />
<Compile Include="MemoryBackedStream.cs" />
<Compile Include="PooledBoundedMemory.cs" />
<Compile Include="Program.cs" />
<Compile Include="$(TestUtilities)\System\Buffers\BoundedMemory.*" Link="TestUtilities\%(Filename)%(Extension)" />
Expand All @@ -42,6 +44,10 @@
</None>
</ItemGroup>

<ItemGroup>
<None Include="MemoryBackedStream.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\System.Formats.Nrbf\src\System.Formats.Nrbf.csproj" />
</ItemGroup>
Expand Down
176 changes: 176 additions & 0 deletions src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Buffers;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;
using SharpFuzz;

namespace DotnetFuzzing.Fuzzers;

/// <summary>
/// Fuzzer for the <c>Utf8JsonWriter.WriteStringValueSegment</c> overloads. The input is written as one
/// JSON string value, first in a single piece and then split into two and three segments, and each
/// result is compared against output produced by calling the <see cref="JavaScriptEncoder"/> directly.
/// </summary>
internal sealed class Utf8JsonWriterFuzzer : IFuzzer
{
    public string[] TargetAssemblies { get; } = ["System.Text.Json"];

    public string[] TargetCoreLibPrefixes => [];

    // One of the bytes in the input (the byte at index 8) is used to set various test options.
    // Each bit in that byte represents a different option as indicated here. Every flag is tested
    // with "== 0", so a cleared bit selects the first alternative of the corresponding option.

    // Options for JsonWriterOptions
    private const byte IndentFlag = 1;
    private const byte EncoderFlag = 1 << 1;
    private const byte MaxDepthFlag = 1 << 2;
    private const byte NewLineFlag = 1 << 3;
    private const byte SkipValidationFlag = 1 << 4;

    // Options for choosing between UTF-8 and UTF-16 encoding
    private const byte EncodingFlag = 1 << 5;

    // Options for choosing whether to poison previous or next page
    private const byte PoisonFlag = 1 << 6;

    /// <summary>
    /// Runs one fuzzing iteration over <paramref name="bytes"/>.
    /// </summary>
    /// <param name="bytes">
    /// Raw fuzzer input: two ints used as slice indices, one options byte, one padding byte,
    /// followed by the payload that is written as the string value.
    /// </param>
    public void FuzzTarget(ReadOnlySpan<byte> bytes)
    {
        const int minLength = 10; // 2 ints, 1 byte, and 1 padding to align chars
        if (bytes.Length < minLength)
        {
            return;
        }

        // First 2 ints are used as indices to slice the input and the following byte is used for options
        ReadOnlySpan<int> ints = MemoryMarshal.Cast<byte, int>(bytes);
        int slice1 = ints[0];
        int slice2 = ints[1];
        byte optionsByte = bytes[8];
        bytes = bytes.Slice(minLength);
        ReadOnlySpan<char> chars = MemoryMarshal.Cast<byte, char>(bytes);

        // Validate that the indices are within bounds of the input
        bool utf8 = (optionsByte & EncodingFlag) == 0;
        if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length)))
        {
            return;
        }

        // Set up options based on the options byte
        bool indented = (optionsByte & IndentFlag) == 0;
        JsonWriterOptions options = new()
        {
            // EncoderFlag (not EncodingFlag) picks the encoder, so the encoder choice and the
            // UTF-8/UTF-16 input choice are fuzzed independently of each other.
            Encoder = (optionsByte & EncoderFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
            Indented = indented,
            MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0,
            NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n",
            SkipValidation = (optionsByte & SkipValidationFlag) == 0,
        };

        // Compute the expected result by using the encoder directly and the input
        int maxExpandedSizeBytes = 6 * bytes.Length + 2;
        byte[] buffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
        try
        {
            Span<byte> expected = utf8
                ? EncodeToUtf8(bytes, buffer, options.Encoder, out _)
                : EncodeToUtf8(chars, buffer, options.Encoder, out _);

            // Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same.
            foreach (ReadOnlySpan<Range> ranges in new[]
            {
                new[] { 0.. },
                new[] { 0..slice1, slice1.. },
                new[] { 0..slice1, slice1..slice2, slice2.. },
            })
            {
                // Use a stream backed by bounded memory to detect out-of-bounds accesses
                using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(expected.Length, (optionsByte & PoisonFlag) == 0 ? PoisonPagePlacement.After : PoisonPagePlacement.Before);
                using MemoryBackedStream stream = new(memory.Memory);
                using Utf8JsonWriter writer = new(stream, options);

                if (utf8)
                {
                    WriteStringValueSegments(writer, bytes, ranges);
                    writer.Flush();
                }
                else
                {
                    WriteStringValueSegments(writer, chars, ranges);
                    writer.Flush();
                }

                ReadOnlySpan<byte> actual = memory.Span;

                // Compare the expected and actual results
                Assert.SequenceEqual(expected, actual);
            }

            // Additional test for mixing UTF-8 and UTF-16 encoding. The alignment math is easier in UTF-16 mode so just run it for that.
            if (!utf8)
            {
                {
                    using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(maxExpandedSizeBytes, PoisonPagePlacement.Before);
                    using MemoryBackedStream stream = new(memory.Memory);
                    using Utf8JsonWriter writer = new(stream, options);

                    // slice1 counts chars here, so the matching byte offset is 2 * slice1.
                    writer.WriteStringValueSegment(chars[0..slice1], false);
                    writer.WriteStringValueSegment(bytes[(2 * slice1)..], true);
                    writer.Flush();
                }

                {
                    using PooledBoundedMemory<byte> memory = PooledBoundedMemory<byte>.Rent(maxExpandedSizeBytes, PoisonPagePlacement.Before);
                    using MemoryBackedStream stream = new(memory.Memory);
                    using Utf8JsonWriter writer = new(stream, options);

                    writer.WriteStringValueSegment(bytes[0..(2 * slice1)], false);
                    writer.WriteStringValueSegment(chars[slice1..], true);
                    writer.Flush();
                }
            }
        }
        finally
        {
            // Return the pooled buffer even when an assertion or the writer throws.
            ArrayPool<byte>.Shared.Return(buffer);
        }
    }

    /// <summary>
    /// Writes each range of <paramref name="bytes"/> as one string value segment; only the last
    /// range is flagged as the final segment.
    /// </summary>
    private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges)
    {
        for (int i = 0; i < ranges.Length; i++)
        {
            writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1);
        }
    }

    /// <summary>
    /// Writes each range of <paramref name="chars"/> as one string value segment; only the last
    /// range is flagged as the final segment.
    /// </summary>
    private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<char> chars, ReadOnlySpan<Range> ranges)
    {
        for (int i = 0; i < ranges.Length; i++)
        {
            writer.WriteStringValueSegment(chars[ranges[i]], i == ranges.Length - 1);
        }
    }

    /// <summary>
    /// Encodes <paramref name="bytes"/> with <paramref name="encoder"/> into
    /// <paramref name="destBuffer"/>, surrounded by double quotes.
    /// </summary>
    /// <param name="written">Number of bytes the encoder produced, excluding the two quotes.</param>
    /// <returns>The slice of <paramref name="destBuffer"/> holding the quoted result.</returns>
    private static Span<byte> EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder, out int written)
    {
        destBuffer[0] = (byte)'"';
        encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
        destBuffer[written + 1] = (byte)'"';
        return destBuffer.Slice(0, written + 2);
    }

    /// <summary>
    /// Encodes <paramref name="chars"/> with <paramref name="encoder"/> into a temporary UTF-16
    /// buffer, surrounds the result with double quotes, and transcodes it to UTF-8 in
    /// <paramref name="destBuffer"/>.
    /// </summary>
    /// <param name="written">Number of UTF-8 bytes written to <paramref name="destBuffer"/>, quotes included.</param>
    /// <returns>The slice of <paramref name="destBuffer"/> holding the quoted result.</returns>
    private static Span<byte> EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, out int written)
    {
        var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length + 2);
        utf16buffer[0] = '"';
        encoder.Encode(chars, utf16buffer.AsSpan(1), out _, out written, isFinalBlock: true);
        utf16buffer[written + 1] = '"';

        Utf8.FromUtf16(utf16buffer.AsSpan(0, written + 2), destBuffer, out _, out written, isFinalBlock: true);
        ArrayPool<char>.Shared.Return(utf16buffer);
        return destBuffer[0..written];
    }
}
142 changes: 142 additions & 0 deletions src/libraries/Fuzzing/DotnetFuzzing/MemoryBackedStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;

namespace DotnetFuzzing;

/// <summary>
/// A stream implementation that is backed by <see cref="Memory{T}"/> instead of a byte array.
/// This is particularly useful in tests where we need a stream but we also want to detect
/// out-of-bounds accesses with <see cref="BoundedMemory{T}"/>.
/// The stream has a fixed length: writes past the end of the backing memory are rejected.
/// </summary>
public class MemoryBackedStream : Stream
{
    private Memory<byte> _memory;
    private bool _writable;
    private bool _disposed;
    private int _position;

    /// <summary>
    /// Creates a stream over <paramref name="memory"/>, positioned at offset 0.
    /// </summary>
    /// <param name="memory">The memory that reads and writes operate on.</param>
    /// <param name="writable">Whether <see cref="Write(byte[], int, int)"/> is permitted.</param>
    public MemoryBackedStream(Memory<byte> memory, bool writable = true)
    {
        _memory = memory;
        _writable = writable;
    }

    /// <summary>True until the stream is disposed.</summary>
    public override bool CanRead => !_disposed;

    /// <summary>True until the stream is disposed.</summary>
    public override bool CanSeek => !_disposed;

    /// <summary>True when the stream was created writable and has not been disposed.</summary>
    public override bool CanWrite => _writable;

    /// <summary>The length of the backing memory.</summary>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override long Length
    {
        get
        {
            EnsureNotClosed();
            return _memory.Length;
        }
    }

    /// <summary>The current offset into the backing memory; it may be set beyond <see cref="Length"/>.</summary>
    /// <exception cref="ArgumentOutOfRangeException">The value is negative or greater than <see cref="int.MaxValue"/>.</exception>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override long Position
    {
        get
        {
            EnsureNotClosed();
            return _position;
        }
        set
        {
            // Casting to ulong makes negative values fail the same range check.
            ArgumentOutOfRangeException.ThrowIfGreaterThan((ulong)value, (ulong)int.MaxValue, nameof(value));
            EnsureNotClosed();
            _position = (int)value;
        }
    }

    /// <summary>No-op: writes go straight to the backing memory.</summary>
    public override void Flush() { }

    /// <summary>
    /// Copies up to <paramref name="count"/> bytes from the current position into
    /// <paramref name="buffer"/> and advances the position by the number of bytes copied.
    /// </summary>
    /// <returns>The number of bytes copied; 0 when the position is at or past the end.</returns>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        ValidateBufferArguments(buffer, offset, count);
        EnsureNotClosed();

        int n = _memory.Length - _position;
        if (n > count)
        {
            n = count;
        }

        // n is negative when the position was moved past the end of the memory.
        if (n <= 0)
        {
            return 0;
        }

        _memory.Slice(_position, n).CopyTo(buffer.AsMemory(offset, n));
        _position += n;
        return n;
    }

    /// <summary>
    /// Moves the position to <paramref name="offset"/> relative to <paramref name="origin"/>.
    /// </summary>
    /// <returns>The new position.</returns>
    /// <exception cref="ArgumentException"><paramref name="origin"/> is not a defined value.</exception>
    /// <exception cref="IOException">The resulting position would be negative.</exception>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override long Seek(long offset, SeekOrigin origin)
    {
        EnsureNotClosed();
        return SeekCore(offset, origin switch
        {
            SeekOrigin.Begin => 0,
            SeekOrigin.Current => _position,
            SeekOrigin.End => _memory.Length,
            _ => throw new ArgumentException("Invalid seek origin.", nameof(origin))
        });
    }

    // Applies offset to the reference location loc and stores the result as the new position.
    private long SeekCore(long offset, int loc)
    {
        ArgumentOutOfRangeException.ThrowIfGreaterThan(offset, int.MaxValue - loc);
        int tempPosition = unchecked(loc + (int)offset);

        // The 64-bit sum catches offsets whose truncation to int would look non-negative.
        if (unchecked(loc + offset) < 0 || tempPosition < 0)
        {
            throw new IOException("Seek before begin.");
        }

        _position = tempPosition;

        Debug.Assert(_position >= 0);
        return _position;
    }

    /// <summary>Not supported; the stream cannot be resized.</summary>
    public override void SetLength(long value) => throw new NotSupportedException("Currently stream expansion is not supported.");

    /// <summary>
    /// Copies <paramref name="count"/> bytes from <paramref name="buffer"/> to the current position
    /// and advances the position past them.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    /// <exception cref="NotSupportedException">
    /// The stream is read-only, or the write would extend past the end of the backing memory.
    /// </exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        ValidateBufferArguments(buffer, offset, count);
        EnsureNotClosed();
        EnsureWriteable();

        int i = _position + count;
        // Check for overflow
        if (i < 0)
        {
            throw new IOException("Stream too long.");
        }

        if (i > _memory.Length)
        {
            throw new NotSupportedException("Currently stream expansion is not supported.");
        }

        buffer.AsMemory(offset, count).CopyTo(_memory.Slice(_position, count));
        _position = i;
    }

    /// <summary>Marks the stream as disposed and drops the reference to the backing memory.</summary>
    protected override void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            _disposed = true;
            _memory = Memory<byte>.Empty;
            _writable = false;
        }

        base.Dispose(disposing);
    }

    private void EnsureNotClosed()
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(MemoryBackedStream));
        }
    }

    // Callers check EnsureNotClosed first, so reaching this with _writable == false means the
    // stream is read-only rather than disposed.
    private void EnsureWriteable()
    {
        if (!_writable)
        {
            throw new NotSupportedException("Stream does not support writing.");
        }
    }
}
Loading
Loading