From ce1c49d6ec3bd304e32de77c603b5676e2f940bd Mon Sep 17 00:00:00 2001 From: Vladimir Stolyarov Date: Sun, 27 Oct 2019 13:35:35 +0300 Subject: [PATCH 1/3] Get rid of 'append's in parseEscapedString This made to reuse existing data buffer without (re)allocations when decoder tries to parse escape sequence --- decode_string.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/decode_string.go b/decode_string.go index 694359c..2cdfd15 100644 --- a/decode_string.go +++ b/decode_string.go @@ -113,23 +113,28 @@ func (dec *Decoder) parseEscapedString() error { case 't': dec.data[dec.cursor] = '\t' case 'u': - start := dec.cursor + start := dec.cursor - 1 // position of '\' for decoded string placement dec.cursor++ str, err := dec.parseUnicode() if err != nil { return err } - diff := dec.cursor - start - dec.data = append(append(dec.data[:start-1], str...), dec.data[dec.cursor:]...) - dec.length = len(dec.data) - dec.cursor += len(str) - diff - 1 + // replace '\u...' with decoded runes + // it's possible to do it in-place (without appends) because size of encoded sequence (already in buffer) + // always greater than size of decoded one (encoding overhead at least 2 bytes ('\u') per one rune) + n := copy(dec.data[start:], str) + // and shift buffer left (on difference between length of encoded and decoded runes) + end := start + n + copy(dec.data[end:], dec.data[dec.cursor:]) + dec.length -= dec.cursor - end + dec.cursor = end return nil default: return dec.raiseInvalidJSONErr(dec.cursor) } - dec.data = append(dec.data[:dec.cursor-1], dec.data[dec.cursor:]...) + copy(dec.data[dec.cursor-1:], dec.data[dec.cursor:]) dec.length-- // Since we've lost a character, our dec.cursor offset is now From c861389439fd33f193a3c49d7bf7c9d6ecf412e8 Mon Sep 17 00:00:00 2001 From: Vladimir Stolyarov Date: Sun, 27 Oct 2019 13:40:32 +0300 Subject: [PATCH 2/3] Add periodic buffer flushing to Encoder. Buffer flushing happens when target writer provided and buffer size reaches value provided in SetBufFlushThreshold --- encode.go | 30 +++++++--- encode_builder.go | 28 +++++++++- encode_builder_test.go | 123 +++++++++++++++++++++++++++++++++++++++++ encode_pool.go | 1 + go.mod | 2 +- 5 files changed, 173 insertions(+), 11 deletions(-) diff --git a/encode.go b/encode.go index 92edaaf..b575af8 100644 --- a/encode.go +++ b/encode.go @@ -166,12 +166,15 @@ type MarshalerJSONArray interface { // An Encoder writes JSON values to an output stream. type Encoder struct { - buf []byte - isPooled byte - w io.Writer - err error - hasKeys bool - keys []string + bufFlushThreshold int + buf []byte + prevRune byte // zero if no flushes made + isPooled byte + bytesFlushed int + w io.Writer + err error + hasKeys bool + keys []string } // AppendBytes allows a modular usage by appending bytes manually to the current state of the buffer. @@ -189,14 +192,27 @@ func (enc *Encoder) Buf() []byte { return enc.buf } +// SetBufFlushThreshold sets a maximal internal buffer size before flushing it to writer. +func (enc *Encoder) SetBufFlushThreshold(size int) { + enc.bufFlushThreshold = size +} + // Write writes to the io.Writer and resets the buffer. func (enc *Encoder) Write() (int, error) { + enc.prevRune = 0 + if len(enc.buf) == 0 { + // this may happen when buffer flushed previously + return enc.bytesFlushed, nil + } i, err := enc.w.Write(enc.buf) enc.buf = enc.buf[:0] - return i, err + return enc.bytesFlushed + i, err } func (enc *Encoder) getPreviousRune() byte { + if enc.prevRune > 0 { + return enc.prevRune + } last := len(enc.buf) - 1 return enc.buf[last] } diff --git a/encode_builder.go b/encode_builder.go index 2895ba3..84b8acd 100644 --- a/encode_builder.go +++ b/encode_builder.go @@ -1,5 +1,9 @@ package gojay +import ( + "github.com/modern-go/reflect2" +) + const hex = "0123456789abcdef" // grow grows b's capacity, if necessary, to guarantee space for @@ -13,32 +17,46 @@ func (enc *Encoder) grow(n int) { } } +func (enc *Encoder) tryFlush() { + if enc.bufFlushThreshold > 0 && enc.w != nil && len(enc.buf) >= enc.bufFlushThreshold { + n, err := enc.w.Write(enc.buf) + enc.bytesFlushed += n + enc.err = err + enc.prevRune = enc.buf[len(enc.buf)-1] + enc.buf = enc.buf[:0] + } +} + // Write appends the contents of p to b's Buffer. // Write always returns len(p), nil. func (enc *Encoder) writeBytes(p []byte) { enc.buf = append(enc.buf, p...) + enc.tryFlush() } func (enc *Encoder) writeTwoBytes(b1 byte, b2 byte) { enc.buf = append(enc.buf, b1, b2) + enc.tryFlush() } // WriteByte appends the byte c to b's Buffer. // The returned error is always nil. func (enc *Encoder) writeByte(c byte) { enc.buf = append(enc.buf, c) + enc.tryFlush() } // WriteString appends the contents of s to b's Buffer. // It returns the length of s and a nil error. func (enc *Encoder) writeString(s string) { enc.buf = append(enc.buf, s...) + enc.tryFlush() } -func (enc *Encoder) writeStringEscape(s string) { - l := len(s) +func (enc *Encoder) writeBytesEscape(b []byte) { + l := len(b) for i := 0; i < l; i++ { - c := s[i] + c := b[i] if c >= 0x20 && c != '\\' && c != '"' { enc.writeByte(c) continue @@ -63,3 +81,7 @@ func (enc *Encoder) writeStringEscape(s string) { continue } } + +func (enc *Encoder) writeStringEscape(s string) { + enc.writeBytesEscape(reflect2.UnsafeCastString(s)) +} diff --git a/encode_builder_test.go b/encode_builder_test.go index 53affb9..2827291 100644 --- a/encode_builder_test.go +++ b/encode_builder_test.go @@ -1 +1,124 @@ package gojay + +import ( + "bytes" + "crypto/rand" + "encoding/base64" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +type CountingWritesBuffer struct { + bytes.Buffer + WriteCalls int +} + +func (c *CountingWritesBuffer) Write(p []byte) (int, error) { + c.WriteCalls++ + return c.Buffer.Write(p) +} + +type PayloadForEncode struct { + St int + RS io.Reader + Sid int + Tt string + Gr int + Uuid string + Ip string + Ua string + Tz int + R64 io.Reader + V int +} + +func (p *PayloadForEncode) MarshalJSONObject(enc *Encoder) { + enc.AddIntKey("st", p.St) + enc.AddReaderToEscapedKey("rs", p.RS) + enc.AddIntKey("sid", p.Sid) + enc.AddStringKey("tt", p.Tt) + enc.AddIntKey("gr", p.Gr) + enc.AddStringKey("uuid", p.Uuid) + enc.AddStringKey("ip", p.Ip) + enc.AddStringKey("ua", p.Ua) + enc.AddIntKey("tz", p.Tz) + enc.AddReaderToBase64Key("r64", p.R64, base64.StdEncoding) + enc.AddIntKey("v", p.V) +} + +func (p *PayloadForEncode) IsNil() bool { return p == nil } + +func TestEncodeWithFlush(t *testing.T) { + t.Run("buffer must be flushed after threshold reached", func(t *testing.T) { + var target CountingWritesBuffer + + var randBytes [120]byte + _, err := io.ReadFull(rand.Reader, randBytes[:]) + assert.NoError(t, err) + + encoder := BorrowEncoder(&target) + defer encoder.Release() + + const bufferFlushThreshold = 64 + encoder.SetBufFlushThreshold(bufferFlushThreshold) + assert.NoError(t, encoder.EncodeObject(&PayloadForEncode{ + St: 1, + RS: bytes.NewReader(randBytes[:]), + Sid: 2, + Tt: "TestString", + Gr: 4, + Uuid: "8f9a65eb-4807-4d57-b6e0-bda5d62f1429", + Ip: "127.0.0.1", + Ua: "Mozilla", + Tz: 8, + R64: bytes.NewReader([]byte{1, 2, 3, 4}), + V: 6, + })) + + wroteBytes := len(target.Bytes()) + expectedWriteCalls := 1 + (wroteBytes-1)/bufferFlushThreshold + assert.Equal(t, expectedWriteCalls, target.WriteCalls) + }) + + t.Run("ensure that output is valid", func(t *testing.T) { + var target bytes.Buffer + + encoder := BorrowEncoder(&target) + defer encoder.Release() + + const bufferFlushThreshold = 64 + encoder.SetBufFlushThreshold(bufferFlushThreshold) + assert.NoError(t, encoder.EncodeObject(&PayloadForEncode{ + St: 1, + RS: strings.NewReader(`wkofowk[grlmgaemriogjjgivsinfvna/snbgaipw43jgh'jnsprnbigphrjizsjo;ijb;osdjtnbs'`), + Sid: 2, + Tt: "TestString", + Gr: 4, + Uuid: "8f9a65eb-4807-4d57-b6e0-bda5d62f1429", + Ip: "127.0.0.1", + Ua: "Mozilla", + Tz: 8, + R64: strings.NewReader(`aoksdfpos'agpmejriojgp'nirbnatngads lkmsalkemflsapkdfpoakdospkf`), + V: 6, + })) + + assert.JSONEq(t, ` +{ + "st": 1, + "rs": "wkofowk[grlmgaemriogjjgivsinfvna/snbgaipw43jgh'jnsprnbigphrjizsjo;ijb;osdjtnbs'", + "sid": 2, + "tt": "TestString", + "gr": 4, + "uuid": "8f9a65eb-4807-4d57-b6e0-bda5d62f1429", + "ip": "127.0.0.1", + "ua": "Mozilla", + "tz": 8, + "r64": "YW9rc2RmcG9zJ2FncG1lanJpb2pncCduaXJibmF0bmdhZHMgbGttc2Fsa2VtZmxzYXBrZGZwb2FrZG9zcGtm", + "v": 6 +} +`, target.String()) + }) +} diff --git a/encode_pool.go b/encode_pool.go index 3b26322..6523e9d 100644 --- a/encode_pool.go +++ b/encode_pool.go @@ -36,6 +36,7 @@ func BorrowEncoder(w io.Writer) *Encoder { enc := encPool.Get().(*Encoder) enc.w = w enc.buf = enc.buf[:0] + enc.prevRune = 0 enc.isPooled = 0 enc.err = nil enc.hasKeys = false diff --git a/go.mod b/go.mod index 76814eb..de88902 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/lunixbochs/vtclean v1.0.0 // indirect github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/modern-go/reflect2 v1.0.1 github.com/pkg/errors v0.8.1 // indirect github.com/stretchr/testify v1.2.2 github.com/viant/assertly v0.4.8 From 1a64df118a37cc2e62dff509cf9d0dbde39b93d2 Mon Sep 17 00:00:00 2001 From: Vladimir Stolyarov Date: Sun, 27 Oct 2019 13:42:45 +0300 Subject: [PATCH 3/3] Add functions for stream encoding/decoding of big strings. --- decode_writer.go | 118 +++++++++++ decode_writer_test.go | 455 ++++++++++++++++++++++++++++++++++++++++++ encode_reader.go | 130 ++++++++++++ encode_reader_test.go | 130 ++++++++++++ 4 files changed, 833 insertions(+) create mode 100644 decode_writer.go create mode 100644 decode_writer_test.go create mode 100644 encode_reader.go create mode 100644 encode_reader_test.go diff --git a/decode_writer.go b/decode_writer.go new file mode 100644 index 0000000..30bc2db --- /dev/null +++ b/decode_writer.go @@ -0,0 +1,118 @@ +package gojay + +import ( + "encoding/base64" + "io" +) + +type stringDecodeReader Decoder + +func (dec *stringDecodeReader) copyAndShift(target []byte, start int) int { + n := copy(target, dec.data[start:dec.cursor]) + // shift buffer left to reuse it between reads + copy(dec.data[start:], dec.data[start+n:]) + dec.length -= n + dec.cursor = start + return n +} + +func (dec *stringDecodeReader) Read(b []byte) (int, error) { + start := dec.cursor + for dec.cursor-start < len(b) && (dec.cursor < dec.length || (*Decoder)(dec).read()) { + switch dec.data[dec.cursor] { + // string end + case '"': + // making copy before exit because parseEscapedString may change + // previous (before cursor) bytes of data buffer + n := dec.copyAndShift(b, start) + dec.cursor++ + return n, io.EOF + // escape sequence + case '\\': + dec.cursor++ + err := (*Decoder)(dec).parseEscapedString() + if err != nil { + return 0, err + } + default: + dec.cursor++ + } + } + + if dec.cursor-start < len(b) { + // input buffer not filled and exited before EOF + // that means json is invalid + return 0, (*Decoder)(dec).raiseInvalidJSONErr(dec.cursor) + } + + return dec.copyAndShift(b, start), nil +} + +func (dec *Decoder) decodeStringStream() (*stringDecodeReader, error) { + for ; dec.cursor < dec.length || dec.read(); dec.cursor++ { + switch dec.data[dec.cursor] { + case ' ', '\n', '\t', '\r', ',': + // is string + continue + case '"': + dec.cursor++ + return (*stringDecodeReader)(dec), nil + // is nil + case 'n': + dec.cursor++ + return nil, dec.assertNull() + default: + dec.err = dec.makeInvalidUnmarshalErr((*stringDecodeReader)(nil)) + return nil, dec.skipData() + } + } + return nil, nil +} + +// AddWriterFromEscaped decodes the JSON value within an object or an array to a provided writer. +// If next key is not a JSON string nor null, InvalidUnmarshalError will be returned. +func (dec *Decoder) AddWriterFromEscaped(w io.Writer) error { + return dec.WriterFromEscaped(w) +} + +// WriterFromEscaped decodes the JSON value within an object or an array to a provided writer. +// If next key is not a JSON string nor null, InvalidUnmarshalError will be returned. +func (dec *Decoder) WriterFromEscaped(w io.Writer) error { + reader, err := dec.decodeStringStream() + if err != nil { + return err + } + dec.called |= 1 + if reader == nil { + return nil + } + _, err = io.Copy(w, reader) + if err != nil { + return err + } + return nil +} + +// AddWriterFromBase64 decodes the JSON value (base64-encoded data) within an object or an array to a provided writer. +// If next key is not a JSON string nor null, InvalidUnmarshalError will be returned. +func (dec *Decoder) AddWriterFromBase64(w io.Writer, encoding *base64.Encoding) error { + return dec.WriterFromBase64(w, encoding) +} + +// WriterFromEscaped decodes the JSON value (base64-encoded data) within an object or an array to a provided writer. +// If next key is not a JSON string nor null, InvalidUnmarshalError will be returned. +func (dec *Decoder) WriterFromBase64(w io.Writer, encoding *base64.Encoding) error { + reader, err := dec.decodeStringStream() + if err != nil { + return err + } + dec.called |= 1 + if reader == nil { + return nil + } + _, err = io.Copy(w, base64.NewDecoder(encoding, reader)) + if err != nil { + return err + } + return nil +} diff --git a/decode_writer_test.go b/decode_writer_test.go new file mode 100644 index 0000000..6bc116f --- /dev/null +++ b/decode_writer_test.go @@ -0,0 +1,455 @@ +package gojay + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "io" + "math/rand" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +var randStringLetters = []rune("abcdefABCDEF'\"\n\t\r\b\fテュールストマ\\ーティンヤコブ 😁𠜎𠜱𠝹𠱓𠱸𠲖𠳏𠳕𠴕𠵼𠵿𝄞ۼ") + +type PayloadForDecoding struct { + Bool bool + Base64Stream bytes.Buffer + Int int + StrStream bytes.Buffer + Float float64 +} + +func (p *PayloadForDecoding) UnmarshalJSONObject(dec *Decoder, key string) error { + switch key { + case "bool": + return dec.Bool(&p.Bool) + case "base64_stream": + return dec.WriterFromBase64(&p.Base64Stream, base64.StdEncoding) + case "int": + return dec.Int(&p.Int) + case "str_stream": + return dec.WriterFromEscaped(&p.StrStream) + case "float": + return dec.Float64(&p.Float) + default: + return nil + } +} + +func (p *PayloadForDecoding) NKeys() int { return 5 } + +func makeRandString(rng *rand.Rand, size int) string { + var sb strings.Builder + for sb.Len() < size { + idx := rng.Int63() % int64(len(randStringLetters)) + sb.WriteRune(randStringLetters[idx]) + } + return sb.String() +} + +func TestDecodeWriterEscapedString(t *testing.T) { + type testCase struct { + name string + json string + expectedPayload PayloadForDecoding + expectedError error + } + + f := func(tc testCase) { + t.Helper() + + var payload PayloadForDecoding + decoder := BorrowDecoder(strings.NewReader(tc.json)) + defer decoder.Release() + + err := decoder.Decode(&payload) + + if tc.expectedError != nil { + assert.IsType(t, tc.expectedError, err) + return + } + if assert.NoError(t, err) { + assert.Equal(t, tc.expectedPayload, payload) + } + } + + f(testCase{ + name: "basic string", + json: `{"int":2,"str_stream":"hello world","bool":true}`, + expectedPayload: PayloadForDecoding{ + Int: 2, + StrStream: *bytes.NewBufferString("hello world"), + Bool: true, + }, + }) + f(testCase{ + name: "empty string", + json: `{"int":1,"float":2,"str_stream":""}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + Float: 2, + StrStream: *bytes.NewBufferString(""), + }, + }) + f(testCase{ + name: "escaped solidus", + json: `{"str_stream":"\/","int":1,"float":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + Float: 2, + StrStream: *bytes.NewBufferString("/"), + }, + }) + f(testCase{ + json: `{"str_stream":"\\/","int":1,"float":2,}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + Float: 2, + StrStream: *bytes.NewBufferString("\\/"), + }, + }) + f(testCase{ + json: `{"str_stream":"\\t"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\\t"), + }, + }) + f(testCase{ + json: `{"str_stream":"\t"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\t"), + }, + }) + f(testCase{ + json: `{"str_stream":"\\b"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\\b"), + }, + }) + f(testCase{ + json: `{"str_stream":"\b"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\b"), + }, + }) + f(testCase{ + json: `{"str_stream":"\\f"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\\f"), + }, + }) + f(testCase{ + json: `{"str_stream":"\f"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\f"), + }, + }) + f(testCase{ + json: `{"str_stream":"\\r"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\\r"), + }, + }) + f(testCase{ + json: `{"str_stream":"\r"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString("\r"), + }, + }) + f(testCase{ + json: `{"str_stream":"𠜎 𠜱 𠝹 𠱓 𠱸 𠲖 𠳏 𠳕 𠴕 𠵼 𠵿"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString(`𠜎 𠜱 𠝹 𠱓 𠱸 𠲖 𠳏 𠳕 𠴕 𠵼 𠵿`), + }, + }) + f(testCase{ + json: `{"str_stream":"\u06fc","bool":false,}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString(`ۼ`), + }, + }) + f(testCase{ + json: `{"bool":false,"str_stream":"\\u2070"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBufferString(`\u2070`), + }, + }) + f(testCase{ + json: `{"str_stream":"\uD834\uDD1E","int":10}`, + expectedPayload: PayloadForDecoding{ + Int: 10, + StrStream: *bytes.NewBufferString(`𝄞`), + }, + }) + f(testCase{ + json: `{"bool":true,"str_stream":"\uD834\\","int":11}`, + expectedPayload: PayloadForDecoding{ + Int: 11, + StrStream: *bytes.NewBufferString(`�\`), + Bool: true, + }, + }) + f(testCase{ + json: `{"str_stream":"\uD834\uD834","int":11}`, + expectedPayload: PayloadForDecoding{ + Int: 11, + StrStream: *bytes.NewBufferString("�\x00\x00\x00"), + }, + }) + f(testCase{ + json: `{"float":11,"str_stream":"\uD834"}`, + expectedPayload: PayloadForDecoding{ + Float: 11, + StrStream: *bytes.NewBufferString("�"), + }, + }) + f(testCase{ + json: `{"str_stream":"\u2Z80"}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"float":11,"str_stream":"\uD834\"}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"str_stream":"\uD834\uDZ1E"}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"str_stream":"\uD834}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\t", "key":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + StrStream: *bytes.NewBufferString("�\t"), + }, + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\n", "key":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + StrStream: *bytes.NewBufferString("�\n"), + }, + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\f", "key":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + StrStream: *bytes.NewBufferString("�\f"), + }, + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\b", "key":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + StrStream: *bytes.NewBufferString("�\b"), + }, + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\r", "key":2}`, + expectedPayload: PayloadForDecoding{ + Int: 1, + StrStream: *bytes.NewBufferString("�\r"), + }, + }) + f(testCase{ + json: `{"int": 1, "str_stream":"\uD834\h", "key":2}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"str_stream":null, "aaa":"bb"}`, + expectedPayload: PayloadForDecoding{ + StrStream: *bytes.NewBuffer(nil), + }, + }) + f(testCase{ + json: `{"str_stream":nall, "aaa":"bb"}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"key":null,"str_stream":"test string \" escaped", "float":1010.123}`, + expectedPayload: PayloadForDecoding{ + Float: 1010.123, + StrStream: *bytes.NewBufferString("test string \" escaped"), + }, + }) + f(testCase{ + json: `{"key":null,"str_stream":"test string \t escaped", "float":1010.123}`, + expectedPayload: PayloadForDecoding{ + Float: 1010.123, + StrStream: *bytes.NewBufferString("test string \t escaped"), + }, + }) + f(testCase{ + json: `{"key":null,"str_stream":"test string \r escaped", "float":1010.123}`, + expectedPayload: PayloadForDecoding{ + Float: 1010.123, + StrStream: *bytes.NewBufferString("test string \r escaped"), + }, + }) + f(testCase{ + json: `{"key":null,"str_stream":"test string \b escaped", "float":1010.123}`, + expectedPayload: PayloadForDecoding{ + Float: 1010.123, + StrStream: *bytes.NewBufferString("test string \b escaped"), + }, + }) + f(testCase{ + json: `{"key":null,"str_stream":"test string \n escaped", "float":1010.123}`, + expectedPayload: PayloadForDecoding{ + Float: 1010.123, + StrStream: *bytes.NewBufferString("test string \n escaped"), + }, + }) + f(testCase{ + json: `{"str_stream":"test string \\\" escaped, "int":1010}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"str_stream":"test string \\\l escaped", "int":1010}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"str_stream":invalid, "int":1010}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + json: `{"float":1.2,"str_stream":"string with spaces and \"escape\"d \"quotes\" and escaped line returns \n and escaped \\\\ escaped char","int":2}`, + expectedPayload: PayloadForDecoding{ + Float: 1.2, + StrStream: *bytes.NewBufferString("string with spaces and \"escape\"d \"quotes\" and escaped line returns \n and escaped \\\\ escaped char"), + Int: 2, + }, + }) +} + +func TestDecodeWriterEscapedString_multiple_reads(t *testing.T) { + randStr := makeRandString(rand.New(rand.NewSource(time.Now().Unix())), 100*1024) // 100KiB + jsonStr, err := json.Marshal(randStr) + assert.NoError(t, err) + + decoder := BorrowDecoder(strings.NewReader(`{"int":1,"bool":true,"str_stream":` + string(jsonStr) + `,"float":2.1}`)) + defer decoder.Release() + + var payload PayloadForDecoding + if assert.NoError(t, decoder.DecodeObject(&payload)) { + assert.Equal(t, PayloadForDecoding{ + Bool: true, + Int: 1, + StrStream: *bytes.NewBufferString(randStr), + Float: 2.1, + }, payload) + } +} + +func TestDecodeWriterBase64(t *testing.T) { + type testCase struct { + name string + json string + expectedPayload PayloadForDecoding + expectedError error + } + + f := func(tc testCase) { + t.Helper() + + var payload PayloadForDecoding + decoder := BorrowDecoder(strings.NewReader(tc.json)) + defer decoder.Release() + + err := decoder.Decode(&payload) + + if tc.expectedError != nil { + assert.IsType(t, tc.expectedError, err) + return + } + if assert.NoError(t, err) { + assert.Equal(t, tc.expectedPayload, payload) + } + } + + f(testCase{ + name: "basic (at the end)", + json: `{"int":10,"base64_stream": "YWttZGxzYXNsbWR5amtsLGttam5oYmdoamtsLGtqbg=="}`, + expectedPayload: PayloadForDecoding{ + Int: 10, + Base64Stream: *bytes.NewBufferString("akmdlsaslmdyjkl,kmjnhbghjkl,kjn"), + }, + }) + f(testCase{ + name: "basic (at the begin)", + json: `{"base64_stream": "YWttZGxzYXNsbWR5amtsLGttam5oYmdoamtsLGtqbg==","float":2.1}`, + expectedPayload: PayloadForDecoding{ + Float: 2.1, + Base64Stream: *bytes.NewBufferString("akmdlsaslmdyjkl,kmjnhbghjkl,kjn"), + }, + }) + f(testCase{ + name: "basic (at the middle)", + json: `{"bool":true,"base64_stream": "YWttZGxzYXNsbWR5amtsLGttam5oYmdoamtsLGtqbg==","float":2.1}`, + expectedPayload: PayloadForDecoding{ + Float: 2.1, + Base64Stream: *bytes.NewBufferString("akmdlsaslmdyjkl,kmjnhbghjkl,kjn"), + Bool: true, + }, + }) + f(testCase{ + name: "empty", + json: `{"bool":true,"base64_stream": "","float":2.1}`, + expectedPayload: PayloadForDecoding{ + Float: 2.1, + Base64Stream: *bytes.NewBufferString(""), + Bool: true, + }, + }) + f(testCase{ + name: "null", + json: `{"bool":true,"base64_stream": null}`, + expectedPayload: PayloadForDecoding{ + Base64Stream: *bytes.NewBuffer(nil), + Bool: true, + }, + }) + f(testCase{ + name: "invalid json", + json: `{"bool":true,"base64_stream": nall}`, + expectedError: InvalidJSONError(""), + }) + f(testCase{ + name: "invalid json", + json: `{"base64_stream": "A ,"aaa":"bcd"}`, + expectedError: io.ErrUnexpectedEOF, // base64 decoder converts InvalidJSONError to this + }) + f(testCase{ + name: "invalid character", + json: `{"base64_stream": "Ax" ,"aaa":"bcd"}`, + expectedError: io.ErrUnexpectedEOF, + }) + f(testCase{ + name: "invalid padding", + json: `{"base64_stream": "YWttZGxzYXNsbWR5amtsLGttam5oYmdoamtsLGtqbg=" ,"aaa":"bcd"}`, + expectedError: io.ErrUnexpectedEOF, + }) +} + +func TestDecodeWriterBase64_multiple_reads(t *testing.T) { + randBytes := make([]byte, 100*1024) // 100KiB + rand.New(rand.NewSource(time.Now().Unix())).Read(randBytes) + + decoder := BorrowDecoder(strings.NewReader(`{"base64_stream":"` + base64.StdEncoding.EncodeToString(randBytes) + `","int":20}`)) + defer decoder.Release() + + var payload PayloadForDecoding + if assert.NoError(t, decoder.DecodeObject(&payload)) { + assert.Equal(t, PayloadForDecoding{ + Int: 20, + Base64Stream: *bytes.NewBuffer(randBytes), + }, payload) + } +} diff --git a/encode_reader.go b/encode_reader.go new file mode 100644 index 0000000..958ba3f --- /dev/null +++ b/encode_reader.go @@ -0,0 +1,130 @@ +package gojay + +import ( + "encoding/base64" + "io" +) + +type writerEncoder Encoder // we can't simply implement io.Writer on Encoder because Write on Encoder already present + +func (enc *writerEncoder) Write(p []byte) (int, error) { + (*Encoder)(enc).writeBytes(p) + return len(p), nil +} + +type escapingWriterEncoder Encoder + +func (enc *escapingWriterEncoder) Write(p []byte) (int, error) { + (*Encoder)(enc).writeBytesEscape(p) + return len(p), nil +} + +func (enc *Encoder) writeBase64(r io.Reader, encoding *base64.Encoding) { + b64enc := base64.NewEncoder(encoding, (*writerEncoder)(enc)) + _, err := io.Copy(b64enc, r) + if err != nil { + enc.err = err + return + } + if err := b64enc.Close(); err != nil { + enc.err = err + return + } +} + +// AddReaderToBase64 adds a data to be base64-encoded (read from provided reader) +// must be used inside a slice or array encoding (does not encode a key) +func (enc *Encoder) AddReaderToBase64(r io.Reader, encoding *base64.Encoding) { + enc.ReaderToBase64(r, encoding) +} + +// ReaderToBase64 adds a data to be base64-encoded (read from provided reader) +// must be used inside a slice or array encoding (does not encode a key) +func (enc *Encoder) ReaderToBase64(r io.Reader, encoding *base64.Encoding) { + prevRune := enc.getPreviousRune() + if prevRune != '[' { + enc.writeTwoBytes(',', '"') + } else { + enc.writeByte('"') + } + enc.writeBase64(r, encoding) + enc.writeByte('"') +} + +// AddReaderToBase64 adds a data to be base64-encoded (read from provided reader) +// must be used inside an object as it will encode a key +func (enc *Encoder) AddReaderToBase64Key(key string, r io.Reader, encoding *base64.Encoding) { + enc.ReaderToBase64Key(key, r, encoding) +} + +// ReaderToBase64Key adds a data to be base64-encoded (read from provided reader) +// must be used inside an object as it will encode a key +func (enc *Encoder) ReaderToBase64Key(key string, r io.Reader, encoding *base64.Encoding) { + if enc.hasKeys { + if !enc.keyExists(key) { + return + } + } + enc.grow(2 + len(key)) + prevRune := enc.getPreviousRune() + if prevRune != '{' { + enc.writeTwoBytes(',', '"') + } else { + enc.writeByte('"') + } + enc.writeStringEscape(key) + enc.writeBytes(objKey) + enc.writeByte('"') + enc.writeBase64(r, encoding) + enc.writeByte('"') +} + +// AddReaderToEscaped adds a string to be encoded (read from provided reader) +// must be used inside a slice or array encoding (does not encode a key) +func (enc *Encoder) AddReaderToEscaped(r io.Reader) { + enc.ReaderToEscaped(r) +} + +// ReaderToEscaped adds a string to be encoded (read from provided reader) +// must be used inside a slice or array encoding (does not encode a key) +func (enc *Encoder) ReaderToEscaped(r io.Reader) { + prevRune := enc.getPreviousRune() + if prevRune != '[' { + enc.writeTwoBytes(',', '"') + } else { + enc.writeByte('"') + } + _, err := io.Copy((*escapingWriterEncoder)(enc), r) + if err != nil { + enc.err = err + } + enc.writeByte('"') +} + +// AddReaderToEscapedKey adds a string to be encoded (read from provided reader) +// must be used inside an object as it will encode a key +func (enc *Encoder) AddReaderToEscapedKey(key string, r io.Reader) { + enc.ReaderToEscapedKey(key, r) +} + +// ReaderToEscapedKey adds a string to be encoded (read from provided reader) +// must be used inside an object as it will encode a key +func (enc *Encoder) ReaderToEscapedKey(key string, r io.Reader) { + if enc.hasKeys { + if !enc.keyExists(key) { + return + } + } + enc.grow(2 + len(key)) + prevRune := enc.getPreviousRune() + if prevRune != '{' { + enc.writeTwoBytes(',', '"') + } else { + enc.writeByte('"') + } + enc.writeStringEscape(key) + enc.writeBytes(objKey) + enc.writeByte('"') + _, enc.err = io.Copy((*escapingWriterEncoder)(enc), r) + enc.writeByte('"') +} diff --git a/encode_reader_test.go b/encode_reader_test.go new file mode 100644 index 0000000..b664a14 --- /dev/null +++ b/encode_reader_test.go @@ -0,0 +1,130 @@ +package gojay + +import ( + "encoding/base64" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestEncodeToBase64(t *testing.T) { + type testCase struct { + source io.Reader + baseJSON string + expectedJSON string + } + + f := func(tc testCase) { + t.Helper() + + var b strings.Builder + var enc = NewEncoder(&b) + enc.writeString(tc.baseJSON) + enc.AddReaderToBase64(tc.source, base64.StdEncoding) + enc.Write() + assert.Equal(t, tc.expectedJSON, b.String()) + } + + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03"), + baseJSON: `[`, + expectedJSON: `["c29tZSBsb25nIHN0cmluZyB0byBiZSBlbmNvZGVkAAECAw=="`, + }) + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03"), + baseJSON: `["aaa",123`, + expectedJSON: `["aaa",123,"c29tZSBsb25nIHN0cmluZyB0byBiZSBlbmNvZGVkAAECAw=="`, + }) +} + +func TestEncodeToBase64Key(t *testing.T) { + type testCase struct { + source io.Reader + baseJSON string + expectedJSON string + } + + f := func(tc testCase) { + t.Helper() + + var b strings.Builder + var enc = NewEncoder(&b) + enc.writeString(tc.baseJSON) + enc.AddReaderToBase64Key("key", tc.source, base64.StdEncoding) + enc.Write() + assert.Equal(t, tc.expectedJSON, b.String()) + } + + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03"), + baseJSON: `{`, + expectedJSON: `{"key":"c29tZSBsb25nIHN0cmluZyB0byBiZSBlbmNvZGVkAAECAw=="`, + }) + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03"), + baseJSON: `{"a":"b"`, + expectedJSON: `{"a":"b","key":"c29tZSBsb25nIHN0cmluZyB0byBiZSBlbmNvZGVkAAECAw=="`, + }) +} + +func TestEncodeToEscaped(t *testing.T) { + type testCase struct { + source io.Reader + baseJSON string + expectedJSON string + } + + f := func(tc testCase) { + t.Helper() + + var b strings.Builder + var enc = NewEncoder(&b) + enc.writeString(tc.baseJSON) + enc.AddReaderToEscaped(tc.source) + enc.Write() + assert.Equal(t, tc.expectedJSON, b.String()) + } + + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03テュールスト マ\\ーテ\nィン ヤコブ 😁\t"), + baseJSON: `[`, + expectedJSON: `["some long string to be encoded\u0000\u0001\u0002\u0003テュールスト マ\\ーテ\nィン ヤコブ 😁\t"`, + }) + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03テュールスト マ\\ーテ\nィン ヤコブ 😁\t"), + baseJSON: `["aaa",123`, + expectedJSON: `["aaa",123,"some long string to be encoded\u0000\u0001\u0002\u0003テュールスト マ\\ーテ\nィン ヤコブ 😁\t"`, + }) +} + +func TestEncodeToEscapedKey(t *testing.T) { + type testCase struct { + source io.Reader + baseJSON string + expectedJSON string + } + + f := func(tc testCase) { + t.Helper() + + var b strings.Builder + var enc = NewEncoder(&b) + enc.writeString(tc.baseJSON) + enc.AddReaderToEscapedKey("key", tc.source) + enc.Write() + assert.Equal(t, tc.expectedJSON, b.String()) + } + + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03テュールスト マ\\ーテ\nィン ヤコブ 😁\t"), + baseJSON: `{`, + expectedJSON: `{"key":"some long string to be encoded\u0000\u0001\u0002\u0003テュールスト マ\\ーテ\nィン ヤコブ 😁\t"`, + }) + f(testCase{ + source: strings.NewReader("some long string to be encoded\x00\x01\x02\x03テュールスト マ\\ーテ\nィン ヤコブ 😁\t"), + baseJSON: `{"a":"b"`, + expectedJSON: `{"a":"b","key":"some long string to be encoded\u0000\u0001\u0002\u0003テュールスト マ\\ーテ\nィン ヤコブ 😁\t"`, + }) +}