Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions for stream encoding/decoding of big strings #133

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions decode_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,28 @@ func (dec *Decoder) parseEscapedString() error {
case 't':
dec.data[dec.cursor] = '\t'
case 'u':
start := dec.cursor
start := dec.cursor - 1 // position of '\' for decoded string placement
dec.cursor++
str, err := dec.parseUnicode()
if err != nil {
return err
}
diff := dec.cursor - start
dec.data = append(append(dec.data[:start-1], str...), dec.data[dec.cursor:]...)
dec.length = len(dec.data)
dec.cursor += len(str) - diff - 1
// replace '\u...' with decoded runes
// it's possible to do it in-place (without appends) because size of encoded sequence (already in buffer)
// always greater than size of decoded one (encoding overhead at least 2 bytes ('\u') per one rune)
n := copy(dec.data[start:], str)
// and shift buffer left (on difference between length of encoded and decoded runes)
end := start + n
copy(dec.data[end:], dec.data[dec.cursor:])
dec.length -= dec.cursor - end
dec.cursor = end

return nil
default:
return dec.raiseInvalidJSONErr(dec.cursor)
}

dec.data = append(dec.data[:dec.cursor-1], dec.data[dec.cursor:]...)
copy(dec.data[dec.cursor-1:], dec.data[dec.cursor:])
dec.length--

// Since we've lost a character, our dec.cursor offset is now
Expand Down
118 changes: 118 additions & 0 deletions decode_writer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package gojay

import (
"encoding/base64"
"io"
)

// stringDecodeReader is Decoder under a separate named type so that a Read
// method can be attached to it. Pointer conversions between *Decoder and
// *stringDecodeReader (used throughout this file) do not copy anything:
// both pointers refer to the same decoder state.
type stringDecodeReader Decoder

// copyAndShift hands the bytes scanned so far, dec.data[start:dec.cursor],
// over to target and then removes the delivered bytes from the decoder buffer.
//
// The bytes following the delivered ones are moved left so that they begin at
// start, dec.length is reduced by the number of delivered bytes and the cursor
// is rewound to start. The return value is the number of bytes written into
// target.
func (dec *stringDecodeReader) copyAndShift(target []byte, start int) int {
	scanned := dec.data[start:dec.cursor]
	copied := copy(target, scanned)

	// Close the gap left by the delivered bytes so the same buffer space
	// can be reused by the next Read call.
	copy(dec.data[start:], dec.data[start+copied:])

	dec.cursor = start
	dec.length -= copied
	return copied
}

// Read scans the decoder input starting at the current cursor and delivers the
// scanned bytes into b, resolving escape sequences through parseEscapedString
// along the way.
//
// Return values:
//   - (n, nil) once enough bytes to fill b have been scanned;
//   - (n, io.EOF) when an unescaped '"' is reached; the cursor is left just
//     past that quote;
//   - (0, err) when parseEscapedString fails, or when the input runs out
//     before a closing quote is seen (reported via raiseInvalidJSONErr).
func (dec *stringDecodeReader) Read(b []byte) (int, error) {
	d := (*Decoder)(dec)
	first := dec.cursor

	for {
		if dec.cursor-first >= len(b) {
			// Enough bytes scanned to fill b: deliver them and let the
			// caller come back for more.
			return dec.copyAndShift(b, first), nil
		}
		if dec.cursor >= dec.length && !d.read() {
			// No more input although neither b was filled nor the
			// closing quote was seen: the JSON is invalid.
			return 0, d.raiseInvalidJSONErr(dec.cursor)
		}

		c := dec.data[dec.cursor]
		if c == '"' {
			// End of the string. Deliver what was scanned first, then
			// step over the quote that copyAndShift left under the cursor.
			n := dec.copyAndShift(b, first)
			dec.cursor++
			return n, io.EOF
		}

		dec.cursor++
		if c == '\\' {
			// Escape sequence: the cursor now sits on the character
			// after the backslash, as parseEscapedString is called.
			if err := d.parseEscapedString(); err != nil {
				return 0, err
			}
		}
	}
}

// decodeStringStream advances to the next value and, if it is a JSON string,
// returns the decoder itself as a stringDecodeReader positioned just after
// the opening quote.
//
// Spaces, newlines, tabs, carriage returns and commas are skipped. For a
// value starting with 'n' the reader is nil and the error is whatever
// assertNull reports. For any other leading byte an invalid-unmarshal error
// is stored in dec.err and the result of skipData is returned with a nil
// reader. If the input is exhausted first, both results are nil.
func (dec *Decoder) decodeStringStream() (*stringDecodeReader, error) {
	for dec.cursor < dec.length || dec.read() {
		switch dec.data[dec.cursor] {
		case '"':
			// opening quote of a string: hand over to the streaming reader
			dec.cursor++
			return (*stringDecodeReader)(dec), nil
		case 'n':
			// possibly null
			dec.cursor++
			return nil, dec.assertNull()
		case ' ', '\n', '\t', '\r', ',':
			// separator, keep looking
			dec.cursor++
		default:
			// anything else cannot be decoded as a string
			dec.err = dec.makeInvalidUnmarshalErr((*stringDecodeReader)(nil))
			return nil, dec.skipData()
		}
	}
	return nil, nil
}

// AddWriterFromEscaped streams the next JSON string value, unescaped, into w.
// It simply forwards to WriterFromEscaped and returns its result; see that
// method for how null and non-string values are handled.
func (dec *Decoder) AddWriterFromEscaped(w io.Writer) error {
	return dec.WriterFromEscaped(w)
}

// WriterFromEscaped streams the next JSON string value, with escape sequences
// resolved, into w without materialising the whole string.
//
// A null value writes nothing and returns nil. If the next value is neither a
// string nor null, decodeStringStream records an invalid-unmarshal error on
// the decoder and skips the value. Errors from locating the value or from
// copying into w are returned as is.
func (dec *Decoder) WriterFromEscaped(w io.Writer) error {
	src, err := dec.decodeStringStream()
	if err != nil {
		return err
	}
	dec.called |= 1
	if src != nil {
		// err is nil at this point, so it carries only the copy result.
		_, err = io.Copy(w, src)
	}
	return err
}

// AddWriterFromBase64 streams the next JSON string value, base64-decoded with
// the given encoding, into w. It simply forwards to WriterFromBase64 and
// returns its result; see that method for how null and non-string values are
// handled.
func (dec *Decoder) AddWriterFromBase64(w io.Writer, encoding *base64.Encoding) error {
	return dec.WriterFromBase64(w, encoding)
}

// WriterFromBase64 streams the next JSON string value into w, treating the
// string contents as base64 data in the given encoding and writing the decoded
// bytes.
//
// A null value writes nothing and returns nil. If the next value is neither a
// string nor null, decodeStringStream records an invalid-unmarshal error on
// the decoder and skips the value. Errors from locating the value, from
// base64 decoding, or from writing to w are returned as is.
func (dec *Decoder) WriterFromBase64(w io.Writer, encoding *base64.Encoding) error {
	reader, err := dec.decodeStringStream()
	if err != nil {
		return err
	}
	dec.called |= 1
	if reader == nil {
		// null (or a skipped non-string value): nothing to write
		return nil
	}
	// The string body is unescaped by reader and base64-decoded on the fly.
	_, err = io.Copy(w, base64.NewDecoder(encoding, reader))
	return err
}
Loading