Skip to content

Commit

Permalink
feat(java/python/golang): Implement the magicNumber feature (#1539)
Browse files Browse the repository at this point in the history
<!--
**Thanks for contributing to Fury.**

**If this is your first time opening a PR on fury, you can refer to
[CONTRIBUTING.md](https://github.com/apache/incubator-fury/blob/main/CONTRIBUTING.md).**

Contribution Checklist

- The **Apache Fury (incubating)** community has restrictions on the
naming of pr titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/incubator-fury/blob/main/CONTRIBUTING.md).

- Fury has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->

## What does this PR do?

<!-- Describe the purpose of this PR. -->
This PR implements the magic number feature described in the Fury header section of
[xlang_serialization_spec.md](https://github.com/apache/incubator-fury/blob/main/docs/specification/xlang_serialization_spec.md#fury-header).


## Related issues

<!--
Is there any related issue? Please attach here.

- #xxxx0
- #xxxx1
- #xxxx2
-->


## Does this PR introduce any user-facing change?

<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/incubator-fury/issues/new/choose)
describing the need to do so and update the document if necessary.
-->

- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?


## Benchmark

<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->

---------

Signed-off-by: LiangliangSui <[email protected]>
  • Loading branch information
LiangliangSui authored Apr 19, 2024
1 parent 66b7598 commit ec01574
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 0 deletions.
18 changes: 18 additions & 0 deletions go/fury/fury.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ const (
isOutOfBandFlag
)

// MAGIC_NUMBER is the 16-bit value that Serialize writes at the very start of
// an xlang payload and that Deserialize reads back and validates before
// decoding anything else.
const MAGIC_NUMBER int16 = 0x62D4

type Fury struct {
typeResolver *typeResolver
refResolver *RefResolver
Expand Down Expand Up @@ -121,6 +123,11 @@ func (f *Fury) Serialize(buf *ByteBuffer, v interface{}, callback BufferCallback
buffer = f.buffer
buffer.writerIndex = 0
}
if f.language == XLANG {
buffer.WriteInt16(MAGIC_NUMBER)
} else {
return fmt.Errorf("%d language is not supported", f.language)
}
var bitmap byte = 0
if isNil(reflect.ValueOf(v)) {
bitmap |= isNilFlag
Expand Down Expand Up @@ -310,6 +317,17 @@ func (f *Fury) Unmarshal(data []byte, v interface{}) error {

func (f *Fury) Deserialize(buf *ByteBuffer, v interface{}, buffers []*ByteBuffer) error {
defer f.resetRead()
if f.language == XLANG {
magicNumber := buf.ReadInt16()
if magicNumber != MAGIC_NUMBER {
return fmt.Errorf(
"the fury xlang serialization must start with magic number 0x%x. "+
"Please check whether the serialization is based on the xlang protocol and the data didn't corrupt",
MAGIC_NUMBER)
}
} else {
return fmt.Errorf("%d language is not supported", f.language)
}
var bitmap = buf.ReadByte_()
if bitmap&isNilFlag == isNilFlag {
return nil
Expand Down
12 changes: 12 additions & 0 deletions go/fury/fury_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,18 @@ func TestSerializeStructSimple(t *testing.T) {
}
}

// TestSerializeBeginWithMagicNumber verifies that the payload produced by
// Marshal starts with MAGIC_NUMBER, decoded here from the first two bytes in
// little-endian order.
func TestSerializeBeginWithMagicNumber(t *testing.T) {
	strSlice := []string{"str1", "str1", "", "", "str2"}
	fury := NewFury(true)
	// Named data rather than bytes to avoid shadowing the standard library package.
	data, err := fury.Marshal(strSlice)
	require.NoError(t, err, fmt.Sprintf("serialize value %s with type %s failed: %s",
		reflect.ValueOf(strSlice), reflect.TypeOf(strSlice), err))
	// The payload must contain at least the two bytes indexed below.
	require.True(t, len(data) >= 2)
	magicNumber := int16(data[0]) | (int16(data[1]) << 8)
	// testify expects (expected, actual); the original order mislabelled failures.
	require.Equal(t, MAGIC_NUMBER, magicNumber)
}

type Foo struct {
F1 int32
F2 string
Expand Down
12 changes: 12 additions & 0 deletions java/fury-core/src/main/java/org/apache/fury/Fury.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ public final class Fury implements BaseFury {
private static final boolean isLittleEndian = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
private static final byte BITMAP = isLittleEndian ? isLittleEndianFlag : 0;
private static final int BUFFER_SIZE_LIMIT = 128 * 1024;
private static final short MAGIC_NUMBER = 0x62D4;

private final Config config;
private final boolean refTracking;
Expand Down Expand Up @@ -221,6 +222,9 @@ public MemoryBuffer serialize(MemoryBuffer buffer, Object obj) {

@Override
public MemoryBuffer serialize(MemoryBuffer buffer, Object obj, BufferCallback callback) {
if (language == Language.XLANG) {
buffer.writeInt16(MAGIC_NUMBER);
}
byte bitmap = BITMAP;
if (language != Language.JAVA) {
bitmap |= isCrossLanguageFlag;
Expand Down Expand Up @@ -711,6 +715,14 @@ public Object deserialize(MemoryBuffer buffer, Iterable<MemoryBuffer> outOfBandB
if (depth != 0) {
throwDepthDeserializationException();
}
if (language == Language.XLANG) {
short magicNumber = buffer.readInt16();
assert magicNumber == MAGIC_NUMBER
: String.format(
"The fury xlang serialization must start with magic number 0x%x. Please "
+ "check whether the serialization is based on the xlang protocol and the data didn't corrupt.",
MAGIC_NUMBER);
}
byte bitmap = buffer.readByte();
if ((bitmap & isNilFlag) == isNilFlag) {
return null;
Expand Down
11 changes: 11 additions & 0 deletions python/pyfury/_fury.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@
DEFAULT_DYNAMIC_WRITE_STRING_ID = -1


# Magic number written as an int16 at the start of xlang-serialized data and
# checked again at the start of deserialization.
MAGIC_NUMBER = 0x62D4


class MetaStringBytes:
__slots__ = (
"data",
Expand Down Expand Up @@ -697,6 +700,8 @@ def _serialize(
else:
self.buffer.writer_index = 0
buffer = self.buffer
if self.language == Language.XLANG:
buffer.write_int16(MAGIC_NUMBER)
mask_index = buffer.writer_index
# 1byte used for bit mask
buffer.grow(1)
Expand Down Expand Up @@ -843,6 +848,12 @@ def _deserialize(
self.unpickler = Unpickler(buffer)
if unsupported_objects is not None:
self._unsupported_objects = iter(unsupported_objects)
if self.language == Language.XLANG:
magic_numer = buffer.read_int16()
assert magic_numer == MAGIC_NUMBER, (
f"The fury xlang serialization must start with magic number {hex(MAGIC_NUMBER)}. "
"Please check whether the serialization is based on the xlang protocol and the data didn't corrupt."
)
reader_index = buffer.reader_index
buffer.reader_index = reader_index + 1
if get_bit(buffer, reader_index, 0):
Expand Down
10 changes: 10 additions & 0 deletions python/pyfury/_serialization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ cdef int8_t STRING_CLASS_ID = 4
cdef int8_t PICKLE_CLASS_ID = 5
cdef int8_t PICKLE_STRONG_CACHE_CLASS_ID = 6
cdef int8_t PICKLE_CACHE_CLASS_ID = 7
cdef int16_t MAGIC_NUMBER = 0x62D4
# `NOT_NULL_VALUE_FLAG` + `CLASS_ID<<1` in little-endian order
cdef int32_t NOT_NULL_PYINT_FLAG = NOT_NULL_VALUE_FLAG & 0b11111111 | \
(PYINT_CLASS_ID << 9)
Expand Down Expand Up @@ -899,6 +900,8 @@ cdef class Fury:
else:
self.buffer.writer_index = 0
buffer = self.buffer
if self.language == Language.XLANG:
buffer.write_int16(MAGIC_NUMBER)
cdef int32_t mask_index = buffer.writer_index
# 1byte used for bit mask
buffer.grow(1)
Expand Down Expand Up @@ -1058,6 +1061,13 @@ cdef class Fury:
self.unpickler = Unpickler(buffer)
if unsupported_objects is not None:
self._unsupported_objects = iter(unsupported_objects)
if self.language == Language.XLANG:
magic_numer = buffer.read_int16()
assert magic_numer == MAGIC_NUMBER, (
f"The fury xlang serialization must start with magic number {hex(MAGIC_NUMBER)}. "
"Please check whether the serialization is based on the xlang protocol and the "
"data didn't corrupt."
)
cdef int32_t reader_index = buffer.reader_index
buffer.reader_index = reader_index + 1
if get_bit(buffer, reader_index, 0):
Expand Down

0 comments on commit ec01574

Please sign in to comment.