Skip to content

Commit

Permalink
feat(scala): optimize scala class serialization (#1853)
Browse files Browse the repository at this point in the history
## What does this PR do?

- Optimize scala Iterable type serialization
- Reduce type name writing for most scala collection and factory types
- Add serializers for ToFactory type
## Related issues

<!--
Is there any related issue? Please attach here.

- #xxxx0
- #xxxx1
- #xxxx2
-->

## Does this PR introduce any user-facing change?

<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->

- [ ] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?

## Benchmark

<!--
When the PR has an impact on performance (if you don't know whether the
PR will have an impact on performance, you can submit the PR first, and
if it will have impact on performance, the code reviewer will explain
it), be sure to attach a benchmark data here.
-->
  • Loading branch information
chaokunyang authored Sep 25, 2024
1 parent df5bd78 commit 3d55929
Show file tree
Hide file tree
Showing 9 changed files with 421 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.fury.meta.DeflaterMetaCompressor;
import org.apache.fury.meta.MetaCompressor;
import org.apache.fury.pool.ThreadPoolFury;
import org.apache.fury.reflect.ReflectionUtils;
import org.apache.fury.resolver.ClassResolver;
import org.apache.fury.serializer.JavaSerializer;
import org.apache.fury.serializer.ObjectStreamSerializer;
Expand Down Expand Up @@ -316,6 +317,16 @@ public FuryBuilder withAsyncCompilation(boolean asyncCompilation) {
/**
 * Sets whether scala-specific serialization optimization is enabled.
 *
 * <p>When enabled, this method also probes for the {@code ScalaSerializers} class (its name is
 * built from the package of {@code Fury} plus {@code .serializer.scala.ScalaSerializers}). If the
 * class cannot be found, a warning is logged; the flag is still set and no exception is thrown.
 * This method only checks for the class, it does not register any serializer itself.
 *
 * @param enableScalaOptimization {@code true} to enable the scala-specific optimization
 * @return this builder
 */
public FuryBuilder withScalaOptimizationEnabled(boolean enableScalaOptimization) {
  this.scalaOptimizationEnabled = enableScalaOptimization;
  if (enableScalaOptimization) {
    try {
      // Presence check only: the returned Class object is intentionally discarded.
      Class.forName(
          ReflectionUtils.getPackage(Fury.class) + ".serializer.scala.ScalaSerializers");
    } catch (ClassNotFoundException e) {
      // Best effort: a missing `fury-scala` jar is reported, not treated as an error.
      LOG.warn(
          "`fury-scala` library is not in the classpath, please add it to class path and invoke "
              + "`org.apache.fury.serializer.scala.ScalaSerializers.registerSerializers` for peak performance");
    }
  }
  return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,10 @@ public void register(Class<?> cls) {
}
}

/**
 * Registers a class identified by name.
 *
 * <p>The class is resolved through {@code loadClass(className, false, 0, false)}; the last
 * argument disables the nonexistent-class fallback of that method.
 *
 * @param className name of the class to load and register
 */
public void register(String className) {
  final Class<?> resolved = loadClass(className, false, 0, false);
  register(resolved);
}

public void register(Class<?>... classes) {
for (Class<?> cls : classes) {
register(cls);
Expand Down Expand Up @@ -481,13 +485,21 @@ public void register(Class<?> cls, int classId) {
extRegistry.classIdGenerator++;
}

/**
 * Registers a class identified by name under the given class id.
 *
 * <p>The class is resolved through {@code loadClass(className, false, 0, false)}; the last
 * argument disables the nonexistent-class fallback of that method.
 *
 * @param className name of the class to load and register
 * @param classId id to register the class with
 */
public void register(String className, int classId) {
  final Class<?> resolved = loadClass(className, false, 0, false);
  register(resolved, classId);
}

/**
 * Registers {@code cls} with {@code id} and, when requested, creates its serializer ahead of
 * use.
 *
 * @param cls class to register
 * @param id id to register the class with
 * @param createSerializer whether to call {@code createSerializerAhead(cls)} after registering
 */
public void register(Class<?> cls, Short id, boolean createSerializer) {
  register(cls, id);
  if (!createSerializer) {
    return;
  }
  createSerializerAhead(cls);
}

/**
 * Name-based variant of {@code register(Class, Short, boolean)}.
 *
 * <p>The class is resolved through {@code loadClass(className, false, 0, false)}; the last
 * argument disables the nonexistent-class fallback of that method.
 *
 * @param className name of the class to load and register
 * @param classId id to register the class with
 * @param createSerializer forwarded unchanged to the class-based overload
 */
public void register(String className, Short classId, boolean createSerializer) {
  final Class<?> resolved = loadClass(className, false, 0, false);
  register(resolved, classId, createSerializer);
}

/**
 * Tells whether {@code cls} has an entry in the registered class-id map.
 *
 * @param cls class to look up
 * @return {@code true} if the map returns a non-null value for {@code cls}
 */
public boolean isRegistered(Class<?> cls) {
  final Object registeredId = extRegistry.registeredClassIdMap.get(cls);
  return registeredId != null;
}
Expand Down Expand Up @@ -1807,6 +1819,11 @@ private Class<?> loadClass(ClassSpec classSpec) {
}

/**
 * Loads a class by name, taking the nonexistent-class fallback setting from the Fury config.
 *
 * @param className name of the class to load
 * @param isEnum forwarded unchanged to the four-argument overload
 * @param arrayDims forwarded unchanged to the four-argument overload
 * @return the result of the four-argument {@code loadClass}
 */
private Class<?> loadClass(String className, boolean isEnum, int arrayDims) {
  final boolean fallbackEnabled = fury.getConfig().deserializeNonexistentClass();
  return loadClass(className, isEnum, arrayDims, fallbackEnabled);
}

private Class<?> loadClass(
String className, boolean isEnum, int arrayDims, boolean deserializeNonexistentClass) {
extRegistry.classChecker.checkClass(this, className);
try {
return Class.forName(className, false, fury.getClassLoader());
Expand All @@ -1818,7 +1835,7 @@ private Class<?> loadClass(String className, boolean isEnum, int arrayDims) {
String.format(
"Class %s not found from classloaders [%s, %s]",
className, fury.getClassLoader(), Thread.currentThread().getContextClassLoader());
if (fury.getConfig().deserializeNonexistentClass()) {
if (deserializeNonexistentClass) {
LOG.warn(msg);
return NonexistentClass.getNonexistentClass(
className, isEnum, arrayDims, metaContextShareEnabled);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public Serializer createSerializer(Fury fury, Class<?> clz) {
return new ScalaSortedSetSerializer(fury, clz);
} else if (scala.collection.Seq.class.isAssignableFrom(clz)) {
return new ScalaSeqSerializer(fury, clz);
} else if (scala.collection.Iterable.class.isAssignableFrom(clz)) {
return new ScalaCollectionSerializer(fury, clz);
}
if (DefaultSerializable.class.isAssignableFrom(clz)) {
Method replaceMethod = JavaSerializer.getWriteReplaceMethod(clz);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fury.serializer.scala;

import org.apache.fury.Fury;
import org.apache.fury.resolver.ClassResolver;
import org.apache.fury.serializer.Serializer;
import org.apache.fury.serializer.SerializerFactory;
import scala.collection.immutable.NumericRange;
import scala.collection.immutable.Range;

import static org.apache.fury.serializer.scala.ToFactorySerializers.IterableToFactoryClass;
import static org.apache.fury.serializer.scala.ToFactorySerializers.MapToFactoryClass;

public class ScalaSerializers {

/**
 * Prepares the given {@code Fury} instance for scala collection serialization.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>installs the scala serializer factory through {@code setSerializerFactory(fury)};
 *   <li>registers dedicated serializers for the two {@code ToFactory} classes;
 *   <li>registers a fixed list of scala immutable/mutable collection classes, their companion
 *       ({@code $}) classes, range types and serialization proxy types.
 * </ol>
 *
 * <p>Classes registered by name (string literal) are loaded through the resolver instead of being
 * referenced as class literals.
 *
 * <p>NOTE(review): registration order presumably determines the class ids assigned by the
 * resolver; confirm before reordering or inserting entries in the middle of this list.
 *
 * @param fury the instance whose class resolver is configured
 */
public static void registerSerializers(Fury fury) {
ClassResolver resolver = setSerializerFactory(fury);

// Dedicated serializers for the ToFactory wrapper classes.
resolver.registerSerializer(IterableToFactoryClass, new ToFactorySerializers.IterableToFactorySerializer(fury));
resolver.registerSerializer(MapToFactoryClass, new ToFactorySerializers.MapToFactorySerializer(fury));

// Seq
resolver.register(scala.collection.immutable.Seq.class);
resolver.register(scala.collection.immutable.Nil$.class);
resolver.register(scala.collection.immutable.List$.class);
resolver.register(scala.collection.immutable.$colon$colon.class);
// StrictOptimizedSeqFactory -> ... extends -> IterableFactory
resolver.register(scala.collection.immutable.Vector$.class);
resolver.register("scala.collection.immutable.VectorImpl");
resolver.register("scala.collection.immutable.Vector0");
resolver.register("scala.collection.immutable.Vector1");
resolver.register("scala.collection.immutable.Vector2");
resolver.register("scala.collection.immutable.Vector3");
resolver.register("scala.collection.immutable.Vector4");
resolver.register("scala.collection.immutable.Vector5");
resolver.register("scala.collection.immutable.Vector6");
resolver.register(scala.collection.immutable.Queue.class);
resolver.register(scala.collection.immutable.Queue$.class);
resolver.register(scala.collection.immutable.LazyList.class);
resolver.register(scala.collection.immutable.LazyList$.class);
resolver.register(scala.collection.immutable.ArraySeq.class);
resolver.register(scala.collection.immutable.ArraySeq$.class);

// Set
resolver.register(scala.collection.immutable.Set.class);
// IterableFactory
resolver.register(scala.collection.immutable.Set$.class);
resolver.register(scala.collection.immutable.Set.Set1.class);
resolver.register(scala.collection.immutable.Set.Set2.class);
resolver.register(scala.collection.immutable.Set.Set3.class);
resolver.register(scala.collection.immutable.Set.Set4.class);
resolver.register(scala.collection.immutable.HashSet.class);
resolver.register(scala.collection.immutable.TreeSet.class);
// SortedIterableFactory
resolver.register(scala.collection.immutable.TreeSet$.class);
// IterableFactory
resolver.register(scala.collection.immutable.HashSet$.class);
resolver.register(scala.collection.immutable.ListSet.class);
resolver.register(scala.collection.immutable.ListSet$.class);
resolver.register("scala.collection.immutable.Set$EmptySet$");
resolver.register("scala.collection.immutable.SetBuilderImpl");
resolver.register("scala.collection.immutable.SortedMapOps$ImmutableKeySortedSet");

// Map
resolver.register(scala.collection.immutable.Map.class);
resolver.register(scala.collection.immutable.Map$.class);
resolver.register(scala.collection.immutable.Map.Map1.class);
resolver.register(scala.collection.immutable.Map.Map2.class);
resolver.register(scala.collection.immutable.Map.Map3.class);
resolver.register(scala.collection.immutable.Map.Map4.class);
resolver.register(scala.collection.immutable.Map.WithDefault.class);
resolver.register("scala.collection.immutable.MapBuilderImpl");
resolver.register("scala.collection.immutable.Map$EmptyMap$");
resolver.register("scala.collection.immutable.SeqMap$EmptySeqMap$");
resolver.register(scala.collection.immutable.HashMap.class);
resolver.register(scala.collection.immutable.HashMap$.class);
resolver.register(scala.collection.immutable.TreeMap.class);
resolver.register(scala.collection.immutable.TreeMap$.class);
resolver.register(scala.collection.immutable.SortedMap$.class);
resolver.register(scala.collection.immutable.TreeSeqMap.class);
resolver.register(scala.collection.immutable.TreeSeqMap$.class);
resolver.register(scala.collection.immutable.ListMap.class);
resolver.register(scala.collection.immutable.ListMap$.class);
resolver.register(scala.collection.immutable.IntMap.class);
resolver.register(scala.collection.immutable.IntMap$.class);
resolver.register(scala.collection.immutable.LongMap.class);
resolver.register(scala.collection.immutable.LongMap$.class);

// Range
resolver.register(Range.Inclusive.class);
resolver.register(Range.Exclusive.class);
resolver.register(NumericRange.class);
resolver.register(NumericRange.Inclusive.class);
resolver.register(NumericRange.Exclusive.class);

// Serialization proxy / marker types from the scala library.
resolver.register(scala.collection.generic.SerializeEnd$.class);
resolver.register(scala.collection.generic.DefaultSerializationProxy.class);
resolver.register(scala.runtime.ModuleSerializationProxy.class);

// mutable collection types
resolver.register(scala.collection.mutable.StringBuilder.class);
resolver.register(scala.collection.mutable.ArrayBuffer.class);
resolver.register(scala.collection.mutable.ArrayBuffer$.class);
resolver.register(scala.collection.mutable.ArraySeq.class);
resolver.register(scala.collection.mutable.ArraySeq$.class);
resolver.register(scala.collection.mutable.ListBuffer.class);
resolver.register(scala.collection.mutable.ListBuffer$.class);
resolver.register(scala.collection.mutable.Buffer$.class);
resolver.register(scala.collection.mutable.ArrayDeque.class);
resolver.register(scala.collection.mutable.ArrayDeque$.class);

resolver.register(scala.collection.mutable.HashSet.class);
resolver.register(scala.collection.mutable.HashSet$.class);
resolver.register(scala.collection.mutable.TreeSet.class);
resolver.register(scala.collection.mutable.TreeSet$.class);

resolver.register(scala.collection.mutable.HashMap.class);
resolver.register(scala.collection.mutable.HashMap$.class);
resolver.register(scala.collection.mutable.TreeMap.class);
resolver.register(scala.collection.mutable.TreeMap$.class);
resolver.register(scala.collection.mutable.LinkedHashMap.class);
resolver.register(scala.collection.mutable.LinkedHashMap$.class);
resolver.register(scala.collection.mutable.LinkedHashSet.class);
resolver.register(scala.collection.mutable.LinkedHashSet$.class);
resolver.register(scala.collection.mutable.LongMap.class);
resolver.register(scala.collection.mutable.LongMap$.class);

resolver.register(scala.collection.mutable.Queue.class);
resolver.register(scala.collection.mutable.Queue$.class);
resolver.register(scala.collection.mutable.Stack.class);
resolver.register(scala.collection.mutable.Stack$.class);
resolver.register(scala.collection.mutable.BitSet.class);
resolver.register(scala.collection.mutable.BitSet$.class);
}

/**
 * Installs a {@code ScalaDispatcher} as serializer factory on the resolver of {@code fury}.
 *
 * <p>If the resolver already has a factory, that factory keeps priority: the installed factory
 * asks it first and falls back to the dispatcher only when it returns {@code null}. Without an
 * existing factory the dispatcher is installed directly.
 *
 * @param fury the instance whose class resolver is configured
 * @return the class resolver of {@code fury}
 */
private static ClassResolver setSerializerFactory(Fury fury) {
  final ClassResolver classResolver = fury.getClassResolver();
  final ScalaDispatcher scalaDispatcher = new ScalaDispatcher();
  final SerializerFactory existing = classResolver.getSerializerFactory();
  if (existing == null) {
    classResolver.setSerializerFactory(scalaDispatcher);
    return classResolver;
  }
  // Chain: previously configured factory first, scala dispatcher as fallback.
  final SerializerFactory chained =
      (f, cls) -> {
        Serializer created = existing.createSerializer(f, cls);
        return created != null ? created : scalaDispatcher.createSerializer(f, cls);
      };
  classResolver.setSerializerFactory(chained);
  return classResolver;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.fury.serializer.scala;

import org.apache.fury.Fury;
import org.apache.fury.memory.MemoryBuffer;
import org.apache.fury.memory.Platform;
import org.apache.fury.reflect.ReflectionUtils;
import org.apache.fury.serializer.Serializer;

import java.lang.reflect.Field;

/**
 * Serializers for the scala {@code IterableFactory$ToFactory} and {@code MapFactory$ToFactory}
 * classes. Both serializers write and read only the {@code factory} field of the wrapped object.
 */
public class ToFactorySerializers {
  /** Class object for {@code scala.collection.IterableFactory$ToFactory}, loaded by name. */
  static final Class<?> IterableToFactoryClass =
      ReflectionUtils.loadClass("scala.collection.IterableFactory$ToFactory");

  /** Class object for {@code scala.collection.MapFactory$ToFactory}, loaded by name. */
  static final Class<?> MapToFactoryClass =
      ReflectionUtils.loadClass("scala.collection.MapFactory$ToFactory");

  /** Serializer for {@code scala.collection.IterableFactory$ToFactory}. */
  public static class IterableToFactorySerializer extends Serializer {
    /** Offset of the {@code factory} field, resolved once at class initialization. */
    private static final long FACTORY_OFFSET;

    static {
      try {
        // for graalvm field offset auto rewrite
        Field factoryField =
            Class.forName("scala.collection.IterableFactory$ToFactory").getDeclaredField("factory");
        FACTORY_OFFSET = Platform.objectFieldOffset(factoryField);
      } catch (final Exception ex) {
        throw new RuntimeException(ex);
      }
    }

    public IterableToFactorySerializer(Fury fury) {
      super(fury, IterableToFactoryClass);
    }

    /** Writes the {@code factory} field of {@code value} as a reference. */
    @Override
    public void write(MemoryBuffer buffer, Object value) {
      Object factory = Platform.getObject(value, FACTORY_OFFSET);
      fury.writeRef(buffer, factory);
    }

    /** Creates a new instance and fills its {@code factory} field from the buffer. */
    @Override
    public Object read(MemoryBuffer buffer) {
      Object toFactory = Platform.newInstance(type);
      Object factory = fury.readRef(buffer);
      Platform.putObject(toFactory, FACTORY_OFFSET, factory);
      return toFactory;
    }
  }

  /** Serializer for {@code scala.collection.MapFactory$ToFactory}. */
  public static class MapToFactorySerializer extends Serializer {
    /** Offset of the {@code factory} field, resolved once at class initialization. */
    private static final long FACTORY_OFFSET;

    static {
      try {
        // for graalvm field offset auto rewrite
        Field factoryField =
            Class.forName("scala.collection.MapFactory$ToFactory").getDeclaredField("factory");
        FACTORY_OFFSET = Platform.objectFieldOffset(factoryField);
      } catch (final Exception ex) {
        throw new RuntimeException(ex);
      }
    }

    public MapToFactorySerializer(Fury fury) {
      super(fury, MapToFactoryClass);
    }

    /** Writes the {@code factory} field of {@code value} as a reference. */
    @Override
    public void write(MemoryBuffer buffer, Object value) {
      Object factory = Platform.getObject(value, FACTORY_OFFSET);
      fury.writeRef(buffer, factory);
    }

    /** Creates a new instance and fills its {@code factory} field from the buffer. */
    @Override
    public Object read(MemoryBuffer buffer) {
      Object toFactory = Platform.newInstance(type);
      Object factory = fury.readRef(buffer);
      Platform.putObject(toFactory, FACTORY_OFFSET, factory);
      return toFactory;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ class CollectionSerializerTest extends AnyWordSpec with Matchers {
.withLanguage(Language.JAVA)
.withRefTracking(true)
.withScalaOptimizationEnabled(setOpt)
.requireClassRegistration(false).build()
.requireClassRegistration(false)
.suppressClassRegistrationWarnings(false)
.build()
ScalaSerializers.registerSerializers(fury1)
if (setFactory) {
fury1.getClassResolver.setSerializerFactory(new ScalaDispatcher())
}
Expand All @@ -43,6 +46,8 @@ class CollectionSerializerTest extends AnyWordSpec with Matchers {
"serialize/deserialize List" in {
val list = List(100, 10000L)
fury1.deserialize(fury1.serialize(list)) shouldEqual list
val list2 = List(100, 10000L, 10000L, 10000L)
fury1.deserialize(fury1.serialize(list2)) shouldEqual list2
}
"serialize/deserialize empty List" in {
fury1.deserialize(fury1.serialize(List.empty)) shouldEqual List.empty
Expand Down
Loading

0 comments on commit 3d55929

Please sign in to comment.