Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for nested serializers, e.g., a dict with dfs #136

Merged
merged 1 commit into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions app/serializers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import json

from . import default_serializer, numpy_serializer, pandas_serializer
from .framework import Serializer, datetime_encoder, serializers
from .framework import Serializer, custom_encoder, serializers


def serialize(obj, serializer_name="default"):
serializer = serializers.get(type(obj), serializers.get(serializer_name))
if serializer is None:
raise ValueError(f"No serializer registered for object of type {type(obj)}")
return json.dumps(serializer.serialize(obj), default=datetime_encoder)
return json.dumps(serializer.serialize(obj), default=custom_encoder)


def deserialize(payload, serializer_name="default"):
payload = json.loads(payload)
serializer_name = payload.get("serializer", serializer_name)
serializer = serializers.get(serializer_name, serializers.get(serializer_name))
serializer = serializers.get(serializer_name)
if serializer is None:
raise ValueError(f"No serializer registered with name '{serializer_name}'")
return serializer.deserialize(payload)
4 changes: 2 additions & 2 deletions app/serializers/default_serializer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .framework import Serializer, convert_iso_strings_to_datetime
from .framework import Serializer, custom_decoder


class DefaultSerializer(Serializer):
Expand All @@ -13,7 +13,7 @@ def serialize(cls, obj):

@classmethod
def deserialize(cls, payload):
return convert_iso_strings_to_datetime(payload["data"])
return custom_decoder(payload["data"])


DefaultSerializer.register()
18 changes: 12 additions & 6 deletions app/serializers/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,25 @@ def register(cls, *types):


# Custom encoders/decoders
def datetime_encoder(obj):
def custom_encoder(obj):
    """Encode an object that ``json.dumps`` cannot handle on its own.

    Intended for use as the ``default=`` hook of ``json.dumps``.

    Args:
        obj: The value the JSON encoder could not serialize natively.

    Returns:
        An ISO 8601 string for ``datetime`` instances; otherwise whatever
        the serializer registered for ``type(obj)`` returns from its
        ``serialize`` method.

    Raises:
        TypeError: If ``obj`` is not a datetime and no serializer is
            registered for its exact type.
    """
    if isinstance(obj, dt.datetime):
        return obj.isoformat()
    # The registry is keyed by exact type, so a failed lookup already covers
    # every unregistered object; a separate isinstance pre-check against all
    # registered types would only repeat that work on every call.
    serializer = serializers.get(type(obj))
    if serializer:
        return serializer.serialize(obj)
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")


def convert_iso_strings_to_datetime(obj):
def custom_decoder(obj):
if isinstance(obj, list):
return [convert_iso_strings_to_datetime(item) for item in obj]
return [custom_decoder(item) for item in obj]
elif isinstance(obj, dict):
return {
key: convert_iso_strings_to_datetime(value) for key, value in obj.items()
}
serializer = serializers.get(obj.get("serializer"))
if serializer:
return serializer.deserialize(obj)
return {key: custom_decoder(value) for key, value in obj.items()}
elif isinstance(obj, str):
try:
return dt.datetime.fromisoformat(obj)
Expand Down
2 changes: 1 addition & 1 deletion docs/custom_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ This turns an existing Excel range into a DataFrame. Using an Excel table as you
- For development purposes, you don't need Redis, but the cache is in-memory and thus only works with a single worker/process for as long as the app runs. More importantly, there won't be any automatic cache purging happening.
```

Right now, you can return the majority of Python data types such as simple lists, dictionaries, and tuples. NumPy arrays and pandas DataFrames/Series are also supported. However, more complex objects like a dictionary that holds a pandas DataFrame isn't supported yet.
You can return the majority of Python data types such as simple lists, dictionaries, and tuples. NumPy arrays and pandas DataFrames/Series are also supported. For unsupported data types, a custom serializer can be written and registered (see `app/serializers/pandas_serializer.py` for an example).

The object handles are stored in the cache using a key that derives from the add-in installation, workbook name, and cell address, i.e., objects are not shared across different Excel installations or users.

Expand Down
58 changes: 58 additions & 0 deletions tests/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,61 @@ def test_series(test_input):
def test_numpy():
    """Check that a 2-D float array is unchanged by a serialize/deserialize round trip."""
    original = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])
    restored = deserialize(serialize(original))
    assert_array_equal(original, restored)


def test_dict_of_df():
    """Round-trip a dict holding two DataFrames with mixed column types.

    Both frames share every column except ``ints``, so each one must come
    back under its own key to compare equal.
    """
    timestamps = [
        dt.datetime(2022, 12, 1, 10, 33),
        dt.datetime(2022, 12, 2, 10, 34),
    ]

    def make_frame(ints):
        # Column order matters for assert_frame_equal, so keep it fixed here.
        return pd.DataFrame(
            {
                "ints": ints,
                "b": ["a", "b"],
                "date time": timestamps,
                "bool": [True, False],
                "floats": [1.1, 2.2],
            }
        )

    original = {"df1": make_frame([1, 2]), "df2": make_frame([11, 22])}
    restored = deserialize(serialize(original))
    for key in ("df1", "df2"):
        assert_frame_equal(original[key], restored[key])


def test_dict_of_ndarray():
    """Round-trip a dict of NumPy arrays and check each key individually.

    The two arrays hold different values so that an array restored under
    the wrong key makes the test fail instead of passing by coincidence.
    """
    arr1 = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]])
    arr2 = np.array([[7.7, 8.8, 9.9], [10.1, 11.11, 12.12]])
    data1 = {"arr1": arr1, "arr2": arr2}
    data2 = deserialize(serialize(data1))
    assert_array_equal(data1["arr1"], data2["arr1"])
    # Compare like with like: "arr2" against "arr2", not against "arr1".
    assert_array_equal(data1["arr2"], data2["arr2"])


def test_list_of_ndarray():
    """Round-trip a list of two NumPy arrays and compare them position by position."""
    values = [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
    original = [np.array(values), np.array(values)]
    restored = deserialize(serialize(original))
    # Index into the result explicitly so a too-short result raises.
    for position, expected in enumerate(original):
        assert_array_equal(expected, restored[position])


def test_tuple_of_ndarray():
    """Round-trip a tuple of two NumPy arrays and compare them position by position."""
    values = [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
    original = (np.array(values), np.array(values))
    restored = deserialize(serialize(original))
    # Only positional access is checked; the container type of the result is not asserted.
    for position, expected in enumerate(original):
        assert_array_equal(expected, restored[position])
Loading