Skip to content

Commit

Permalink
Packer tests for reused offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
hombit committed May 30, 2024
1 parent a251f2c commit 952c98d
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions tests/nested_pandas/series/test_packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
from pandas.testing import assert_frame_equal, assert_series_equal


def offsets_reused(nested_series):
    """Assert that every list column of the nested series has equal offset buffers.

    The series is converted to a frame of list columns via ``.nest.to_lists()``;
    each column is turned into a pyarrow array, and the buffers of its offsets
    array are compared against those of the first column.

    Raises ``AssertionError`` on the first column whose offset buffers differ.
    A series with no columns passes without any comparison.

    NOTE(review): ``==`` on pyarrow buffers presumably compares contents rather
    than memory addresses -- confirm this is strict enough to prove reuse.
    """
    list_columns = nested_series.nest.to_lists()

    # Lazily produce the offset buffers of each column, in column order, so a
    # mismatch stops the work before later columns are converted.
    per_column_buffers = (
        pa.array(list_columns[name]).offsets.buffers() for name in list_columns.columns
    )

    # The first column is the reference every later column is checked against.
    reference_buffers = next(per_column_buffers, None)
    for candidate_buffers in per_column_buffers:
        assert candidate_buffers == reference_buffers


def test_pack_with_flat_df():
"""Test pack(pd.DataFrame)."""
df = pd.DataFrame(
Expand All @@ -28,6 +39,7 @@ def test_pack_with_flat_df():
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
name="series",
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand All @@ -51,6 +63,7 @@ def test_pack_with_flat_df_and_index():
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
name="series",
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand Down Expand Up @@ -85,6 +98,7 @@ def test_pack_with_series_of_dfs():
name="nested",
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand All @@ -109,7 +123,7 @@ def test_pack_flat():
index=[1, 2, 3, 4],
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
)

offsets_reused(actual)
assert_series_equal(actual, desired)


Expand All @@ -134,7 +148,7 @@ def test_pack_sorted_df_into_struct():
index=[1, 2, 3, 4],
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
)

offsets_reused(actual)
assert_series_equal(actual, desired)


Expand Down Expand Up @@ -172,6 +186,7 @@ def test_pack_lists():
dtype=pd.ArrowDtype(pa.list_(pa.int64())),
)
series = packer.pack_lists(packed_df)
offsets_reused(series)

for field_name in packed_df.columns:
assert_series_equal(series.nest.get_list_series(field_name), packed_df[field_name])
Expand Down Expand Up @@ -221,6 +236,7 @@ def test_pack_seq_with_dfs_and_index():
index=[100, 101, 102, 103],
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand Down Expand Up @@ -249,6 +265,7 @@ def test_pack_seq_with_different_elements_and_index():
index=[100, 101, 102, 103],
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand Down Expand Up @@ -290,6 +307,7 @@ def test_pack_seq_with_series_of_dfs():
dtype=NestedDtype.from_fields(dict(a=pa.int64(), b=pa.int64())),
name="series",
)
offsets_reused(series)
assert_series_equal(series, desired)


Expand Down

0 comments on commit 952c98d

Please sign in to comment.