Skip to content
This repository has been archived by the owner on Apr 1, 2024. It is now read-only.

Commit

Permalink
Merge pull request #16 from lincc-frameworks/ts-ext-array-meta-data
Browse files Browse the repository at this point in the history
TsExtensionArray.list_offsets
  • Loading branch information
hombit authored Feb 2, 2024
2 parents 60fff6f + 72d9ebf commit 2c01f15
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/pandas_ts/packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def pack_flat_into_df(df: pd.DataFrame, name=None) -> pd.DataFrame:
Output dataframe.
"""
# TODO: we can optimize name=None case a bit
struct_series = pack_flat(df)
struct_series = pack_flat(df, name=name)
packed_df = struct_series.struct.explode()
if name is not None:
packed_df[name] = struct_series
Expand Down
9 changes: 9 additions & 0 deletions src/pandas_ts/ts_ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,12 @@ def _validate(array: pa.ChunkedArray) -> None:
# compare offsets from the first list array with the current one
if not first_list_array.offsets.equals(list_array.offsets):
raise ValueError("Offsets of all ListArrays must be the same")

@property
def list_offsets(self) -> pa.ChunkedArray:
    """The list offsets of the field arrays.

    The result is a chunked array holding, for every chunk of the underlying
    struct array, the list offsets of that chunk's first field. Only the
    first field is read; all fields are expected to share the same offsets.

    Returns
    -------
    pa.ChunkedArray
        One chunk of offsets per chunk of the underlying array. It has no
        chunks when the underlying array has no chunks.
    """
    offsets_per_chunk = [chunk.field(0).offsets for chunk in self._pa_array.iterchunks()]
    if offsets_per_chunk:
        return pa.chunked_array(offsets_per_chunk)

    # pa.chunked_array() cannot infer a type from an empty list of chunks, so
    # the offset type has to be passed explicitly: large lists use 64-bit
    # offsets, regular lists use 32-bit offsets.
    list_type = self._pa_array.type.field(0).type
    offset_type = pa.int64() if pa.types.is_large_list(list_type) else pa.int32()
    return pa.chunked_array([], type=offset_type)
15 changes: 15 additions & 0 deletions tests/pandas_ts/test_ts_ext_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import pyarrow as pa
import pytest
from numpy.testing import assert_array_equal
from pandas.testing import assert_series_equal

from pandas_ts import TsDtype
Expand Down Expand Up @@ -91,3 +92,17 @@ def test_series_built_raises(data):
pa_array = pa.array(data)
with pytest.raises(ValueError):
_array = TsExtensionArray(pa_array)


def test_list_offsets():
    """Check that `list_offsets` exposes the list offsets as a chunked array."""
    # Two rows of three elements each, so the expected offsets are 0, 3, 6.
    field_a = pa.array([np.array([1, 2, 3]), np.array([1, 2, 1])], type=pa.list_(pa.uint8()))
    field_b = pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0])])
    struct_array = pa.StructArray.from_arrays(arrays=[field_a, field_b], names=["a", "b"])

    actual = TsExtensionArray(struct_array).list_offsets
    expected = pa.chunked_array([pa.array([0, 3, 6])])

    assert_array_equal(actual, expected)

0 comments on commit 2c01f15

Please sign in to comment.