Skip to content

Commit

Permalink
Merge pull request #37 from lincc-frameworks/sean/from_ddf_index
Browse files Browse the repository at this point in the history
Keep index when generating meta
  • Loading branch information
smcguire-cmu authored Jun 27, 2024
2 parents cb844e6 + af6febe commit 0f4963e
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/nested_dask/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@ def make_meta_frame(x, index=None) -> npd.NestedFrame:
"""Create an empty NestedFrame to use as Dask's underlying object meta."""

dtypes = x.dtypes.to_dict()
result = npd.NestedFrame({key: pd.Series(dtype=d) for key, d in dtypes.items()})
index = index if index is not None else x.index
index = index[:0].copy()
result = npd.NestedFrame({key: pd.Series(dtype=d) for key, d in dtypes.items()}, index=index)
return result


@meta_nonempty.register(npd.NestedFrame)
def _nonempty_nestedframe(x, index=None) -> npd.NestedFrame:
    """Return a non-empty NestedFrame stand-in built from ``x``.

    The frame is produced by ``meta_nonempty_dataframe(x)`` and then wrapped
    in ``npd.NestedFrame``.

    Parameters
    ----------
    x
        The object handed to ``meta_nonempty_dataframe``.
    index : optional
        When given (i.e. not ``None``), it replaces the index of the
        generated frame before wrapping.

    Returns
    -------
    npd.NestedFrame
        The wrapped, non-empty frame.
    """
    nonempty = meta_nonempty_dataframe(x)
    if index is None:
        # No override requested: keep whatever index the helper produced.
        return npd.NestedFrame(nonempty)
    nonempty.index = index
    return npd.NestedFrame(nonempty)


Expand Down
4 changes: 2 additions & 2 deletions src/nested_dask/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def from_nested_pandas(
return NestedFrame.from_dask_dataframe(result)

@classmethod
def from_dask_dataframe(cls, df) -> NestedFrame:
def from_dask_dataframe(cls, df: dd.DataFrame) -> NestedFrame:
"""Converts a Dask Dataframe to a Dask-Nested NestedFrame
Parameters
Expand All @@ -110,7 +110,7 @@ def from_dask_dataframe(cls, df) -> NestedFrame:
-------
`nested_dask.NestedFrame`
"""
return df.map_partitions(npd.NestedFrame)
return df.map_partitions(npd.NestedFrame, meta=npd.NestedFrame(df._meta.copy()))

def compute(self, **kwargs):
"""Compute this Dask collection, returning the underlying dataframe or series."""
Expand Down
13 changes: 13 additions & 0 deletions tests/nested_dask/test_nestedframe.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dask.dataframe as dd
import nested_dask as nd
import numpy as np
import pandas as pd
Expand All @@ -12,6 +13,18 @@ def test_nestedframe_construction(test_dataset):
assert isinstance(test_dataset["nested"].dtype, NestedDtype)


def test_nestedframe_from_dask_keeps_index_name():
    """Check that from_dask_dataframe carries the index name through."""
    expected_name = "test"

    # Build a small pandas frame whose index carries the expected name.
    source = pd.DataFrame({"a": [1, 2, 3]}).rename_axis(expected_name)

    dask_frame = dd.from_pandas(source)
    assert ddf_index_name(dask_frame) == expected_name

    nested = nd.NestedFrame.from_dask_dataframe(dask_frame)
    assert isinstance(nested, nd.NestedFrame)
    assert ddf_index_name(nested) == expected_name


def ddf_index_name(frame):
    """Return the name of ``frame``'s index."""
    return frame.index.name


def test_all_columns(test_dataset):
"""all_columns property test"""
all_cols = test_dataset.all_columns
Expand Down

0 comments on commit 0f4963e

Please sign in to comment.