Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Append mode with resizing [WIP] #3148

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
NotCreateableError,
SOMAError,
)
from .._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED
from .._soma_array import SOMAArray
from .._soma_object import AnySOMAObject, SOMAObject
from .._tdb_handles import RawHandle
Expand Down Expand Up @@ -1203,9 +1204,13 @@ def _write_dataframe_impl(
arrow_table = _extract_new_values_for_append(df_uri, arrow_table, context)

try:
domain = None
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
domain = ((0, int(df.shape[0]) - 1),)
soma_df = DataFrame.create(
df_uri,
schema=arrow_table.schema,
domain=domain,
platform_config=platform_config,
context=context,
)
Expand Down Expand Up @@ -1304,8 +1309,14 @@ def _create_from_matrix(
logging.log_io(None, f"START WRITING {uri}")

try:
shape: Sequence[Union[int, None]] = ()
# A SparseNDArray must be appendable in soma.io.
shape = [None for _ in matrix.shape] if cls.is_sparse else matrix.shape
if NEW_SHAPE_FEATURE_FLAG_ENABLED:
shape = tuple(int(e) for e in matrix.shape)
elif cls.is_sparse:
shape = tuple(None for _ in matrix.shape)
else:
shape = matrix.shape
soma_ndarray = cls.create(
uri,
type=pa.from_numpy_dtype(matrix.dtype),
Expand Down
5 changes: 5 additions & 0 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,11 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids):
var_field_name="var_id",
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars)

# Append the second anndata object
tiledbsoma.io.from_anndata(
experiment_uri=SOMA_URI,
Expand Down
47 changes: 47 additions & 0 deletions apis/python/tests/test_registration_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,11 @@ def test_multiples_without_experiment(
var_field_name=var_field_name,
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
nobs = rd.get_obs_shape()
nvars = rd.get_var_shapes()
tiledbsoma.io.resize_experiment(experiment_uri, nobs=nobs, nvars=nvars)

else:
# "Append" all the H5ADs where no experiment exists yet.
rd = registration.ExperimentAmbientLabelMapping.from_h5ad_appends_on_experiment(
Expand All @@ -392,6 +397,8 @@ def test_multiples_without_experiment(
var_field_name=var_field_name,
)

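# TODO(review): unfinished in this draft -- presumably the NEW_SHAPE_FEATURE_FLAG_ENABLED handling for this code path is still to be written; confirm with the author.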

assert rd.obs_axis.id_mapping_from_values(["AGAG", "GGAG"]).data == (2, 8)
assert rd.var_axes["measname"].id_mapping_from_values(["ESR1", "VEGFA"]).data == (
2,
Expand Down Expand Up @@ -451,6 +458,15 @@ def test_multiples_without_experiment(
h5ad_file_names[permutation[2]],
h5ad_file_names[permutation[3]],
]:
if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
if tiledbsoma.Experiment.exists(experiment_uri):
tiledbsoma.io.resize_experiment(
experiment_uri,
nobs=rd.get_obs_shape(),
nvars=rd.get_var_shapes(),
)

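# FIXME(review): reason not stated in this draft -- presumably concerns the resize-before-append step guarded by NEW_SHAPE_FEATURE_FLAG_ENABLED; confirm with the author.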
tiledbsoma.io.from_h5ad(
experiment_uri,
h5ad_file_name,
Expand Down Expand Up @@ -713,6 +729,13 @@ def test_append_items_with_experiment(obs_field_name, var_field_name):

original = adata2.copy()

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
tiledbsoma.io.resize_experiment(
soma1,
nobs=rd.get_obs_shape(),
nvars=rd.get_var_shapes(),
)

with tiledbsoma.Experiment.open(soma1, "w") as exp1:
tiledbsoma.io.append_obs(
exp1,
Expand Down Expand Up @@ -836,6 +859,14 @@ def test_append_with_disjoint_measurements(
var_field_name=var_field_name,
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
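# FIXME(review): reason not stated in this draft -- presumably this explicit resize_experiment call before appending is a temporary step; confirm with the author.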
tiledbsoma.io.resize_experiment(
soma_uri,
nobs=rd.get_obs_shape(),
nvars=rd.get_var_shapes(),
)

tiledbsoma.io.from_anndata(
soma_uri,
anndata2,
Expand Down Expand Up @@ -1190,6 +1221,15 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
tiledbsoma.io.from_anndata(
soma_uri, adata, measurement_name=measurement_name, registration_mapping=rd
)

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
# XXX FIXME
tiledbsoma.io.resize_experiment(
soma_uri,
nobs=rd.get_obs_shape(),
nvars=rd.get_var_shapes(),
)

tiledbsoma.io.from_anndata(
soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
)
Expand All @@ -1208,6 +1248,13 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b):
assert rd.get_obs_shape() == nobs_a + nobs_b
assert rd.get_var_shapes() == {"meas": 4}

if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED:
tiledbsoma.io.resize_experiment(
soma_uri,
nobs=rd.get_obs_shape(),
nvars=rd.get_var_shapes(),
)

tiledbsoma.io.from_anndata(
soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd
)
Expand Down