From 4f18b25961a7584ceeb2877b1f851450af5a43ed Mon Sep 17 00:00:00 2001 From: John Kerl Date: Tue, 8 Oct 2024 21:09:14 -0400 Subject: [PATCH 1/2] Use correct shapes on ingest, when feature-flag enabled --- apis/python/src/tiledbsoma/io/ingest.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index caccd0a699..e905389dac 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -64,6 +64,7 @@ NotCreateableError, SOMAError, ) +from .._flags import NEW_SHAPE_FEATURE_FLAG_ENABLED from .._soma_array import SOMAArray from .._soma_object import AnySOMAObject, SOMAObject from .._tdb_handles import RawHandle @@ -1203,9 +1204,13 @@ def _write_dataframe_impl( arrow_table = _extract_new_values_for_append(df_uri, arrow_table, context) try: + domain = None + if NEW_SHAPE_FEATURE_FLAG_ENABLED: + domain = ((0, int(df.shape[0]) - 1),) soma_df = DataFrame.create( df_uri, schema=arrow_table.schema, + domain=domain, platform_config=platform_config, context=context, ) @@ -1304,8 +1309,14 @@ def _create_from_matrix( logging.log_io(None, f"START WRITING {uri}") try: + shape: Sequence[Union[int, None]] = () # A SparseNDArray must be appendable in soma.io. - shape = [None for _ in matrix.shape] if cls.is_sparse else matrix.shape + if NEW_SHAPE_FEATURE_FLAG_ENABLED: + shape = tuple(int(e) for e in matrix.shape) + elif cls.is_sparse: + shape = tuple(None for _ in matrix.shape) + else: + shape = matrix.shape soma_ndarray = cls.create( uri, type=pa.from_numpy_dtype(matrix.dtype), From 08ac2d72f687e3c57c541bd5858940a739692c16 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 9 Oct 2024 19:39:10 -0400 Subject: [PATCH 2/2] tests/test_registration_mappings.py WIP [skip ci] --- apis/python/tests/test_basic_anndata_io.py | 5 ++ .../tests/test_registration_mappings.py | 47 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 67cfeca562..0e246c3b75 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -1345,6 +1345,11 @@ def test_nan_append(conftest_pbmc_small, dtype, nans, new_obs_ids): var_field_name="var_id", ) + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + nobs = rd.get_obs_shape() + nvars = rd.get_var_shapes() + tiledbsoma.io.resize_experiment(SOMA_URI, nobs=nobs, nvars=nvars) + # Append the second anndata object tiledbsoma.io.from_anndata( experiment_uri=SOMA_URI, diff --git a/apis/python/tests/test_registration_mappings.py b/apis/python/tests/test_registration_mappings.py index d5fd8b392a..e3a957d9e9 100644 --- a/apis/python/tests/test_registration_mappings.py +++ b/apis/python/tests/test_registration_mappings.py @@ -382,6 +382,11 @@ def test_multiples_without_experiment( var_field_name=var_field_name, ) + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + nobs = rd.get_obs_shape() + nvars = rd.get_var_shapes() + tiledbsoma.io.resize_experiment(experiment_uri, nobs=nobs, nvars=nvars) + else: # "Append" all the H5ADs where no experiment exists yet. rd = registration.ExperimentAmbientLabelMapping.from_h5ad_appends_on_experiment( @@ -392,6 +397,8 @@ def test_multiples_without_experiment( var_field_name=var_field_name, ) + # XXX TO DO + assert rd.obs_axis.id_mapping_from_values(["AGAG", "GGAG"]).data == (2, 8) assert rd.var_axes["measname"].id_mapping_from_values(["ESR1", "VEGFA"]).data == ( 2, @@ -451,6 +458,15 @@ def test_multiples_without_experiment( h5ad_file_names[permutation[2]], h5ad_file_names[permutation[3]], ]: + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + if tiledbsoma.Experiment.exists(experiment_uri): + tiledbsoma.io.resize_experiment( + experiment_uri, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + + # XXX FIXME tiledbsoma.io.from_h5ad( experiment_uri, h5ad_file_name, @@ -713,6 +729,13 @@ def test_append_items_with_experiment(obs_field_name, var_field_name): original = adata2.copy() + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + tiledbsoma.io.resize_experiment( + soma1, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + with tiledbsoma.Experiment.open(soma1, "w") as exp1: tiledbsoma.io.append_obs( exp1, @@ -836,6 +859,14 @@ def test_append_with_disjoint_measurements( var_field_name=var_field_name, ) + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + # XXX FIXME + tiledbsoma.io.resize_experiment( + soma_uri, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + tiledbsoma.io.from_anndata( soma_uri, anndata2, @@ -1190,6 +1221,15 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b): tiledbsoma.io.from_anndata( soma_uri, adata, measurement_name=measurement_name, registration_mapping=rd ) + + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + # XXX FIXME + tiledbsoma.io.resize_experiment( + soma_uri, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + tiledbsoma.io.from_anndata( soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd ) @@ -1208,6 +1248,13 @@ def test_enum_bit_width_append(tmp_path, all_at_once, nobs_a, nobs_b): assert rd.get_obs_shape() == nobs_a + nobs_b assert rd.get_var_shapes() == {"meas": 4} + if tiledbsoma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED: + tiledbsoma.io.resize_experiment( + soma_uri, + nobs=rd.get_obs_shape(), + nvars=rd.get_var_shapes(), + ) + tiledbsoma.io.from_anndata( soma_uri, bdata, measurement_name=measurement_name, registration_mapping=rd )