angelolab · camisowers · Apr 9, 2024 · Mar 12, 2024 · Mar 12, 2024 · Mar 13, 2024
diff --git a/src/ark/segmentation/marker_quantification.py b/src/ark/segmentation/marker_quantification.py
@@ -455,7 +455,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
 def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                         is_mibitiff=False, fovs=None,
                         extraction='total_intensity', nuclear_counts=False,
-                        fast_extraction=False, **kwargs):
+                        fast_extraction=False, mask_types=['whole_cell'], **kwargs):
     """This function takes the segmented data and computes the expression matrices batch-wise
     while also validating inputs
 
@@ -478,6 +478,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
             set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
         fast_extraction (bool):
             if set, skips the custom regionprops and expensive base regionprops extraction steps
+        mask_types (list):
+            mask name suffixes to extract (<fov>_<mask_type>.tiff), defaults to ['whole_cell']
         **kwargs:
             arbitrary keyword arguments for signal and regionprops extraction
 
@@ -526,19 +528,6 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                                                         img_sub_folder=img_sub_folder,
                                                         fovs=[fov_name])
 
-        # define the files for whole cell and nuclear
-        whole_cell_file = fov_name + '_whole_cell.tiff'
-        nuclear_file = fov_name + '_nuclear.tiff'
-
-        # for each label given in the argument, read in that mask for the fov, and proceed with
-        # label and table appending
-        mask_files = io_utils.list_files(segmentation_dir, substrs=fov_name)
-        mask_types = get_existing_mask_types(fov_names=fovs, mask_names=mask_files)
-
-        # remove nuclear from mask_types if nuclear_counts False
-        if not nuclear_counts and "nuclear" in mask_types:
-            mask_types.remove("nuclear")
-
         for mask_type in mask_types:
             # load the segmentation labels in
             fov_mask_name = fov_name + '_' + mask_type + ".tiff"
@@ -551,7 +540,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
             compartments = ['whole_cell']
             segmentation_labels = current_labels_cell.values
 
-            if nuclear_counts:
+            if nuclear_counts and mask_type == 'whole_cell':
+                nuclear_file = fov_name + '_nuclear.tiff'
                 current_labels_nuc = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
                                                                    files=[nuclear_file],
                                                                    xr_dim_name='compartments',

diff --git a/templates/ez_segmenter.ipynb b/templates/ez_segmenter.ipynb
@@ -571,8 +571,7 @@
     "* `cell_dir`: the final mask directory\n",
     "* `cell_mask_suffix`: Suffix name of the cell mask files. Usually \"whole_cell\"\n",
     "* `merged_masks_dir`: the directory to store the merged masks"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
@@ -772,7 +771,10 @@
     "table_name = \"cell_and_objects\"\n",
     "\n",
     "# set to True to add nuclear cell properties to the expression matrix\n",
-    "nuclear_counts = False"
+    "nuclear_counts = False\n",
+    "\n",
+    "# Enter the names of masks you would like to include in the final cell table if different than mask_names above, e.g. [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"].\n",
+    "mask_names = [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"]"
    ]
   },
   {
@@ -806,6 +808,7 @@
     "    batch_size=5,\n",
     "    nuclear_counts=nuclear_counts,\n",
     "    fast_extraction=fast_extraction,\n",
+    "    mask_types=mask_names\n",
     ")"
    ]
   },
@@ -908,7 +911,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.11.8"
   }
  },
  "nbformat": 4,

diff --git a/tests/segmentation/marker_quantification_test.py b/tests/segmentation/marker_quantification_test.py
@@ -736,15 +736,15 @@ def test_generate_cell_table_tree_loading():
             nuclear_counts=True)
 
         # setting nuclear_counts True generates data for both whole_cell and nuclear
-        # so there should be double the number of rows
-        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
+        # so there should be double the number of columns, but not rows
+        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
         assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
             nuc_cell_table_cols=norm_data_nuc.columns.values
         )
 
-        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
+        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
         assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
@@ -824,14 +824,14 @@ def test_generate_cell_table_mibitiff_loading():
 
         # setting nuclear_counts True generates data for both whole_cell and nuclear
-        # so there should be double the number of rows
+        # so there should be double the number of columns, but not rows
-        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
+        assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
         assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
             nuc_cell_table_cols=norm_data_nuc.columns.values
         )
 
-        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
+        assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
         assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
         misc_utils.verify_in_list(
             nuclear_col='nc_ratio',
@@ -879,15 +879,19 @@ def test_generate_cell_table_extractions():
             nuclear_counts=True
         )
 
-        # verify total intensity extraction, same for whole_cell and nuclear mask types
-        for mask_type in ["whole_cell", "nuclear"]:
-            assert np.all(
-                default_norm_data.loc[
-                    (default_norm_data[settings.CELL_LABEL] == 1) &
-                    (default_norm_data["mask_type"] == mask_type)
-                ][chans].values
-                == np.arange(9).reshape(3, 3)
-            )
+        # verify total intensity extraction
+        assert np.all(
+            default_norm_data.loc[
+                (default_norm_data[settings.CELL_LABEL] == 1) &
+                (default_norm_data["mask_type"] == "whole_cell")
+            ][chans].values
+            == np.arange(9).reshape(3, 3)
+        )
+        assert np.unique(default_norm_data.mask_type) == ["whole_cell"]
+
+        # check for nuclear extractions
+        nuc_columns = [col for col in default_norm_data.columns if '_nuclear' in col]
+        assert nuc_columns
 
         # define a specific threshold for positive pixel extraction
         thresh_kwargs = {
@@ -911,25 +915,9 @@ def test_generate_cell_table_extractions():
         assert np.all(positive_pixel_data_wc.iloc[:4][['chan0', 'chan1']].values == 0)
         assert np.all(positive_pixel_data_wc.iloc[4:][chans].values == 1)
 
-        # verify thresh kwarg passes through and nuclear counts True
-        positive_pixel_data, _ = marker_quantification.generate_cell_table(
-            segmentation_dir=temp_dir, tiff_dir=tiff_dir,
-            img_sub_folder=img_sub_folder, is_mibitiff=False,
-            extraction='positive_pixel', nuclear_counts=True, **thresh_kwargs
-        )
-
-        # check explicitly for nuclear mask types
-        positive_pixel_data_nuc = positive_pixel_data[
-            positive_pixel_data["mask_type"] == "nuclear"
-        ]
-        assert np.all(positive_pixel_data_nuc.iloc[:4][['chan0', 'chan1']].values == 0)
-        assert np.all(positive_pixel_data_nuc.iloc[4:][chans].values == 1)
-        assert positive_pixel_data_nuc.shape[0] == positive_pixel_data.shape[0] / 2
-        assert positive_pixel_data_nuc.shape[1] == positive_pixel_data.shape[1]
-        misc_utils.verify_in_list(
-            nuclear_col='nc_ratio',
-            nuc_cell_table_cols=positive_pixel_data_nuc.columns.values
-        )
+        # check that nuclear counts not extracted
+        nuc_columns = [col for col in positive_pixel_data_wc.columns if '_nuclear' in col]
+        assert not nuc_columns
 
 
 def test_get_existing_mask_types():