angelolab · alex-l-kong · Nov 7, 2024
diff --git a/src/ark/segmentation/marker_quantification.py b/src/ark/segmentation/marker_quantification.py
@@ -454,7 +454,8 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
 
 def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                         is_mibitiff=False, fovs=None, extraction='total_intensity',
-                        nuclear_counts=False, fast_extraction=False, mask_types=['whole_cell'],
+                        nuclear_counts=False, split_large_nuclei=False,
+                        fast_extraction=False, mask_types=['whole_cell'],
                         add_underscore=True, **kwargs):
     """This function takes the segmented data and computes the expression matrices batch-wise
     while also validating inputs
@@ -476,6 +477,9 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
         nuclear_counts (bool):
             boolean flag to determine whether nuclear counts are returned, note that if
             set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
+        split_large_nuclei (bool):
+            boolean flag to determine whether nuclei that are larger than their assigned cell
+            are split into two separate nuclear objects
         fast_extraction (bool):
             if set, skips the custom regionprops and expensive base regionprops extraction steps
         mask_types (list):
@@ -570,6 +574,7 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
                 image_data=image_data,
                 extraction=extraction,
                 nuclear_counts=nuclear_counts,
+                split_large_nuclei=split_large_nuclei,
                 fast_extraction=fast_extraction,
                 **kwargs
             )

diff --git a/templates/1_Segment_Image_Data.ipynb b/templates/1_Segment_Image_Data.ipynb
@@ -331,6 +331,10 @@
     "# set to True to add nuclear cell properties to the expression matrix\n",
     "nuclear_counts = False\n",
     "\n",
+    "# set to True to split nuclei which are larger than their assigned cell\n",
+    "# into two separate nuclear objects\n",
+    "split_large_nuclei = False\n",
+    "\n",
     "# set to True to bypass expensive cell property calculations\n",
     "# only cell label, size, and centroid will be extracted if True\n",
     "fast_extraction = False"
@@ -363,6 +367,7 @@
     "                                              fovs=fovs,\n",
     "                                              batch_size=5,\n",
     "                                              nuclear_counts=nuclear_counts,\n",
+    "                                              split_large_nuclei=split_large_nuclei,\n",
     "                                              fast_extraction=fast_extraction)"
    ]
   },
@@ -406,7 +411,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.11"
+   "version": "3.10.14"
   },
   "vscode": {
    "interpreter": {

diff --git a/tests/segmentation/segmentation_utils_test.py b/tests/segmentation/segmentation_utils_test.py
@@ -189,6 +189,13 @@ def test_transform_expression_matrix_multiple_compartments():
             )
             assert np.array_equal(normalized_data.loc['whole_cell', cell, modified_cols].values,
                                   normalized_vals)
+        if cell_data.loc['nuclear', cell, settings.CELL_SIZE] != 0:
+            normalized_vals = np.divide(
+                cell_data.loc['nuclear', cell, modified_cols].values,
+                cell_data.loc['nuclear', cell, settings.CELL_SIZE].values
+            )
+            assert np.array_equal(normalized_data.loc['nuclear', cell, modified_cols].values,
+                                  normalized_vals)
 
     # test arcsinh transform
     transform_kwargs = {'linear_factor': 1}