Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Manual mask names #1126

Merged
merged 13 commits into from
Apr 9, 2024
20 changes: 5 additions & 15 deletions src/ark/segmentation/marker_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def create_marker_count_matrices(segmentation_labels, image_data, nuclear_counts
def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
is_mibitiff=False, fovs=None,
extraction='total_intensity', nuclear_counts=False,
fast_extraction=False, **kwargs):
fast_extraction=False, mask_types=['whole_cell'], **kwargs):
"""This function takes the segmented data and computes the expression matrices batch-wise
while also validating inputs

Expand All @@ -478,6 +478,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
set to True, the compartments coordinate in segmentation_labels must contain 'nuclear'
fast_extraction (bool):
if set, skips the custom regionprops and expensive base regionprops extraction steps
mask_types (list):
list of masks to extract data for, defaults to ['whole_cell']
**kwargs:
arbitrary keyword arguments for signal and regionprops extraction

Expand Down Expand Up @@ -526,19 +528,6 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
img_sub_folder=img_sub_folder,
fovs=[fov_name])

# define the files for whole cell and nuclear
whole_cell_file = fov_name + '_whole_cell.tiff'
nuclear_file = fov_name + '_nuclear.tiff'

# for each label given in the argument, read in that mask for the fov, and proceed with
# label and table appending
mask_files = io_utils.list_files(segmentation_dir, substrs=fov_name)
mask_types = get_existing_mask_types(fov_names=fovs, mask_names=mask_files)

# remove nuclear from mask_types if nuclear_counts False
if not nuclear_counts and "nuclear" in mask_types:
mask_types.remove("nuclear")

for mask_type in mask_types:
# load the segmentation labels in
fov_mask_name = fov_name + '_' + mask_type + ".tiff"
Expand All @@ -551,7 +540,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs",
compartments = ['whole_cell']
segmentation_labels = current_labels_cell.values

if nuclear_counts:
if nuclear_counts and mask_type == 'whole_cell':
nuclear_file = fov_name + '_nuclear.tiff'
current_labels_nuc = load_utils.load_imgs_from_dir(data_dir=segmentation_dir,
files=[nuclear_file],
xr_dim_name='compartments',
Expand Down
11 changes: 7 additions & 4 deletions templates/ez_segmenter.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,7 @@
"* `cell_dir`: the final mask directory\n",
"* `cell_mask_suffix`: Suffix name of the cell mask files. Usually \"whole_cell\"\n",
"* `merged_masks_dir`: the directory to store the merged masks"
],
"outputs": []
]
},
{
"cell_type": "code",
Expand Down Expand Up @@ -772,7 +771,10 @@
"table_name = \"cell_and_objects\"\n",
"\n",
"# set to True to add nuclear cell properties to the expression matrix\n",
"nuclear_counts = False"
"nuclear_counts = False\n",
"\n",
"# Enter the names of the masks to include in the final cell table, if different from mask_names above, e.g. [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"].\n",
"mask_names = [\"astrocyte-arms_merged\", \"microglia-arms_merged\", \"final_cells_remaining\", \"amyloid-plaques\"]"
]
},
{
Expand Down Expand Up @@ -806,6 +808,7 @@
" batch_size=5,\n",
" nuclear_counts=nuclear_counts,\n",
" fast_extraction=fast_extraction,\n",
" mask_types=mask_names\n",
")"
]
},
Expand Down Expand Up @@ -908,7 +911,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.11.8"
}
},
"nbformat": 4,
Expand Down
54 changes: 21 additions & 33 deletions tests/segmentation/marker_quantification_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,15 +736,15 @@ def test_generate_cell_table_tree_loading():
nuclear_counts=True)

# setting nuclear_counts True generates data for both whole_cell and nuclear
# so there should be double the number of rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
# so there should be double the number of columns, but not rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=norm_data_nuc.columns.values
)

assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
Expand Down Expand Up @@ -824,14 +824,14 @@ def test_generate_cell_table_mibitiff_loading():

# setting nuclear_counts True generates data for both whole_cell and nuclear
# so there should be double the number of columns, but not rows
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0] * 2
assert norm_data_nuc.shape[0] == norm_data_fov_sub.shape[0]
assert norm_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=norm_data_nuc.columns.values
)

assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0] * 2
assert arcsinh_data_nuc.shape[0] == arcsinh_data_fov_sub.shape[0]
assert arcsinh_data_nuc.shape[1] == norm_data_fov_sub.shape[1] * 2
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
Expand Down Expand Up @@ -879,15 +879,19 @@ def test_generate_cell_table_extractions():
nuclear_counts=True
)

# verify total intensity extraction, same for whole_cell and nuclear mask types
for mask_type in ["whole_cell", "nuclear"]:
assert np.all(
default_norm_data.loc[
(default_norm_data[settings.CELL_LABEL] == 1) &
(default_norm_data["mask_type"] == mask_type)
][chans].values
== np.arange(9).reshape(3, 3)
)
# verify total intensity extraction
assert np.all(
default_norm_data.loc[
(default_norm_data[settings.CELL_LABEL] == 1) &
(default_norm_data["mask_type"] == "whole_cell")
][chans].values
== np.arange(9).reshape(3, 3)
)
assert np.unique(default_norm_data.mask_type) == ["whole_cell"]

# check for nuclear extractions
nuc_columns = [col for col in default_norm_data.columns if '_nuclear' in col]
assert nuc_columns

# define a specific threshold for positive pixel extraction
thresh_kwargs = {
Expand All @@ -911,25 +915,9 @@ def test_generate_cell_table_extractions():
assert np.all(positive_pixel_data_wc.iloc[:4][['chan0', 'chan1']].values == 0)
assert np.all(positive_pixel_data_wc.iloc[4:][chans].values == 1)

# verify thresh kwarg passes through and nuclear counts True
positive_pixel_data, _ = marker_quantification.generate_cell_table(
segmentation_dir=temp_dir, tiff_dir=tiff_dir,
img_sub_folder=img_sub_folder, is_mibitiff=False,
extraction='positive_pixel', nuclear_counts=True, **thresh_kwargs
)

# check explicitly for nuclear mask types
positive_pixel_data_nuc = positive_pixel_data[
positive_pixel_data["mask_type"] == "nuclear"
]
assert np.all(positive_pixel_data_nuc.iloc[:4][['chan0', 'chan1']].values == 0)
assert np.all(positive_pixel_data_nuc.iloc[4:][chans].values == 1)
assert positive_pixel_data_nuc.shape[0] == positive_pixel_data.shape[0] / 2
assert positive_pixel_data_nuc.shape[1] == positive_pixel_data.shape[1]
misc_utils.verify_in_list(
nuclear_col='nc_ratio',
nuc_cell_table_cols=positive_pixel_data_nuc.columns.values
)
# check that nuclear counts not extracted
nuc_columns = [col for col in positive_pixel_data_wc.columns if '_nuclear' in col]
assert not nuc_columns


def test_get_existing_mask_types():
Expand Down
Loading