angelolab · alex-l-kong · Apr 25, 2024 · Dec 4, 2023 · Dec 5, 2023 · Dec 5, 2023
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,7 +35,6 @@ dependencies = [
     "seaborn>=0.12,<1",
     "spatial-lda>=0.1.3,<1",
     "statsmodels>=0.13.2,<1",
-    "squidpy",
     "tifffile>=2022",
     "torchdata",
     "tqdm>=4,<5",

diff --git a/src/ark/segmentation/ez_seg/composites.py b/src/ark/segmentation/ez_seg/composites.py
@@ -14,9 +14,9 @@ def composite_builder(
     images_to_subtract: list[str],
     image_type: str,
     composite_method: str,
-    composite_directory: Union[str, pathlib.Path],
-    composite_name: str,
-    log_dir: Union[str, pathlib.Path],
+    composite_directory: Union[str, pathlib.Path] = None,
+    composite_name: str = None,
+    log_dir: Union[str, pathlib.Path] = None,
 ) -> None:
     """
     Adds tiffs together, either pixel clusters or base signal tiffs and returns a composite channel or mask.
@@ -38,7 +38,10 @@ def composite_builder(
 
     Returns:
         np.ndarray: Saves the composite array, either as a binary mask, or as a scaled intensity array.
+        If composite_directory is None, return a dictionary with keys being FOV names and values
+        are np.ndarray of the composite image.
     """
+    composite_images = {}
     for fov in fov_list:
         # load in tiff images and verify channels are present
         fov_data = load_utils.load_imgs_from_tree(
@@ -73,28 +76,34 @@ def composite_builder(
                 fov_data, composite_array, images_to_subtract, image_type, composite_method
             )
 
-        # Create the fov dir within the composite dir
-        composite_fov_dir = composite_directory / fov
-        composite_fov_dir.mkdir(parents=True, exist_ok=True)
+        if composite_directory:
+            # Create the fov dir within the composite dir
+            composite_fov_dir = composite_directory / fov
+            composite_fov_dir.mkdir(parents=True, exist_ok=True)
 
-        # Save the composite image
-        image_utils.save_image(
-            fname=composite_directory / fov / f"{composite_name}.tiff",
-            data=composite_array.astype(np.float32)
-        )
+            # Save the composite image
+            image_utils.save_image(
+                fname=composite_directory / fov / f"{composite_name}.tiff",
+                data=composite_array.astype(np.uint32)
+            )
 
+        composite_images[fov] = composite_array.astype(np.float32)
+
     # Write a log saving composite builder info
-    variables_to_log = {
-        "image_data_dir": image_data_dir,
-        "fov_list": fov_list,
-        "images_to_add": images_to_add,
-        "images_to_subtract": images_to_subtract,
-        "image_type": image_type,
-        "composite_method": composite_method,
-        "composite_directory": composite_directory,
-        "composite_name": composite_name,
-    }
-    log_creator(variables_to_log, log_dir, f"{composite_name}_composite_log.txt")
+    if log_dir:
+        variables_to_log = {
+            "image_data_dir": image_data_dir,
+            "fov_list": fov_list,
+            "images_to_add": images_to_add,
+            "images_to_subtract": images_to_subtract,
+            "image_type": image_type,
+            "composite_method": composite_method,
+            "composite_directory": composite_directory,
+            "composite_name": composite_name,
+        }
+        log_creator(variables_to_log, log_dir, f"{composite_name}_composite_log.txt")
+    else:
+        return composite_images
 
     print("Composites built and saved")
 

diff --git a/src/ark/segmentation/ez_seg/ez_object_segmentation.py b/src/ark/segmentation/ez_seg/ez_object_segmentation.py
@@ -154,7 +154,9 @@ def _create_object_mask(
     misc_utils.verify_in_list(object_shape_type=[object_shape_type], object_shape_options=["blob", "projection"])
 
     # Copy the input image, and get its shape
-    img2mask: np.ndarray = input_image.copy().to_numpy()
+    img2mask: np.ndarray = input_image.copy()
+    if type(input_image) != np.ndarray:
+        img2mask = img2mask.to_numpy()
     img2mask_shape: Tuple[int, int] = img2mask.shape
 
     # Blur the input mask using given sigma value

diff --git a/src/ark/utils/data_utils.py b/src/ark/utils/data_utils.py
@@ -18,7 +18,6 @@
 import xarray as xr
 from ark import settings
 from skimage.segmentation import find_boundaries
-import dask.dataframe as dd
 from pandas.core.groupby.generic import DataFrameGroupBy
 from anndata import AnnData, read_zarr
 from anndata.experimental import AnnCollection
@@ -31,7 +30,6 @@
     from typing_extensions import TypedDict, Unpack
 
 
-
 def save_fov_mask(fov, data_dir, mask_data, sub_dir=None, name_suffix=''):
     """Saves a provided cluster label mask overlay for a FOV.
 
@@ -871,25 +869,25 @@ def _convert_ct_fov_to_adata(fov_group: DataFrameGroupBy, var_names: list[str],
         The path of the saved `AnnData` object.
     """
 
-    fov_dd: dd.DataFrame = fov_group.sort_values(by=settings.CELL_LABEL, key=ns.natsort_key).reset_index()
-    fov_id: str = fov_dd[settings.FOV_ID].iloc[0]
+    fov_pd: pd.DataFrame = fov_group.sort_values(by=settings.CELL_LABEL, key=ns.natsort_key).reset_index()
+    fov_id: str = fov_pd[settings.FOV_ID].iloc[0]
 
     # Set the index to be the FOV and the segmentation label to create a unique index
-    fov_dd.index = list(map(lambda label: f"{fov_id}_{int(label)}", fov_dd[settings.CELL_LABEL]))
+    fov_pd.index = list(map(lambda label: f"{fov_id}_{int(label)}", fov_pd[settings.CELL_LABEL]))
 
     # Extract the X matrix
-    X_dd: dd.DataFrame = fov_dd[var_names]
+    X_dd: pd.DataFrame = fov_pd[var_names]
 
     # Extract the obs dataframe and convert the cell label to integer
-    obs_dd: dd.DataFrame = fov_dd[obs_names].astype({settings.CELL_LABEL: int, settings.FOV_ID: str})
-    obs_dd["cell_meta_cluster"] = pd.Categorical(obs_dd["cell_meta_cluster"].astype(str))
+    obs_pd: pd.DataFrame = fov_pd[obs_names].astype({settings.CELL_LABEL: int, settings.FOV_ID: str})
+    obs_pd["cell_meta_cluster"] = pd.Categorical(obs_pd["cell_meta_cluster"].astype(str))
 
     # Move centroids from obs to obsm["spatial"]
-    obsm_dd = obs_dd[[settings.CENTROID_0, settings.CENTROID_1]].rename(columns={settings.CENTROID_0: "centroid_y", settings.CENTROID_1: "centroid_x"})
-    obs_dd = obs_dd.drop(columns=[settings.CENTROID_0, settings.CENTROID_1])
+    obsm_pd = obs_pd[[settings.CENTROID_0, settings.CENTROID_1]].rename(columns={settings.CENTROID_0: "centroid_y", settings.CENTROID_1: "centroid_x"})
+    obs_pd = obs_pd.drop(columns=[settings.CENTROID_0, settings.CENTROID_1])
 
     # Create the AnnData object
-    adata: AnnData = AnnData(X=X_dd, obs=obs_dd, obsm={"spatial": obsm_dd})
+    adata: AnnData = AnnData(X=X_dd, obs=obs_pd, obsm={"spatial": obsm_pd})
 
     # Convert any extra string labels to categorical if it's beneficial.
     adata.strings_to_categoricals()

diff --git a/src/ark/utils/masking_utils.py b/src/ark/utils/masking_utils.py
@@ -0,0 +1,123 @@
+import os
+import numpy as np
+
+from alpineer import io_utils, misc_utils, load_utils
+from ark.utils import data_utils
+from ark.segmentation.ez_seg.composites import composite_builder
+from ark.segmentation.ez_seg.ez_object_segmentation import _create_object_mask
+
+
+def generate_signal_masks(img_dir, mask_dir, channels, mask_name, intensity_thresh_perc="auto",
+                          sigma=2, min_mask_size=5000, max_hole_size=1000):
+    """Creates a single signal mask for each FOV when given the channels to aggregate.
+
+    Args:
+        img_dir (str): path to the image tiff directory
+        mask_dir (str): path where the masks will be saved
+        channels (list): list of channels to combine to create a single mask for
+        mask_name (str): name for the new mask file created
+        intensity_thresh_perc (int): percentile to threshold intensity values in the image,
+            defaults to "auto" which will calculate an appropriate percentile for the user
+        sigma (float): sigma for gaussian blur
+        min_mask_size (int): minimum size of masked objects to include
+        max_hole_size (int): maximum size of holes to leave in masked objects
+    """
+    # check correct image directory path
+    io_utils.validate_paths([img_dir])
+    fovs = io_utils.list_folders(img_dir)
+
+    # check valid channel name
+    channel_list = io_utils.remove_file_extensions(
+        io_utils.list_files(os.path.join(img_dir, fovs[0])))
+    misc_utils.verify_in_list(input_channels=channels, all_channels=channel_list)
+
+    # create composite image (or read in single image)
+    composite_imgs = composite_builder(
+        img_dir, img_sub_folder='', fov_list=fovs, images_to_add=channels, images_to_subtract=[],
+        image_type='total', composite_method='total')
+
+    for fov in fovs:
+        # create mask
+        img = composite_imgs[fov]
+        img_size = img.shape[0] * img.shape[1]
+        mask = _create_object_mask(img, 'blob', sigma, intensity_thresh_perc, max_hole_size,
+                                   fov_dim=400, min_object_area=min_mask_size,
+                                   max_object_area=img_size)
+
+        # save mask
+        save_dir = os.path.join(mask_dir, fov)
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+
+        data_utils.save_fov_mask(mask_name, save_dir, mask)
+
+
+def create_cell_mask(seg_mask, cell_table, fov_name, cell_types, cluster_col, sigma,
+                     min_mask_size=0, max_hole_size=1000):
+    """Generates a binary from the cells listed in `cell_types`
+
+    Args:
+        seg_mask (numpy.ndarray): segmentation mask
+        cell_table (pandas.DataFrame): cell table containing segmentation IDs and cell types
+        fov_name (str): name of the fov to process
+        cell_types (list): list of cell types to include in the mask
+        cluster_col (str): column in cell table containing cell cluster
+        sigma (float): sigma for gaussian smoothing
+        min_mask_size (int): minimum size of a mask to include, default 0
+        max_hole_size (int): maximum size of a hole to leave without filling, default 0
+
+    Returns:
+        numpy.ndarray: binary mask
+    """
+    # get cell labels for fov and cell type
+    cell_subset = cell_table[cell_table['fov'] == fov_name]
+    cell_subset = cell_subset[cell_subset[cluster_col].isin(cell_types)]
+    cell_labels = cell_subset['label'].values
+
+    # create mask for cell type
+    cell_mask = np.isin(seg_mask, cell_labels).astype(np.int32)
+    img_size = cell_mask.shape[0] * cell_mask.shape[1]
+
+    # binarize and blur mask, no minimum size requirement or hole removal for cell masks
+    cell_mask = _create_object_mask(cell_mask, 'blob', sigma, None, max_hole_size,
+                                    fov_dim=0, min_object_area=min_mask_size,
+                                    max_object_area=img_size)
+    cell_mask[cell_mask > 0] = 1
+
+    return cell_mask
+
+
+def generate_cell_masks(seg_dir, mask_dir, cell_table, cell_types, cluster_col, mask_name,
+                        sigma=10, min_mask_size=0, max_hole_size=1000):
+    """Creates a single cell mask for each FOV when given the cell types to aggregate.
+
+    Args:
+        seg_dir (str): path to the cell segmentation tiff directory
+        mask_dir (str): path where the masks will be saved
+        cell_table (pd.DataFrame): Dataframe containing all cell labels and their cell type
+        cell_types (list): list of cell phenotypes that will be used to create the mask
+        cluster_col (str): column in cell table containing cell cluster
+        mask_name (str): name for the new mask file created
+        sigma (float): sigma for gaussian smoothing
+        min_mask_size (int): minimum size of a mask to include, default 0
+        max_hole_size (int): maximum size of a hole to leave without filling, default 0
+    """
+
+    fovs = np.unique(cell_table.fov)
+
+    for fov in fovs:
+        seg_mask = load_utils.load_imgs_from_dir(
+            data_dir=seg_dir, files=[fov + '_whole_cell.tiff'], xr_dim_name='compartments',
+            xr_channel_names=['whole_cell']
+        )
+
+        # create mask
+        mask = create_cell_mask(
+            np.array(seg_mask[0, :, :, 0]), cell_table, fov, cell_types, cluster_col, sigma,
+            min_mask_size, max_hole_size)
+
+        # save mask
+        save_dir = os.path.join(mask_dir, fov)
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        data_utils.save_fov_mask(mask_name, save_dir, mask)