added multiple pattern support

PolusAI · Apr 2, 2024 · 3a2f287 · 3a2f287
1 parent e9547e2
commit 3a2f287
Show file tree

Hide file tree

Showing 12 changed files with 166 additions and 117 deletions.
diff --git a/formats/image-dimension-stacking-plugin/.bumpversion.cfg b/formats/image-dimension-stacking-plugin/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.0-dev
+current_version = 0.1.1-dev
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?

diff --git a/formats/image-dimension-stacking-plugin/README.md b/formats/image-dimension-stacking-plugin/README.md
@@ -1,13 +1,17 @@
-# Image dimension stacking(0.1.0-dev)
+# Image dimension stacking(0.1.1-dev)
+
+This plugin leverages the [filepattern](https://filepattern2.readthedocs.io/en/latest/Home.html) library and employs the filepattern `groupBy` functionality to enable the matching of image filenames, facilitating their stacking into multi-dimensional images.
+
+The filepattern must include at least one of the variables `c`, `t`, or `z`. If all three variables are present in the pattern, the plugin will group images according to the order `z, c, t` (that is, by `z`). If only one variable is present in the file pattern, the plugin will group images according to that variable.
+
 
-This plugin leverages the [filepattern](https://filepattern2.readthedocs.io/en/latest/Home.html) library and employs the `groupBy` variable to enable the matching of image filenames, facilitating their stacking into multi-dimensional images.
 Currently, the plugin supports patterns such as the following; the grouping variable is inferred from the variables present in `filePattern`.
-1. multi-channel  `groupBy=c`\
-   For example `filePattern=x01_y01_p01_c{c:d+}.ome.tif`
-2. multi-zplanes  `groupBy=z`\
-   For example `filePattern=tubhiswt_C1-z{z:d+}.ome.tif`
-3. multi-timepoints  `groupBy=t`\
-   For example `filePattern=img00001_t{t:d+}_ch0.ome.tif`
+1.  `tubhiswt_z{z:d+}_c{c:d+}_t{t:d+}.ome.tif`\
+   Images are grouped based on `z` variable
+2. `tubhiswt_.*_.*_t{t:d+}.ome.tif`\
+   Images are grouped based on `t` variable
+3. `00001_01_{c:d+}.ome.tif`\
+   Images are grouped based on `c` variable
 
 #### Note:
 Filename patterns may consist of any other filepattern variables, combined with other valid regular expression arguments, excluding the `groupBy` variable.
@@ -33,16 +37,14 @@ This plugin takes three input argument and one output argument:
 |---------------|-------------------------|--------|--------|
 | `--inpDir`      | Input image collection  | Input  | Collection   |
 | `--filePattern` | Pattern to parse image files           | Input  | String |
-| `--groupBy` | A variable to group image files           | Input  | String |
 | `--outDir`      | Output image collection | Output | Collection   |
 | `--preview`        | Generate a JSON file to view outputs | Output | Boolean   |
 
 ### Run the Docker Container
 
 ```bash
-docker run -v /path/to/data:/data polusai/image-dimension-stacking-plugin:0.1.0-dev \
+docker run -v /path/to/data:/data polusai/image-dimension-stacking-plugin:0.1.1-dev \
   --inpDir "Path/To/Data" \
   --filePattern "tubhiswt_C1-z{z:d+}.ome.tif" \
-  --groupBy "z" \
   --outDir "Path/To/Output/Dir"
 ```
diff --git a/formats/image-dimension-stacking-plugin/VERSION b/formats/image-dimension-stacking-plugin/VERSION
@@ -1 +1 @@
-0.1.0-dev
+0.1.1-dev
diff --git a/formats/image-dimension-stacking-plugin/plugin.json b/formats/image-dimension-stacking-plugin/plugin.json
@@ -1,13 +1,13 @@
 {
   "name": "Image Dimension Stacking",
-  "version": "0.1.0-dev",
+  "version": "0.1.1-dev",
   "title": "Stacking of image dimensions (c, z, t) to create a multidimensional image.",
   "description": "Stacking of image dimensions (c, z, t) to create a multidimensional image.",
   "author": "Nick Schaub ([email protected]), Hamdah Shafqat Abbasi ([email protected])",
   "institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
   "repository": "https://github.com/PolusAI/polus-plugins",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
-  "containerId": "polusai/image-dimension-stacking-plugin",
+  "containerId": "polusai/image-dimension-stacking-plugin:0.1.1-dev",
   "baseCommand": [
     "python3",
     "-m",
@@ -26,12 +26,6 @@
       "description": "Filename pattern used to separate data.",
       "required": "True"
     },
-    "groupBy": {
-      "type": "string",
-      "title": "groupBy",
-      "description": "String variable to group image filenames by.",
-      "required": "True"
-    },
     "preview": {
       "type": "boolean",
       "title": "Preview",
@@ -58,12 +52,6 @@
       "description": "Filename pattern used to separate data.",
       "required": "True"
     },
-    "groupBy": {
-      "type": "string",
-      "title": "groupBy",
-      "description": "String variable to group image filenames by.",
-      "required": "True"
-    },
     "preview": {
       "type": "boolean",
       "title": "Preview example output of this plugin",

diff --git a/formats/image-dimension-stacking-plugin/pyproject.toml b/formats/image-dimension-stacking-plugin/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "polus-plugins-formats-image-dimension-stacking"
-version = "0.1.0-dev"
+version = "0.1.1-dev"
 description = "Stacking multi-dimensional images"
 authors = [
            "Hamdah Shafqat abbasi <[email protected]>"

diff --git a/formats/image-dimension-stacking-plugin/run-plugin.sh b/formats/image-dimension-stacking-plugin/run-plugin.sh
@@ -6,13 +6,7 @@ datapath=$(readlink --canonicalize ../data)
 # Inputs
 inpDir=/data/path_to_files
 
-## stacking z dimension
-filePattern="tubhiswt_C1-z{z:d+}.ome.tif"
-# ## stacking c dimension
-# filePattern="x{x+}_y{y+}_p01_c{c+}.ome.tif"
-# ## stacking t dimension
-# filePattern="img00001_t{t:d+}_ch0.ome.tif"
-groupBy = "z"
+filePattern="tubhiswt_z{z:d+}_c{c:d+}_t{t:d+}.ome.tif"
 # Output paths
 outDir=/data/path_to_output
 
@@ -25,5 +19,4 @@ docker run --mount type=bind,source=${datapath},target=/data/ \
             polusai/image-dimension-stacking-plugin:${version} \
             --inpDir ${inpDir} \
             --filePattern ${filePattern} \
-            --groupBy ${groupBy} \
             --outDir ${outDir}
diff --git a/...-dimension-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/__init__.py b/...-dimension-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/__init__.py
@@ -1,2 +1,2 @@
 """Image dimension stacking package."""
-__version__ = "0.1.0-dev"
+__version__ = "0.1.1-dev"
diff --git a/...-dimension-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/__main__.py b/...-dimension-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/__main__.py
@@ -53,12 +53,6 @@ def main(
         "-f",
         help="Filename pattern used to separate data.",
     ),
-    group_by: str = typer.Option(
-        ...,
-        "--groupBy",
-        "-g",
-        help="String variable to group image filenames by.",
-    ),
     out_dir: Path = typer.Option(
         ...,
         "--outDir",
@@ -75,7 +69,6 @@ def main(
     """Image dimension stacking plugin."""
     logger.info(f"--inpDir: {inp_dir}")
     logger.info(f"--filePattern: {file_pattern}")
-    logger.info(f"--groupBy: {group_by}")
     logger.info(f"--outDir: {out_dir}")
 
     if not inp_dir.exists():
@@ -86,14 +79,23 @@ def main(
         msg = "outDir does not exist"
         raise ValueError(msg, out_dir)
 
-    if group_by not in ["c", "t", "z"]:
-        msg = "Dimensions are not properly defined, Select c, t or z"
-        raise ValueError(msg)
+    fps = fp.FilePattern(inp_dir, file_pattern)
+    list_val = ["c", "t", "z"]
+    variables = sorted([f for f in fps.get_variables() if f in list_val])
 
-    if len(group_by) != 1:
-        msg = f"{group_by} Only one variable is selected for dimension stacking"
+    if len(variables) == 0:
+        msg = "Could not detect c, t or z variables in a pattern"
         raise ValueError(msg)
 
+    if variables == list_val or variables == ["z"]:
+        group_by = "z"
+
+    if variables == ["c", "t"] or variables == ["c"]:
+        group_by = "c"
+
+    if variables == ["t"]:
+        group_by = "t"
+
     if preview:
         generate_preview(out_dir=out_dir, file_pattern=file_pattern)
 

diff --git a/...-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/dimension_stacking.py b/...-stacking-plugin/src/polus/plugins/formats/image_dimension_stacking/dimension_stacking.py
@@ -1,11 +1,13 @@
 """Image dimension stacking package."""
 import logging
+import re
 import time
 from concurrent.futures import as_completed
 from multiprocessing import cpu_count
 from pathlib import Path
 
 import filepattern as fp
+import numpy as np
 import preadator
 from bfio import BioReader
 from bfio import BioWriter
@@ -121,69 +123,74 @@ def dimension_stacking(
         out_dir : Path to output directory.
 
     """
-    dimensions = []
-    input_files = []
-
     fps = fp.FilePattern(inp_dir, file_pattern)
-    out_name = fps.output_name()
-
-    for fl in fps(group_by=group_by):
-        f1, f2 = fl
-        if f1[0][0] == group_by:
-            file_dim = f1[0][1]
-            file = f2[0][1][0]
-            input_files.append(file)
-            dimensions.append(file_dim)
+    groups = [fi[0] for fi, _ in fps(group_by=group_by)]
+    dimensions = [v for t in groups for v in t if isinstance(v, int)]
+    dim_min = min(dimensions)
+    dim_max = max(dimensions)
+    replace_value = f"({dim_min}-{dim_max})"
 
     # Get the number of layers to stack
     dim_size = len(dimensions)
 
-    with BioReader(input_files[0]) as br:
-        metadata = br.metadata
-
-    with BioWriter(
-        out_dir.joinpath(out_name),
-        metadata=metadata,
-        max_workers=num_workers,
-    ) as bw:
-        # Adjust the dimensions before writing
-        if group_by == "c":
-            bw.C = dim_size
-        if group_by == "t":
-            bw.T = dim_size
-        if group_by == "z":
-            bw.Z = dim_size
-            bw.ps_z = z_distance(input_files[0])
-
-        starttime = time.time()
-
-        with preadator.ProcessManager(
-            name=f"Stacking images of {group_by} dimensions",
-            num_processes=num_workers,
-            threads_per_process=4,
-        ) as pm:
-            threads = []
+    group_range = np.unique([len(f) for gp, f in fps(group_by=group_by)])[0]
+
+    for gi in range(0, group_range):
+        images = [f2[gi][1][0].name for f1, f2 in fps(group_by=group_by)]
+        input_files = [f2[gi][1][0] for f1, f2 in fps(group_by=group_by)]
+        pattern = fp.infer_pattern(files=images)
+        out_name = re.sub(r"\{(.*?)\}", replace_value, pattern)
+        with BioReader(input_files[0]) as br:
+            metadata = br.metadata
+        with BioWriter(
+            out_dir.joinpath(out_name),
+            metadata=metadata,
+            max_workers=num_workers,
+        ) as bw:
+            # Adjust the dimensions before writing
+            if group_by == "c":
+                bw.C = dim_size
+            if group_by == "t":
+                bw.T = dim_size
+            if group_by == "z":
+                bw.Z = dim_size
+                bw.ps_z = z_distance(Path(input_files[0]))
+
             for file, di in zip(input_files, range(0, dim_size)):
-                thread = pm.submit_thread(
-                    write_image_stack,
-                    file,
-                    di=di,
-                    group_by=group_by,
-                    bw=bw,
-                )
-                threads.append(thread)
-            pm.join_threads()
-
-            for f in tqdm(
-                as_completed(threads),
-                total=len(threads),
-                mininterval=5,
-                desc=f"Stacking images of {group_by} dimensions",
-                initial=0,
-                unit_scale=True,
-                colour="cyan",
-            ):
-                f.result()
-
-            endtime = (time.time() - starttime) / 60
-            logger.info(f"Total time taken for execution: {endtime:.4f} minutes")
+                starttime = time.time()
+
+                with preadator.ProcessManager(
+                    name=f"Stacking images of {group_by} dimensions",
+                    num_processes=num_workers,
+                    threads_per_process=4,
+                ) as pm:
+                    threads = []
+                    for file, di in zip(  # noqa: PLW2901
+                        input_files,
+                        range(0, dim_size),
+                    ):
+                        thread = pm.submit_thread(
+                            write_image_stack,
+                            file,
+                            di=di,
+                            group_by=group_by,
+                            bw=bw,
+                        )
+                        threads.append(thread)
+                    pm.join_threads()
+
+                    for f in tqdm(
+                        as_completed(threads),
+                        total=len(threads),
+                        mininterval=5,
+                        desc=f"Stacking images of {group_by} dimensions",
+                        initial=0,
+                        unit_scale=True,
+                        colour="cyan",
+                    ):
+                        f.result()
+
+                    endtime = (time.time() - starttime) / 60
+                    logger.info(
+                        f"Total time taken for execution: {endtime:.4f} minutes",
+                    )
diff --git a/formats/image-dimension-stacking-plugin/tests/fixture.py b/formats/image-dimension-stacking-plugin/tests/fixture.py
@@ -41,7 +41,6 @@ def inp_dir() -> Union[str, Path]:
         ("t", "image_x01_y01_t{t:d+}.ome.tif"),
     ],
 )
-# @pytest.fixture(params=[("c", "image_x01_y01_c{c:d+}.ome.tif")])
 def get_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest:
     """To get the parameter of the fixture."""
     return request.param
@@ -77,3 +76,41 @@ def synthetic_images(
             Path.unlink(inp)
 
     return inp_dir, variable, pattern
+
+
+@pytest.fixture()
+def synthetic_multi_images(
+    inp_dir: Union[str, Path],
+) -> Union[str, Path]:
+    """Generate random synthetic images."""
+    image_sizes = 1024  # side length used for both axes of each square image
+
+    for i in range(0, 4):  # i is written into the filename as the zero-padded t index (t00..t03)
+        im = np.zeros((image_sizes, image_sizes))
+        points = image_sizes * np.random.random((2, 10**2))  # 100 random coordinate pairs in [0, image_sizes)
+        im[(points[0]).astype(int), (points[1]).astype(int)] = 1  # set the sampled pixels to 1
+        im = filters.gaussian(im, sigma=image_sizes / (20.0 * 10))  # sigma evaluates to 5.12; presumably skimage.filters — confirm
+        outname_1 = f"tubhiswt_z00_c00_t{str(i).zfill(2)}.tif"  # names cover every z{00,01} x c{00,01} combination for this t
+        outname_2 = f"tubhiswt_z01_c00_t{str(i).zfill(2)}.tif"
+        outname_3 = f"tubhiswt_z00_c01_t{str(i).zfill(2)}.tif"
+        outname_4 = f"tubhiswt_z01_c01_t{str(i).zfill(2)}.tif"
+
+        io.imsave(Path(inp_dir, outname_1), im)  # the same array is saved under all four names
+        io.imsave(Path(inp_dir, outname_2), im)
+        io.imsave(Path(inp_dir, outname_3), im)
+        io.imsave(Path(inp_dir, outname_4), im)
+
+    for inp in Path(inp_dir).iterdir():  # second pass: rewrite each .tif as .ome.tif
+        if inp.suffix == ".tif":  # NOTE(review): "*.ome.tif" names also have suffix ".tif" — confirm inp_dir holds no .ome.tif files beforehand
+            with BioReader(inp) as br:
+                img = br.read().squeeze()
+                outname = inp.stem + ".ome.tif"  # e.g. "name.tif" -> "name.ome.tif"
+                with BioWriter(
+                    file_path=Path(inp_dir, outname),
+                    metadata=br.metadata,
+                ) as bw:
+                    bw[:] = img
+                    bw.close()  # NOTE(review): probably redundant inside the `with` block — confirm against bfio
+            Path.unlink(inp)  # delete the source .tif after the .ome.tif copy is written
+
+    return inp_dir