Compressed Tiff Files (#280)

* compressed tiff files * removed files created by tests? * variable incorrectly named * missed one
angelolab · Nov 1, 2022 · 12c9de4 · 12c9de4
1 parent fd334ff
commit 12c9de4
Show file tree

Hide file tree

Showing 9 changed files with 112 additions and 23 deletions.
diff --git a/toffy/file_hash_test.py b/toffy/file_hash_test.py
@@ -7,6 +7,7 @@
 import numpy as np
 
 from toffy import file_hash
+from toffy.image_utils import save_image
 
 
 def test_get_hash():
@@ -15,7 +16,7 @@ def test_get_hash():
         for img in range(2):
             array = np.random.rand(36).reshape((6, 6))
             temp_file_path = os.path.join(temp_dir, 'test_file_{}.tiff'.format(img))
-            io.imsave(temp_file_path, array, check_contrast=False)
+            save_image(temp_file_path, array)
 
         shutil.copy(os.path.join(temp_dir, 'test_file_0.tiff'),
                     os.path.join(temp_dir, 'test_file_0_copy.tiff'))
@@ -37,7 +38,7 @@ def test_compare_directories():
         for img in range(5):
             array = np.random.rand(36).reshape((6, 6))
             temp_file_path = os.path.join(dir_1, 'test_file_{}.tiff'.format(img))
-            io.imsave(temp_file_path, array, check_contrast=False)
+            save_image(temp_file_path, array)
 
         # copy same data into second directory
         dir_2 = os.path.join(top_level_dir, 'dir_2')

diff --git a/toffy/image_stitching.py b/toffy/image_stitching.py
@@ -8,6 +8,8 @@
 from ark.utils import data_utils, load_utils, io_utils, misc_utils
 from mibi_bin_tools.io_utils import remove_file_extensions
 
+from toffy.image_utils import save_image
+
 
 def get_max_img_size(tiff_out_dir, img_sub_folder='', run_dir=None, fov_list=None):
     """Retrieves the maximum FOV image size listed in the run file, or for the given FOVs
@@ -100,5 +102,5 @@ def stitch_images(tiff_out_dir, run_dir=None, channels=None, img_sub_folder=None
                                                     max_image_size=max_img_size, dtype='float32')
         stitched = data_utils.stitch_images(image_data, num_cols)
         current_img = stitched.loc['stitched_image', :, :, chan].values
-        io.imsave(os.path.join(stitched_dir, chan + '_stitched.tiff'),
-                  current_img.astype('float32'), check_contrast=False)
+        fname = os.path.join(stitched_dir, chan + "_stitched.tiff")
+        save_image(fname, current_img.astype("float32"))
diff --git a/toffy/image_utils.py b/toffy/image_utils.py
@@ -0,0 +1,26 @@
+import skimage.io as io
+import pathlib
+import numpy as np
+from typing import Union
+
+
+def save_image(fname: Union[str, pathlib.Path], data: np.ndarray,
+               compression_level: int = 6) -> None:
+    """Save `data` to a tiff file through `skimage.io.imsave()` using the tifffile plugin.
+
+    Args:
+        fname (str or pathlib.Path): The location to save the tiff file. A `pathlib.Path` is
+            converted to a POSIX-style string before saving.
+        data (np.ndarray): The Numpy array to save.
+        compression_level (int, optional): Forwarded to the plugin as `compress`. Increasing it
+            increases memory consumption, decreases compression speed and moderately
+            increases compression ratio. The documented range is `[1,9]`. Defaults to 6.
+    """
+    # Extra keyword arguments forwarded to the tifffile plugin (compression config).
+    plugin_args = {'compress': compression_level}
+
+    # Normalize Path objects to plain strings before handing them to skimage.
+    if isinstance(fname, pathlib.Path):
+        fname = fname.as_posix()
+
+    io.imsave(fname=fname, arr=data, plugin="tifffile", check_contrast=False, **plugin_args)
diff --git a/toffy/image_utils_test.py b/toffy/image_utils_test.py
@@ -0,0 +1,58 @@
+import skimage.io as io
+import numpy as np
+from toffy import image_utils
+import os
+import pytest
+import pathlib
+
+
+@pytest.fixture(scope="session")
+def create_img_data() -> np.ndarray:
+    """
+    Session-scoped fixture supplying the image array used by the tiff compression tests.
+
+    Returns:
+        np.ndarray: A (1000 x 1000) `int16` array of seeded pseudo-random values in `[0, 256)`.
+    """
+
+    # A fixed seed keeps the generated array identical from run to run
+    seeded_rng = np.random.default_rng(12345)
+
+    # Draw the test image from the seeded generator
+    img: np.ndarray = seeded_rng.integers(low=0, high=256, size=(1000, 1000), dtype=np.int16)
+
+    yield img
+
+
+class TestSaveImage():
+    """Tests that `image_utils.save_image` shrinks tiff files without altering values."""
+
+    @pytest.fixture(autouse=True)
+    def _setup(self, tmp_path, create_img_data):
+        self.uncompressed_fname: pathlib.Path = (tmp_path / "test_img.tiff")
+        self.compressed_fname: pathlib.Path = (tmp_path / "test_img_compressed.tiff")
+        self.data: np.ndarray = create_img_data
+
+        # save uncompressed image as the baseline for the size and content comparisons
+        io.imsave(self.uncompressed_fname.as_posix(), arr=self.data, plugin="tifffile",
+                  check_contrast=False)
+
+    @pytest.mark.parametrize("compress_level", [1, 6, 9,
+                                                pytest.param(10, marks=pytest.mark.xfail)])
+    def test_save_compressed_img(self, compress_level):
+        # The level-10 case is marked xfail: saving fails when compression_level > 9
+        image_utils.save_image(fname=self.compressed_fname, data=self.data,
+                               compression_level=compress_level)
+
+        # Assert that the compressed tiff file is smaller than the uncompressed tiff file
+        uncompressed_tiff_file_size: int = os.path.getsize(self.uncompressed_fname)
+        compressed_tiff_file_size: int = os.path.getsize(self.compressed_fname)
+
+        assert compressed_tiff_file_size < uncompressed_tiff_file_size
+
+        # Assert that the values in the compressed tiff file and the uncompressed
+        # tiff file are equal.
+        uncompressed_data: np.ndarray = io.imread(self.uncompressed_fname)
+        compressed_data: np.ndarray = io.imread(self.compressed_fname)
+
+        np.testing.assert_array_equal(compressed_data, uncompressed_data)
diff --git a/toffy/normalize.py b/toffy/normalize.py
@@ -19,6 +19,7 @@
 from mibi_bin_tools.io_utils import remove_file_extensions
 from mibi_bin_tools.bin_files import extract_bin_files, get_median_pulse_height
 from mibi_bin_tools.panel_utils import make_panel
+from toffy.image_utils import save_image
 
 from toffy.json_utils import read_json_file, write_json_file
 
@@ -653,8 +654,8 @@ def normalize_fov(img_data, norm_vals, norm_dir, fov, channels, extreme_vals):
     normalized_images = img_data / norm_vals.reshape((1, 1, 1, len(norm_vals)))
 
     for idx, chan in enumerate(channels):
-        io.imsave(os.path.join(output_fov_dir, chan + '.tiff'),
-                  normalized_images[0, :, :, idx], check_contrast=False)
+        fname = os.path.join(output_fov_dir, chan + ".tiff")
+        save_image(fname, normalized_images[0, :, :, idx])
 
     # save logs
     log_df = pd.DataFrame({'channels': channels,

diff --git a/toffy/qc_comp.py b/toffy/qc_comp.py
@@ -7,11 +7,10 @@
 from scipy.ndimage import gaussian_filter
 import seaborn as sns
 from shutil import rmtree
-from skimage.io import imsave
 
 from toffy.mibitracker_utils import MibiTrackerError
 from toffy.mibitracker_utils import MibiRequests
-from toffy import settings
+from toffy import settings, image_utils
 
 import ark.utils.io_utils as io_utils
 import ark.utils.misc_utils as misc_utils
@@ -182,10 +181,8 @@ def download_mibitracker_data(email, password, run_name, run_label, base_dir, ti
             chan_file = '%s.tiff' % chan
 
             # write the data to a .tiff file in the FOV directory structure
-            imsave(
-                os.path.join(base_dir, tiff_dir, img['number'], img_sub_folder, chan_file),
-                chan_data, check_contrast=False
-            )
+            fname: str = os.path.join(base_dir, tiff_dir, img['number'], img_sub_folder, chan_file)
+            image_utils.save_image(fname, chan_data)
 
         # append the run name and run id to the list
         run_order.append((img['number'], img['id']))

diff --git a/toffy/rosetta.py b/toffy/rosetta.py
@@ -14,6 +14,7 @@
 from ark.utils.load_utils import load_imgs_from_tree, load_imgs_from_dir
 from ark.utils.io_utils import list_folders, validate_paths, list_files, remove_file_extensions
 from ark.utils.misc_utils import verify_same_elements, verify_in_list
+from toffy.image_utils import save_image
 
 from toffy.streak_detection import streak_correction
 from toffy.json_utils import read_json_file
@@ -294,12 +295,11 @@ def compensate_image_data(raw_data_dir, comp_data_dir, comp_mat_path, panel_info
                 # save tifs to appropriate directories
                 if save_format in ['rescaled', 'both']:
                     save_path = os.path.join(rescale_folder, channel_name)
-                    io.imsave(save_path, comp_data[j, :, :, idx] / norm_const,
-                              check_contrast=False)
+                    save_image(save_path, comp_data[j, :, :, idx] / norm_const)
 
                 if save_format in ['raw', 'both']:
                     save_path = os.path.join(raw_folder, channel_name)
-                    io.imsave(save_path, comp_data[j, :, :, idx], check_contrast=False)
+                    save_image(save_path, comp_data[j, :, :, idx])
 
 
 def create_tiled_comparison(input_dir_list, output_dir, max_img_size,
@@ -351,9 +351,8 @@ def create_tiled_comparison(input_dir_list, output_dir, max_img_size,
                                                max_image_size=max_img_size)
                 tiled_image[(max_img_size * idx):(max_img_size * (idx + 1)), start:end] = \
                     dir_data.values[i, :, :, 0]
-
-        io.imsave(os.path.join(output_dir, channels[j] + '_comparison.tiff'),
-                  tiled_image, check_contrast=False)
+        fname = os.path.join(output_dir, channels[j] + "_comparison.tiff")
+        save_image(fname, tiled_image)
 
 
 def add_source_channel_to_tiled_image(raw_img_dir, tiled_img_dir, output_dir, source_channel,
@@ -399,7 +398,7 @@ def add_source_channel_to_tiled_image(raw_img_dir, tiled_img_dir, output_dir, so
         # combine together and save
         combined_tile = np.concatenate([rescaled_source, current_tile])
         save_name = tile_name.split('.tiff')[0] + '_source_' + source_channel + '.tiff'
-        io.imsave(os.path.join(output_dir, save_name), combined_tile, check_contrast=False)
+        save_image(os.path.join(output_dir, save_name), combined_tile)
 
 
 def replace_with_intensity_image(run_dir, channel='Au', replace=True, fovs=None):
@@ -573,7 +572,8 @@ def rescale_raw_imgs(img_out_dir, scale=200):
         for chan in chans:
             img = io.imread(os.path.join(fov_dir, chan))
             img = (img / scale).astype('float32')
-            io.imsave(os.path.join(sub_dir, chan), img, check_contrast=False)
+            fname = os.path.join(sub_dir, chan)
+            save_image(fname, img)
 
 
 def generate_rosetta_test_imgs(rosetta_mat_path, img_out_dir,  multipliers, folder_path, panel,

diff --git a/toffy/rosetta_test.py b/toffy/rosetta_test.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 
 from toffy import rosetta
+from toffy.image_utils import save_image
 
 import toffy.rosetta_test_cases as test_cases
 from ark.utils import test_utils
@@ -312,7 +313,8 @@ def test_add_source_channel_to_tiled_image():
         os.makedirs(tiled_dir)
         for i in range(2):
             vals = np.random.rand(im_size * 3 * im_size * num_fovs).reshape(tiled_shape)
-            io.imsave(os.path.join(tiled_dir, 'tiled_image_{}.tiff'.format(i)), vals)
+            fname = os.path.join(tiled_dir, f"tiled_image_{i}.tiff")
+            save_image(fname, vals)
 
         output_dir = os.path.join(top_level_dir, 'output_dir')
         os.makedirs(output_dir)

diff --git a/toffy/streak_detection.py b/toffy/streak_detection.py
@@ -17,6 +17,8 @@
 from functools import partial
 import xarray as xr
 
+from toffy.image_utils import save_image
+
 
 @dataclass
 class StreakData:
@@ -83,7 +85,7 @@ def _save_streak_data(streak_data: StreakData, name: str):
     st = partial(_get_save_dir, data_dir, name)
 
     if type(data) is np.ndarray:
-        io.imsave(st("tiff"), data, check_contrast=False)
+        save_image(st("tiff"), data)
     elif type(data) is pd.DataFrame:
         data.to_csv(st("csv"), index=True)
 
@@ -383,7 +385,7 @@ def save_corrected_channels(
     for channel in corrected_channels.channels.values:
         img: np.ndarray = corrected_channels.loc[:, :, channel].values
         fp = Path(streak_data.corrected_dir, channel + ".tiff")
-        io.imsave(fp, img, check_contrast=False)
+        save_image(fp, img)
 
     # Save streak masks
     if save_streak_data: