From 12c9de46b97080cfeef755fceb609eefc92be65b Mon Sep 17 00:00:00 2001 From: Sricharan Reddy Varra Date: Mon, 31 Oct 2022 18:19:55 -0700 Subject: [PATCH] Compressed Tiff Files (#280) * compressed tiff files * removed files created by tests? * variable incorrectly named * missed one --- toffy/file_hash_test.py | 5 ++-- toffy/image_stitching.py | 6 ++-- toffy/image_utils.py | 26 ++++++++++++++++++ toffy/image_utils_test.py | 58 +++++++++++++++++++++++++++++++++++++++ toffy/normalize.py | 5 ++-- toffy/qc_comp.py | 9 ++---- toffy/rosetta.py | 16 +++++------ toffy/rosetta_test.py | 4 ++- toffy/streak_detection.py | 6 ++-- 9 files changed, 112 insertions(+), 23 deletions(-) create mode 100644 toffy/image_utils.py create mode 100644 toffy/image_utils_test.py diff --git a/toffy/file_hash_test.py b/toffy/file_hash_test.py index 5fbc582a..a353a621 100644 --- a/toffy/file_hash_test.py +++ b/toffy/file_hash_test.py @@ -7,6 +7,7 @@ import numpy as np from toffy import file_hash +from toffy.image_utils import save_image def test_get_hash(): @@ -15,7 +16,7 @@ def test_get_hash(): for img in range(2): array = np.random.rand(36).reshape((6, 6)) temp_file_path = os.path.join(temp_dir, 'test_file_{}.tiff'.format(img)) - io.imsave(temp_file_path, array, check_contrast=False) + save_image(temp_file_path, array) shutil.copy(os.path.join(temp_dir, 'test_file_0.tiff'), os.path.join(temp_dir, 'test_file_0_copy.tiff')) @@ -37,7 +38,7 @@ def test_compare_directories(): for img in range(5): array = np.random.rand(36).reshape((6, 6)) temp_file_path = os.path.join(dir_1, 'test_file_{}.tiff'.format(img)) - io.imsave(temp_file_path, array, check_contrast=False) + save_image(temp_file_path, array) # copy same data into second directory dir_2 = os.path.join(top_level_dir, 'dir_2') diff --git a/toffy/image_stitching.py b/toffy/image_stitching.py index 379dbeb3..94bfa0f8 100644 --- a/toffy/image_stitching.py +++ b/toffy/image_stitching.py @@ -8,6 +8,8 @@ from ark.utils import data_utils, load_utils, 
io_utils, misc_utils from mibi_bin_tools.io_utils import remove_file_extensions +from toffy.image_utils import save_image + def get_max_img_size(tiff_out_dir, img_sub_folder='', run_dir=None, fov_list=None): """Retrieves the maximum FOV image size listed in the run file, or for the given FOVs @@ -100,5 +102,5 @@ def stitch_images(tiff_out_dir, run_dir=None, channels=None, img_sub_folder=None max_image_size=max_img_size, dtype='float32') stitched = data_utils.stitch_images(image_data, num_cols) current_img = stitched.loc['stitched_image', :, :, chan].values - io.imsave(os.path.join(stitched_dir, chan + '_stitched.tiff'), - current_img.astype('float32'), check_contrast=False) + fname = os.path.join(stitched_dir, chan + "_stitched.tiff") + save_image(fname, current_img.astype("float32")) diff --git a/toffy/image_utils.py b/toffy/image_utils.py new file mode 100644 index 00000000..be7a0eb6 --- /dev/null +++ b/toffy/image_utils.py @@ -0,0 +1,26 @@ +import skimage.io as io +import pathlib +import numpy as np +from typing import Union + + +def save_image(fname: Union[str, pathlib.Path], data: np.ndarray, + compression_level: int = 6) -> None: + """ + A thin wrapper around `skimage.io.imsave()`. + + Args: + fname (str): The location to save the tiff file. + data (np.ndarray): The Numpy array to save. + compression_level (int, optional): The compression level for skimage.io.imsave. Increasing + `compress` increases memory consumption, decreases compression speed and moderately + increases compression ratio. The range of compress is `[1,9]`. Defaults to 6. 
+ """ + # Compression Config: + plugin_args: dict[str, any] = { + 'compress': compression_level, + } + if isinstance(fname, pathlib.Path): + fname: str = fname.as_posix() + + io.imsave(fname=fname, arr=data, plugin="tifffile", check_contrast=False, **plugin_args) diff --git a/toffy/image_utils_test.py b/toffy/image_utils_test.py new file mode 100644 index 00000000..2d33f191 --- /dev/null +++ b/toffy/image_utils_test.py @@ -0,0 +1,58 @@ +import skimage.io as io +import numpy as np +from toffy import image_utils +import os +import pytest +import pathlib + + +@pytest.fixture(scope="session") +def create_img_data() -> np.ndarray: + """ + A Fixture which creates a numpy array for tiff file compression testing. + + Returns: + np.ndarray: Returns a randomly generated (1000 x 1000) numpy array. + """ + + # Initialize a new generator - set seed for reproducibility + rng = np.random.default_rng(12345) + + # Create testing data array + data: np.ndarray = rng.integers(low=0, high=256, size=(1000, 1000), dtype=np.int16) + + yield data + + +class TestSaveImage(): + @pytest.fixture(autouse=True) + def _setup(self, tmp_path, create_img_data): + self.uncompressed_fname: pathlib.Path = (tmp_path / "test_img.tiff") + self.compressed_fname: pathlib.Path = (tmp_path / "test_img_compressed.tiff") + self.data: np.ndarray = create_img_data + + # save uncompressed image + io.imsave(self.uncompressed_fname.as_posix(), arr=self.data, plugin="tifffile", + check_contrast=False) + + @pytest.mark.parametrize("compress_level", [1, 6, 9, + pytest.param(10, marks=pytest.mark.xfail)]) + def test_save_compressed_img(self, compress_level): + + # Fails when compression_level > 9 + image_utils.save_image(fname=self.compressed_fname, data=self.data, + compression_level=compress_level) + + # Assert that the compressed tiff file is smaller than the uncompressed tiff file + uncompressed_tiff_file_size: int = os.path.getsize(self.uncompressed_fname) + compressed_tiff_file_size: int = 
os.path.getsize(self.compressed_fname) + + assert compressed_tiff_file_size < uncompressed_tiff_file_size + + # Assert that the values in the compressed tiff file and the uncompressed + # tiff file are equal. + + uncompressed_data: np.ndarray = io.imread(self.uncompressed_fname) + compressed_data: np.ndarray = io.imread(self.compressed_fname) + + np.testing.assert_array_equal(compressed_data, uncompressed_data) diff --git a/toffy/normalize.py b/toffy/normalize.py index a7cf5375..f3c69750 100644 --- a/toffy/normalize.py +++ b/toffy/normalize.py @@ -19,6 +19,7 @@ from mibi_bin_tools.io_utils import remove_file_extensions from mibi_bin_tools.bin_files import extract_bin_files, get_median_pulse_height from mibi_bin_tools.panel_utils import make_panel +from toffy.image_utils import save_image from toffy.json_utils import read_json_file, write_json_file @@ -653,8 +654,8 @@ def normalize_fov(img_data, norm_vals, norm_dir, fov, channels, extreme_vals): normalized_images = img_data / norm_vals.reshape((1, 1, 1, len(norm_vals))) for idx, chan in enumerate(channels): - io.imsave(os.path.join(output_fov_dir, chan + '.tiff'), - normalized_images[0, :, :, idx], check_contrast=False) + fname = os.path.join(output_fov_dir, chan + ".tiff") + save_image(fname, normalized_images[0, :, :, idx]) # save logs log_df = pd.DataFrame({'channels': channels, diff --git a/toffy/qc_comp.py b/toffy/qc_comp.py index d79ed715..50ef3ee9 100644 --- a/toffy/qc_comp.py +++ b/toffy/qc_comp.py @@ -7,11 +7,10 @@ from scipy.ndimage import gaussian_filter import seaborn as sns from shutil import rmtree -from skimage.io import imsave from toffy.mibitracker_utils import MibiTrackerError from toffy.mibitracker_utils import MibiRequests -from toffy import settings +from toffy import settings, image_utils import ark.utils.io_utils as io_utils import ark.utils.misc_utils as misc_utils @@ -182,10 +181,8 @@ def download_mibitracker_data(email, password, run_name, run_label, base_dir, ti chan_file = '%s.tiff' % 
chan # write the data to a .tiff file in the FOV directory structure - imsave( - os.path.join(base_dir, tiff_dir, img['number'], img_sub_folder, chan_file), - chan_data, check_contrast=False - ) + fname: str = os.path.join(base_dir, tiff_dir, img['number'], img_sub_folder, chan_file) + image_utils.save_image(fname, chan_data) # append the run name and run id to the list run_order.append((img['number'], img['id'])) diff --git a/toffy/rosetta.py b/toffy/rosetta.py index 5c8cc6c2..d6df464f 100644 --- a/toffy/rosetta.py +++ b/toffy/rosetta.py @@ -14,6 +14,7 @@ from ark.utils.load_utils import load_imgs_from_tree, load_imgs_from_dir from ark.utils.io_utils import list_folders, validate_paths, list_files, remove_file_extensions from ark.utils.misc_utils import verify_same_elements, verify_in_list +from toffy.image_utils import save_image from toffy.streak_detection import streak_correction from toffy.json_utils import read_json_file @@ -294,12 +295,11 @@ def compensate_image_data(raw_data_dir, comp_data_dir, comp_mat_path, panel_info # save tifs to appropriate directories if save_format in ['rescaled', 'both']: save_path = os.path.join(rescale_folder, channel_name) - io.imsave(save_path, comp_data[j, :, :, idx] / norm_const, - check_contrast=False) + save_image(save_path, comp_data[j, :, :, idx] / norm_const) if save_format in ['raw', 'both']: save_path = os.path.join(raw_folder, channel_name) - io.imsave(save_path, comp_data[j, :, :, idx], check_contrast=False) + save_image(save_path, comp_data[j, :, :, idx]) def create_tiled_comparison(input_dir_list, output_dir, max_img_size, @@ -351,9 +351,8 @@ def create_tiled_comparison(input_dir_list, output_dir, max_img_size, max_image_size=max_img_size) tiled_image[(max_img_size * idx):(max_img_size * (idx + 1)), start:end] = \ dir_data.values[i, :, :, 0] - - io.imsave(os.path.join(output_dir, channels[j] + '_comparison.tiff'), - tiled_image, check_contrast=False) + fname = os.path.join(output_dir, channels[j] + 
"_comparison.tiff") + save_image(fname, tiled_image) def add_source_channel_to_tiled_image(raw_img_dir, tiled_img_dir, output_dir, source_channel, @@ -399,7 +398,7 @@ def add_source_channel_to_tiled_image(raw_img_dir, tiled_img_dir, output_dir, so # combine together and save combined_tile = np.concatenate([rescaled_source, current_tile]) save_name = tile_name.split('.tiff')[0] + '_source_' + source_channel + '.tiff' - io.imsave(os.path.join(output_dir, save_name), combined_tile, check_contrast=False) + save_image(os.path.join(output_dir, save_name), combined_tile) def replace_with_intensity_image(run_dir, channel='Au', replace=True, fovs=None): @@ -573,7 +572,8 @@ def rescale_raw_imgs(img_out_dir, scale=200): for chan in chans: img = io.imread(os.path.join(fov_dir, chan)) img = (img / scale).astype('float32') - io.imsave(os.path.join(sub_dir, chan), img, check_contrast=False) + fname = os.path.join(sub_dir, chan) + save_image(fname, img) def generate_rosetta_test_imgs(rosetta_mat_path, img_out_dir, multipliers, folder_path, panel, diff --git a/toffy/rosetta_test.py b/toffy/rosetta_test.py index c05523e7..3bd1588b 100644 --- a/toffy/rosetta_test.py +++ b/toffy/rosetta_test.py @@ -8,6 +8,7 @@ from pathlib import Path from toffy import rosetta +from toffy.image_utils import save_image import toffy.rosetta_test_cases as test_cases from ark.utils import test_utils @@ -312,7 +313,8 @@ def test_add_source_channel_to_tiled_image(): os.makedirs(tiled_dir) for i in range(2): vals = np.random.rand(im_size * 3 * im_size * num_fovs).reshape(tiled_shape) - io.imsave(os.path.join(tiled_dir, 'tiled_image_{}.tiff'.format(i)), vals) + fname = os.path.join(tiled_dir, f"tiled_image_{i}.tiff") + save_image(fname, vals) output_dir = os.path.join(top_level_dir, 'output_dir') os.makedirs(output_dir) diff --git a/toffy/streak_detection.py b/toffy/streak_detection.py index 0369b074..a1fc1fac 100644 --- a/toffy/streak_detection.py +++ b/toffy/streak_detection.py @@ -17,6 +17,8 @@ from 
functools import partial import xarray as xr +from toffy.image_utils import save_image + @dataclass class StreakData: @@ -83,7 +85,7 @@ def _save_streak_data(streak_data: StreakData, name: str): st = partial(_get_save_dir, data_dir, name) if type(data) is np.ndarray: - io.imsave(st("tiff"), data, check_contrast=False) + save_image(st("tiff"), data) elif type(data) is pd.DataFrame: data.to_csv(st("csv"), index=True) @@ -383,7 +385,7 @@ def save_corrected_channels( for channel in corrected_channels.channels.values: img: np.ndarray = corrected_channels.loc[:, :, channel].values fp = Path(streak_data.corrected_dir, channel + ".tiff") - io.imsave(fp, img, check_contrast=False) + save_image(fp, img) # Save streak masks if save_streak_data: