diff --git a/.github/scripts/get_example_dataset.py b/.github/scripts/get_example_dataset.py index 0a388ef73..079d9e271 100644 --- a/.github/scripts/get_example_dataset.py +++ b/.github/scripts/get_example_dataset.py @@ -1,33 +1,28 @@ -import pathlib +from pathlib import Path +import os import datasets datasets.disable_progress_bar() -valid_datasets = [ - "segment_image_data", - "cluster_pixels", - "cluster_cells", - "post_clustering", - "fiber_segmentation", - "LDA_preprocessing", - "LDA_training_inference", - "neighborhood_analysis", - "pairwise_spatial_enrichment", - "ome_tiff", -] +DATASET_PATH = "angelolab/ark_example" -def load_dataset(cache_dir: pathlib.Path, name: str): +valid_configs = datasets.get_dataset_config_names(DATASET_PATH) + +def load_dataset(cache_dir: Path, name: str): _ = datasets.load_dataset( - path="angelolab/ark_example", + path=DATASET_PATH, cache_dir=cache_dir, name=name, - use_auth_token=False, - revision="main" + token=False, + revision="main", + trust_remote_code=True, ) -# Make the cache directory if it doesn't exist. -cache_dir = pathlib.Path("./data/cache/") +# Create the cache directory +cache_dir = Path(os.environ.get("GITHUB_WORKSPACE")).resolve() / "data" / "cache" cache_dir.mkdir(parents=True, exist_ok=True) -for dataset_config in valid_datasets: - load_dataset(cache_dir=cache_dir.as_posix(), name=dataset_config) + +# Download all available datasets +for dataset_config in valid_configs: + load_dataset(cache_dir=cache_dir, name=dataset_config) \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 75b530782..97a1a1c6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,31 +14,31 @@ jobs: # Ensure that a wheel builder finishes even if another fails fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-12] + os: [ubuntu-latest, windows-latest, macos-13] steps: - name: Checkout ${{ github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up QEMU (For Linux ARM) if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 with: platforms: arm64 - name: Build Wheels - uses: pypa/cibuildwheel@v2.13.0 + uses: pypa/cibuildwheel@v2.16.5 with: package-dir: . 
output-dir: wheelhouse config-file: "{package}/pyproject.toml" - name: Store Wheel Artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: distributions + name: dist-${{ matrix.os }} path: wheelhouse/*.whl build_sdist: @@ -46,7 +46,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout ${{ github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Build sdist run: pipx run build --sdist @@ -55,7 +55,7 @@ jobs: run: pipx run twine check dist/* - name: Store sdist Artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: distributions + name: dist-sdist path: dist/*.tar.gz diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 301a7d006..fa1fb16cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,12 +25,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout ${{github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Restore Example Dataset Cache - uses: actions/cache@v3 + uses: actions/cache@v4 id: dataset-cache with: path: ./data/cache/ @@ -38,7 +38,7 @@ jobs: enableCrossOsArchive: true - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: "3.11" cache-dependency-path: "**/pyproject.toml" @@ -56,7 +56,7 @@ jobs: pull-requests: write secrets: inherit uses: ./.github/workflows/test.yml - + build: name: Build permissions: @@ -71,13 +71,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout ${{github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Download Coverage Artifact - uses: actions/download-artifact@v3 - # if `name` is not specified, all artifacts are downloaded. + uses: actions/download-artifact@v4 + with: + pattern: coverage-* + merge-multiple: true - name: Upload Coverage to Coveralls uses: coverallsapp/github-action@v2 diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index fe91801d8..7d93a2a91 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -36,31 +36,31 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout ${{ github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 # Checkout the latest release branch ref: ${{ github.event.workflow_run.head_sha }} - name: Set up QEMU (For Linux Arm Containers) - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 with: platforms: arm64 # Uses the latest version of Buildx and Buildkit - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: buildkitd-flags: --debug - name: Login to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Docker Metadata Information - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5 id: docker_metadata with: github-token: ${{ github.token }} @@ -74,7 +74,7 @@ jobs: type=semver,pattern={{raw}} - name: Build and Push Docker Image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . 
file: Dockerfile diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml index be46f101c..70bfc0a48 100644 --- a/.github/workflows/pypi_publish.yml +++ b/.github/workflows/pypi_publish.yml @@ -35,7 +35,7 @@ jobs: needs: [test, build_wheels_sdist] runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 with: name: distributions path: dist @@ -57,15 +57,16 @@ jobs: if: github.event_name == 'release' && github.event.action == 'published' steps: - name: Download Artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: - name: distributions + pattern: dist-* + merge-multiple: true path: dist - name: PYPI Publish - uses: pypa/gh-action-pypi-publish@release/v1.6 + uses: pypa/gh-action-pypi-publish@release/v1.8 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - packages_dir: dist/ + packages-dir: dist/ verbose: true diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 21dfd9aaf..0a3ed3789 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,6 +2,11 @@ name: Test on: workflow_call: + inputs: + debug_enabled: + type: boolean + required: false + default: false permissions: contents: read # to fetch code (actions/checkout) @@ -14,26 +19,28 @@ jobs: fail-fast: false matrix: python-version: ["3.9", "3.10", "3.11"] - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu-latest, macos-13, windows-latest] + env: + GITHUB_WORKSPACE: ${{ github.workspace }} steps: - name: Checkout ${{ github.repository }} - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Download Example Dataset Cache - uses: actions/cache/restore@v3 - id: dataset-cache + uses: actions/cache/restore@v4 env: cache-name: dataset with: path: ./data/cache/ - key: huggingface-${{ env.cache-name }} + key: huggingface-${{env.cache-name}} enableCrossOsArchive: true + fail-on-cache-miss: true - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache-dependency-path: "**/pyproject.toml" @@ -45,11 +52,11 @@ jobs: - name: Run Tests run: | - pytest + pytest --cov-report=lcov:coverage-${{ join(matrix.*, '-') }}.lcov - name: Archive Coverage - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-${{ join(matrix.*, '-') }} path: | - coverage.lcov + coverage-${{ join(matrix.*, '-') }}.lcov diff --git a/conftest.py b/conftest.py index 4a15b53e5..85c8ad9c1 100644 --- a/conftest.py +++ b/conftest.py @@ -1,5 +1,5 @@ import os -import pathlib +from pathlib import Path from typing import Generator, Iterator, Union import numpy as np @@ -10,7 +10,7 @@ def dataset_cache_dir() -> Iterator[Union[str, None]]: # Change cache directory if running on CI if os.environ.get("CI", None): - cache_dir = pathlib.Path("./data/cache/") + cache_dir = (Path(os.environ.get("GITHUB_WORKSPACE")) / "data" / "cache").resolve() else: cache_dir = None yield cache_dir diff --git a/pyproject.toml b/pyproject.toml index 0fc7a5f8f..c7fba64ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,17 +1,17 @@ [build-system] requires = [ "setuptools", - "Cython>=0.29,<1", + "Cython>=3", "numpy>=1.20,<1.24", - "setuptools_scm[toml]>=6.2", + "setuptools_scm[toml]>=6", ] build-backend = "setuptools.build_meta" [project] dependencies = [ - "alpineer==0.1.10", + "alpineer==0.1.12", "anndata", - "Cython>=0.29,<1", + "Cython>=3", 
"dask[distributed]", "datasets>=2.6,<3.0", "dill>=0.3.5,<0.4", @@ -27,9 +27,9 @@ dependencies = [ "palettable>=3.3.0,<4", "pandas>=2", "pillow>=9,<10", - "pyFlowSOM>=0.1.15", + "pyFlowSOM>=0.1.16", "requests>=2.20,<3", - "scikit-image<=0.19.3", + "scikit-image<0.19.3", "scikit-learn>=1.1,<2", "graphviz", "scipy>=1.7,<2", @@ -99,6 +99,7 @@ version_scheme = "release-branch-semver" local_scheme = "no-local-version" [tool.cibuildwheel] +before-all = "uname -a" build = ["cp39-*", "cp310-*", "cp311-*"] skip = [ "cp36-*", # Python 3.6 @@ -115,14 +116,6 @@ skip = [ build-frontend = "build" -# Avoid testing on emulated architectures -test-skip = [ - "*-win_arm64", # Skip testing emulated arm64 biulds on Windows - "*-*linux_aarch64", # Skip testing emulated Linux builds - "*-macosx_arm64", # Skip testing emulated arm64 builds on Intel Macs - "*-macosx_universal2:arm64", # Skip testing emulated arm64 portion of universal2 builds -] - # "manylinux" versioning # PEP 600: https://peps.python.org/pep-0600/ # Build using the manylinux_2_28 image @@ -144,9 +137,8 @@ repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest # Build for Windows x86_64, and ARM 64 [tool.cibuildwheel.windows] archs = ["AMD64", "ARM64"] -# might not need to repair with delvewheel? -# before-build = "pip install delvewheel" # Use delvewheel on windows -# repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" +before-build = "pip install delvewheel" # Use delvewheel on windows +repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" # Coverage [tool.coverage.paths] @@ -177,7 +169,6 @@ addopts = [ "--randomly-seed=24", "--randomly-dont-reorganize", "--cov=ark", - "--cov-report=lcov", "--pycodestyle", ] console_output_style = "count" diff --git a/src/ark/phenotyping/cluster_helpers.py b/src/ark/phenotyping/cluster_helpers.py index 94cb918fa..8265dbc34 100644 --- a/src/ark/phenotyping/cluster_helpers.py +++ b/src/ark/phenotyping/cluster_helpers.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod from itertools import combinations from typing import List, Literal, Protocol, runtime_checkable - +import natsort as ns import feather import numpy as np import pandas as pd @@ -413,12 +413,10 @@ def assign_som_clusters(self, num_parallel_cells=1000000) -> pd.DataFrame: # define a template class for type hinting cluster param in ConsensusCluster constructor @runtime_checkable class ClusterClassTemplate(Protocol): - def fit_predict(self) -> None: - ... + def fit_predict(self) -> None: ... @property - def n_clusters(self) -> int: - return n_cluster + def n_clusters(self) -> int: ... 
############################################### # Copyright Žiga Sajovic, XLAB 2019 # diff --git a/src/ark/phenotyping/pixel_cluster_utils.py b/src/ark/phenotyping/pixel_cluster_utils.py index 9e119de46..10418c0d5 100644 --- a/src/ark/phenotyping/pixel_cluster_utils.py +++ b/src/ark/phenotyping/pixel_cluster_utils.py @@ -10,6 +10,7 @@ from alpineer import image_utils, io_utils, load_utils, misc_utils from pyarrow.lib import ArrowInvalid from skimage.io import imread +import natsort as ns def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder, @@ -54,6 +55,7 @@ def calculate_channel_percentiles(tiff_dir, fovs, channels, img_sub_folder, percentile_means.append(np.mean(percentile_list)) percentile_df = pd.DataFrame(np.expand_dims(percentile_means, axis=0), columns=channels) + percentile_df.sort_index(axis="columns", key=ns.natsort_key, inplace=True) return percentile_df diff --git a/src/ark/phenotyping/pixel_som_clustering.py b/src/ark/phenotyping/pixel_som_clustering.py index e682596d7..e6c674b63 100644 --- a/src/ark/phenotyping/pixel_som_clustering.py +++ b/src/ark/phenotyping/pixel_som_clustering.py @@ -136,7 +136,7 @@ def run_pixel_som_assignment(pixel_data_path, pixel_pysom_obj, overwrite, num_pa return fov, 0 -def cluster_pixels(fovs, channels, base_dir, pixel_pysom, data_dir='pixel_mat_data', +def cluster_pixels(fovs, base_dir, pixel_pysom, data_dir='pixel_mat_data', multiprocess=False, batch_size=5, num_parallel_pixels=1000000, overwrite=False): """Uses trained SOM weights to assign cluster labels on full pixel data. @@ -146,8 +146,6 @@ def cluster_pixels(fovs, channels, base_dir, pixel_pysom, data_dir='pixel_mat_da Args: fovs (list): The list of fovs to subset on - channels (list): - The list of channels to subset on base_dir (str): The path to the data directory pixel_pysom (cluster_helpers.PixelSOMCluster): diff --git a/src/ark/phenotyping/pixie_preprocessing.py b/src/ark/phenotyping/pixie_preprocessing.py index 1b5e97c08..a01e6265f 100644 --- a/src/ark/phenotyping/pixie_preprocessing.py +++ b/src/ark/phenotyping/pixie_preprocessing.py @@ -2,7 +2,7 @@ import os from functools import partial from shutil import rmtree - +import natsort as ns import feather import numpy as np import pandas as pd @@ -42,7 +42,7 @@ def create_fov_pixel_data(fov, channels, img_data, seg_labels, pixel_thresh_val, - `pandas.DataFrame`: Gaussian blurred and channel sum normalized pixel data for a fov - `pandas.DataFrame`: subset of the preprocessed pixel dataset for a fov """ - + channels.sort(key=ns.natsort_key) # for each marker, compute the Gaussian blur for marker in range(len(channels)): img_data[:, :, marker] = ndimage.gaussian_filter(img_data[:, :, marker], @@ -251,6 +251,8 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir, The number of FOVs to process in parallel, ignored if `multiprocess` is `False` """ + channels.sort(key=ns.natsort_key) + # if the subset_proportion specified is out of range if subset_proportion <= 0 or subset_proportion > 1: raise ValueError('Invalid subset percentage entered: must be in (0, 1]') @@ -425,8 +427,13 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir, quant_dat_fov.index.name = "channel" # update the file with the newly processed fov quantile values - quant_dat_all = quant_dat_all.merge(quant_dat_fov, how="outer", - left_index=True, right_index=True) + quant_dat_all = quant_dat_all.merge( + quant_dat_fov, + how="outer", + left_index=True, + right_index=True + ) + quant_dat_all.to_csv(quantile_path) # 
update number of fovs processed @@ -439,6 +446,12 @@ def create_pixel_matrix(fovs, channels, base_dir, tiff_dir, seg_dir, # get mean 99.9% across all fovs for all markers, check that none are missing mean_quant = pd.DataFrame(quant_dat_all.mean(axis=1)) + # Bug in Pandas w/ natsort_key, so we have to use this ugly workaround ¯\_(ツ)_/¯ + # See https://github.com/pandas-dev/pandas/issues/56081 + mean_quant.sort_index(axis="index", + key=lambda v: pd.Index(ns.natsort_key(v), tupleize_cols=False), + inplace=True + ) # save 99.9% normalization values feather.write_dataframe(mean_quant.T, os.path.join(base_dir, norm_vals_name_post_rownorm), diff --git a/src/ark/segmentation/marker_quantification.py b/src/ark/segmentation/marker_quantification.py index 8032d2ecf..5f393ca35 100644 --- a/src/ark/segmentation/marker_quantification.py +++ b/src/ark/segmentation/marker_quantification.py @@ -489,7 +489,7 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs", # if no fovs are specified, then load all the fovs if fovs is None: if is_mibitiff: - fovs = io_utils.list_files(tiff_dir, substrs=['.tif']) + fovs = io_utils.list_files(tiff_dir, substrs=[".tif", ".tiff"]) else: fovs = io_utils.list_folders(tiff_dir) @@ -532,8 +532,8 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs", # for each label given in the argument, read in that mask for the fov, and proceed with # label and table appending mask_files = io_utils.list_files(segmentation_dir, substrs=fov_name) - mask_types = process_lists(fov_names=fovs, mask_names=mask_files) - + mask_types = get_existing_mask_types(fov_names=fovs, mask_names=mask_files) + # remove nuclear from mask_types if nuclear_counts False if not nuclear_counts and "nuclear" in mask_types: mask_types.remove("nuclear") @@ -596,7 +596,7 @@ def generate_cell_table(segmentation_dir, tiff_dir, img_sub_folder="TIFs", return combined_cell_table_size_normalized, combined_cell_table_arcsinh_transformed -def process_lists(fov_names: List[str], mask_names: List[str]) -> List[str]: +def get_existing_mask_types(fov_names: List[str], mask_names: List[str]) -> List[str]: """ Function to strip prefixes from list: fov_names, strip '.tiff' suffix from list: mask names, and remove underscore prefixes, returning unique mask values (i.e. categories of masks). @@ -609,7 +609,7 @@ def process_lists(fov_names: List[str], mask_names: List[str]) -> List[str]: List[str]: Unique mask names (i.e. 
categories of masks) """ stripped_mask_names = io_utils.remove_file_extensions(mask_names) - result = [itemB[len(prefix):] for itemB in stripped_mask_names for prefix in fov_names if itemB.startswith(prefix)] + result = [mask_name[len(fov_name):] for mask_name in stripped_mask_names for fov_name in fov_names if mask_name.startswith(f"{fov_name}_")] # Remove underscore prefixes and return unique values cleaned_result = [item.lstrip('_') for item in result] unique_result = list(set(cleaned_result)) diff --git a/src/ark/utils/example_dataset.py b/src/ark/utils/example_dataset.py index 685450f2c..6e3a476fc 100644 --- a/src/ark/utils/example_dataset.py +++ b/src/ark/utils/example_dataset.py @@ -9,7 +9,7 @@ from ark.settings import EXAMPLE_DATASET_REVISION -class ExampleDataset(): +class ExampleDataset: def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str = None, revision: str = None) -> None: """ @@ -29,6 +29,7 @@ def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str * `"neighborhood_analysis"` * `"pairwise_spatial_enrichment"` * `"ome_tiff"` + * `"ez_seg_data"` overwrite_existing (bool): A flag to overwrite existing data. Defaults to `True`. cache_dir (str, optional): The directory to save the cache dir. Defaults to `None`, which internally in Hugging Face defaults to `~/.cache/huggingface/datasets`. @@ -38,9 +39,10 @@ def __init__(self, dataset: str, overwrite_existing: bool = True, cache_dir: str defaults to the latest version in the `main` branch. (https://huggingface.co/datasets/angelolab/ark_example/tree/main). """ + self.dataset_paths = None self.dataset = dataset self.overwrite_existing = overwrite_existing - self.cache_dir = cache_dir + self.cache_dir = cache_dir if cache_dir else pathlib.Path("~/.cache/huggingface/datasets").expanduser() self.revision = revision self.path_suffixes = { @@ -68,11 +70,27 @@ def download_example_dataset(self): The dataset will be downloaded to the Hugging Face default cache `~/.cache/huggingface/datasets`. """ - self.dataset_paths = datasets.load_dataset(path="angelolab/ark_example", + ds_paths = datasets.load_dataset(path="angelolab/ark_example", revision=self.revision, name=self.dataset, cache_dir=self.cache_dir, - use_auth_token=False) + token=False, + trust_remote_code=True) + + # modify the paths to be relative to the os + # For example: + # '/Users/user/.cache/huggingface/datasets/downloads/extracted/' + # becomes 'pathlib.path(self.dataset_cache) / downloads/extracted//' + self.dataset_paths = {} + for ds_name,ds in ds_paths.items(): + self.dataset_paths[ds_name] = {} + for feature in ds.features: + p, = ds[feature] + # extract the path relative to the cache_dir (last 3 parts of the path) + p = pathlib.Path(*pathlib.Path(p).parts[-3:]) + # Set the start of the path to the cache_dir (for the user's machine) + self.dataset_paths[ds_name][feature] = self.cache_dir / p / feature + def check_empty_dst(self, dst_path: pathlib.Path) -> bool: """ @@ -100,20 +118,19 @@ def move_example_dataset(self, move_dir: Union[str, pathlib.Path]): Moves the downloaded example data from the `cache_dir` to the `save_dir`. Args: - save_dir (Union[str, pathlib.Path]): The path to save the dataset files in. + move_dir (Union[str, pathlib.Path]): The path to save the dataset files in. 
""" if type(move_dir) is not pathlib.Path: move_dir = pathlib.Path(move_dir) - dataset_names = list(self.dataset_paths[self.dataset].features.keys()) + dataset_names = list(self.dataset_paths[self.dataset].keys()) for ds_n in dataset_names: ds_n_suffix: str = pathlib.Path(self.path_suffixes[ds_n]) # The path where the dataset is saved in the Hugging Face Cache post-download, # Necessary to copy + move the data from the cache to the user specified `move_dir`. - dataset_cache_path = pathlib.Path(self.dataset_paths[self.dataset][ds_n][0]) - src_path: pathlib.Path = dataset_cache_path / ds_n + src_path = pathlib.Path(self.dataset_paths[self.dataset][ds_n]) dst_path: pathlib.Path = move_dir / ds_n_suffix # Overwrite the existing dataset when `overwrite_existing` == `True` diff --git a/templates/2_Pixie_Cluster_Pixels.ipynb b/templates/2_Pixie_Cluster_Pixels.ipynb index 42aa78f20..5e126df40 100644 --- a/templates/2_Pixie_Cluster_Pixels.ipynb +++ b/templates/2_Pixie_Cluster_Pixels.ipynb @@ -490,7 +490,6 @@ "# use pixel SOM weights to assign pixel clusters\n", "pixel_som_clustering.cluster_pixels(\n", " fovs,\n", - " channels,\n", " base_dir,\n", " pixel_pysom,\n", " data_dir=pixel_data_dir,\n", diff --git a/tests/phenotyping/pixel_som_clustering_test.py b/tests/phenotyping/pixel_som_clustering_test.py index 78c03a066..5ff16818f 100644 --- a/tests/phenotyping/pixel_som_clustering_test.py +++ b/tests/phenotyping/pixel_som_clustering_test.py @@ -232,7 +232,7 @@ def test_cluster_pixels_base(multiprocess, capsys): os.path.join(temp_dir, 'pixel_mat_data'), norm_vals_path, 'bad_path.feather', fovs, chan_list ) - pixel_som_clustering.cluster_pixels(fovs, chan_list, temp_dir, pixel_pysom_bad) + pixel_som_clustering.cluster_pixels(fovs, temp_dir, pixel_pysom_bad) # create a sample PixelSOMCluster object pixel_pysom = cluster_helpers.PixelSOMCluster( @@ -242,7 +242,7 @@ def test_cluster_pixels_base(multiprocess, capsys): # run SOM cluster assignment pixel_som_clustering.cluster_pixels( - fovs, chan_list, temp_dir, pixel_pysom, 'pixel_mat_data', multiprocess=multiprocess + fovs, temp_dir, pixel_pysom, 'pixel_mat_data', multiprocess=multiprocess ) for fov in fovs: @@ -259,7 +259,7 @@ def test_cluster_pixels_base(multiprocess, capsys): # run SOM cluster assignment with overwrite flag pixel_som_clustering.cluster_pixels( - fovs, chan_list, temp_dir, pixel_pysom, 'pixel_mat_data', multiprocess=multiprocess, + fovs, temp_dir, pixel_pysom, 'pixel_mat_data', multiprocess=multiprocess, overwrite=True ) @@ -297,7 +297,7 @@ def test_cluster_pixels_corrupt(multiprocess, capsys): # run SOM cluster assignment pixel_som_clustering.cluster_pixels( - fovs, chans, temp_dir, pixel_pysom, 'pixel_mat_data', multiprocess=multiprocess + fovs, temp_dir, pixel_pysom, data_dir='pixel_mat_data', multiprocess=multiprocess ) # assert the _temp folder is now gone diff --git a/tests/segmentation/marker_quantification_test.py b/tests/segmentation/marker_quantification_test.py index dd96e8608..c3424892f 100644 --- a/tests/segmentation/marker_quantification_test.py +++ b/tests/segmentation/marker_quantification_test.py @@ -768,8 +768,10 @@ def test_generate_cell_table_mibitiff_loading(): fovs_subset_ext[1] = str(fovs_subset_ext[1]) + ".tiff" tiff_dir = os.path.join(temp_dir, "mibitiff_inputs") + seg_dir = os.path.join(temp_dir, "segmentation_masks") os.mkdir(tiff_dir) + os.mkdir(seg_dir) create_paired_xarray_fovs( base_dir=tiff_dir, fov_names=fovs, @@ -789,39 +791,35 @@ def test_generate_cell_table_mibitiff_loading(): for fov in 
range(cell_masks.shape[0]): fov_whole_cell = cell_masks[fov, :, :, 0] fov_nuclear = cell_masks[fov, :, :, 1] - image_utils.save_image(os.path.join(temp_dir, 'fov%d_whole_cell.tiff' % fov), + image_utils.save_image(os.path.join(seg_dir, 'fov%d_whole_cell.tiff' % fov), fov_whole_cell) - image_utils.save_image(os.path.join(temp_dir, 'fov%d_nuclear.tiff' % fov), + image_utils.save_image(os.path.join(seg_dir, 'fov%d_nuclear.tiff' % fov), fov_nuclear) # generate sample norm and arcsinh data for all fovs norm_data_all_fov, arcsinh_data_all_fov = marker_quantification.generate_cell_table( - segmentation_dir=temp_dir, tiff_dir=tiff_dir, - img_sub_folder=tiff_dir, is_mibitiff=True, fovs=None) + segmentation_dir=seg_dir, tiff_dir=tiff_dir, is_mibitiff=True, fovs=None) assert norm_data_all_fov.shape[0] > 0 and norm_data_all_fov.shape[1] > 0 assert arcsinh_data_all_fov.shape[0] > 0 and arcsinh_data_all_fov.shape[1] > 0 # generate sample norm and arcsinh data for a subset of fovs norm_data_fov_sub, arcsinh_data_fov_sub = marker_quantification.generate_cell_table( - segmentation_dir=temp_dir, tiff_dir=tiff_dir, - img_sub_folder=tiff_dir, is_mibitiff=True, fovs=fovs_subset) + segmentation_dir=seg_dir, tiff_dir=tiff_dir, is_mibitiff=True, fovs=fovs_subset) assert norm_data_fov_sub.shape[0] > 0 and norm_data_fov_sub.shape[1] > 0 assert arcsinh_data_fov_sub.shape[0] > 0 and arcsinh_data_fov_sub.shape[1] > 0 # generate sample norm and arcsinh data for a subset of fovs with extensions norm_data_fov_ext, arcsinh_data_fov_ext = marker_quantification.generate_cell_table( - segmentation_dir=temp_dir, tiff_dir=tiff_dir, - img_sub_folder=tiff_dir, is_mibitiff=True, fovs=fovs_subset_ext) + segmentation_dir=seg_dir, tiff_dir=tiff_dir, is_mibitiff=True, fovs=fovs_subset_ext) assert norm_data_fov_ext.shape[0] > 0 and norm_data_fov_ext.shape[1] > 0 assert arcsinh_data_fov_ext.shape[0] > 0 and arcsinh_data_fov_ext.shape[1] > 0 # test nuclear_counts True norm_data_nuc, arcsinh_data_nuc = marker_quantification.generate_cell_table( - segmentation_dir=temp_dir, tiff_dir=tiff_dir, - img_sub_folder=tiff_dir, is_mibitiff=True, fovs=fovs_subset, + segmentation_dir=seg_dir, tiff_dir=tiff_dir, is_mibitiff=True, fovs=fovs_subset, nuclear_counts=True) # setting nuclear_counts True generates data for both whole_cell and nuclear diff --git a/tests/utils/example_dataset_test.py b/tests/utils/example_dataset_test.py index 9c06372ff..fae34dcb7 100644 --- a/tests/utils/example_dataset_test.py +++ b/tests/utils/example_dataset_test.py @@ -9,17 +9,17 @@ from ark.utils.example_dataset import ExampleDataset, get_example_dataset -@pytest.fixture(scope="session", params=["segment_image_data", - "cluster_pixels", - "cluster_cells", - "post_clustering", - "fiber_segmentation", - "LDA_preprocessing", - "LDA_training_inference", - "neighborhood_analysis", - "pairwise_spatial_enrichment", - "ome_tiff", - "ez_seg_data"]) +@pytest.fixture(scope="class", params=["segment_image_data", + "cluster_pixels", + "cluster_cells", + "post_clustering", + "fiber_segmentation", + "LDA_preprocessing", + "LDA_training_inference", + "neighborhood_analysis", + "pairwise_spatial_enrichment", + "ome_tiff", + "ez_seg_data"]) def dataset_download(request, dataset_cache_dir) -> Iterator[ExampleDataset]: """ A Fixture which instantiates and downloads the dataset with respect to each @@ -32,13 +32,13 @@ def dataset_download(request, dataset_cache_dir) -> Iterator[ExampleDataset]: Yields: Iterator[ExampleDataset]: The iterable Example Dataset. 
""" - # Set up ExampleDataset class - example_dataset: ExampleDataset = ExampleDataset( + example_dataset = ExampleDataset( dataset=request.param, cache_dir=dataset_cache_dir, revision=EXAMPLE_DATASET_REVISION ) + # Download example data for a particular notebook example_dataset.download_example_dataset() yield example_dataset @@ -150,6 +150,7 @@ def _setup(self): ] } + # Mapping the datasets to their respective test functions. self.dataset_test_fns: dict[str, Callable] = { "image_data": self._image_data_check, "cell_table": self._cell_table_check, @@ -162,7 +163,6 @@ def _setup(self): "ez_seg_data": self._ez_seg_data_check } - # Mapping the datasets to their respective test functions. # Should be the same as `example_dataset.ExampleDataset.path_suffixes` self.move_path_suffixes = { "image_data": "image_data", @@ -182,16 +182,13 @@ def test_download_example_dataset(self, dataset_download: ExampleDataset): Args: dataset_download (ExampleDataset): Fixture for the dataset, respective to each - partition (`segment_image_data`, `cluster_pixels`, `cluster_cells`, - `post_clustering`). """ dataset_names = list( - dataset_download.dataset_paths[dataset_download.dataset].features.keys()) - + dataset_download.dataset_paths[dataset_download.dataset].keys()) for ds_n in dataset_names: dataset_cache_path = pathlib.Path( - dataset_download.dataset_paths[dataset_download.dataset][ds_n][0]) - self.dataset_test_fns[ds_n](dir_p=dataset_cache_path / ds_n) + dataset_download.dataset_paths[dataset_download.dataset][ds_n]) + self.dataset_test_fns[ds_n](dir_p=dataset_cache_path) @pytest.mark.parametrize("_overwrite_existing", [True, False]) def test_move_example_dataset(self, cleanable_tmp_path, dataset_download: ExampleDataset, @@ -567,7 +564,7 @@ def _suffix_paths(self, dataset_download: ExampleDataset, Generator: Yields the data directory for the files to be moved, and the dataset name. """ dataset_names = list( - dataset_download.dataset_paths[dataset_download.dataset].features.keys() + dataset_download.dataset_paths[dataset_download.dataset].keys() ) ds_n_suffixes = [self.move_path_suffixes[ds_n] for ds_n in dataset_names] diff --git a/tests/utils/notebooks_test.py b/tests/utils/notebooks_test.py index 4a2946387..6fea98dc2 100644 --- a/tests/utils/notebooks_test.py +++ b/tests/utils/notebooks_test.py @@ -66,7 +66,6 @@ def nb1_context( SEGMENT_IMAGE_DATA_PATH: pathlib.Path = templates_dir / "1_Segment_Image_Data.ipynb" with testbook(SEGMENT_IMAGE_DATA_PATH, timeout=6000, execute=False) as nb_context_manager: yield nb_context_manager, base_dir_generator / "nb1" - print("after init class") shutil.rmtree(base_dir_generator)