diff --git a/utils/omero-download-tool/.bumpversion.cfg b/utils/omero-download-tool/.bumpversion.cfg
new file mode 100644
index 000000000..c8a407ba8
--- /dev/null
+++ b/utils/omero-download-tool/.bumpversion.cfg
@@ -0,0 +1,27 @@
+[bumpversion]
+current_version = 0.1.0-dev0
+commit = True
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize =
+    {major}.{minor}.{patch}-{release}{dev}
+    {major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values =
+    dev
+    _
+
+[bumpversion:part:dev]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:src/polus/images/utils/omero_download/__init__.py]
diff --git a/utils/omero-download-tool/.gitignore b/utils/omero-download-tool/.gitignore
new file mode 100644
index 000000000..c4aa6d8e4
--- /dev/null
+++ b/utils/omero-download-tool/.gitignore
@@ -0,0 +1,175 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock

+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+poetry.lock
+../../poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# vscode
+.vscode
+
+# test data directory
+data
+# yaml file
+.pre-commit-config.yaml
+
+# hidden files
+.DS_Store
+.ds_store
+# flake8
+.flake8
diff --git a/utils/omero-download-tool/Dockerfile b/utils/omero-download-tool/Dockerfile
new file mode 100644
index 000000000..e38498fab
--- /dev/null
+++ b/utils/omero-download-tool/Dockerfile
@@ -0,0 +1,34 @@
+FROM continuumio/miniconda3
+
+# environment variables defined in polusai/bfio
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_IMG_EXT=".ome.tif"
+ENV POLUS_TAB_EXT=".csv"
+ENV POLUS_LOG="INFO"
+
+ARG OMERO_USERNAME
+ARG OMERO_PASSWORD
+ENV OMERO_USERNAME=$OMERO_USERNAME
+ENV OMERO_PASSWORD=$OMERO_PASSWORD
+
+
+ARG conda_env=project_env
+
+# Work directory defined in the base container
+WORKDIR ${EXEC_DIR}
+
+COPY pyproject.toml ${EXEC_DIR}
+COPY environment.yml ${EXEC_DIR}
+COPY VERSION ${EXEC_DIR}
+COPY README.md ${EXEC_DIR}
+COPY src ${EXEC_DIR}/src
+
+RUN conda env create -f environment.yml
+
+ENV PATH /opt/conda/envs/project_env/bin:$PATH
+RUN /bin/bash -c "source activate project_env"
+
+RUN pip3 install ${EXEC_DIR} --no-cache-dir
+
+ENTRYPOINT ["python3", "-m", "polus.images.utils.omero_download"]
+CMD ["--help"]
diff --git a/utils/omero-download-tool/README.md b/utils/omero-download-tool/README.md
new file mode 100644
index 000000000..163af1369
--- /dev/null
+++ b/utils/omero-download-tool/README.md
@@ -0,0 +1,45 @@
+# Omero Download (v0.1.0-dev0)
+
+This tool retrieves data from the OMERO NCATS server ([omero_plus](http://165.112.226.159/omero_plus/login/?url=%2Fwebclient%2F)).
+
+## Note
+To access data on the OMERO NCATS server, you must be connected to `NIHVPN`.
+1. Specify environment variables for the server's username and password at the command prompt:
+
+    export `OMERO_USERNAME=XXXX` \
+    export `OMERO_PASSWORD=XXXX`
+
+
+Conda is used to install all dependencies because one of the critical packages, `omero-py`, cannot be installed reliably with pip.
+
+The object types currently supported by this tool are: `project`, `dataset`, `screen`, `plate`, `well`.
+
+
+## Building
+
+To build the Docker image for the download plugin, run
+`bash build-docker.sh`.
+
+## Run the Docker image
+
+To execute the built Docker image for the download plugin, run
+`bash run-plugin.sh`.
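+
+## Connection example
+
+Internally, the tool reads `OMERO_USERNAME` and `OMERO_PASSWORD` from the environment and opens a `BlitzGateway` session from `omero-py` against the NCATS server. The snippet below is a minimal, standalone sketch of that connection step, useful for checking credentials and VPN access before running the container; it is illustrative rather than part of the tool, and the host and port mirror the values hard-coded in `omero_download.py`.
+
+```python
+import os
+
+from omero.gateway import BlitzGateway
+
+# Same credentials and server settings the tool itself uses.
+conn = BlitzGateway(
+    os.environ["OMERO_USERNAME"],
+    os.environ["OMERO_PASSWORD"],
+    host="165.112.226.159",  # NCATS OMERO server (reachable only on NIHVPN)
+    port=4064,
+)
+
+if conn.connect():
+    # List the datasets visible to this account, then close the session.
+    for dataset in conn.getObjects("dataset"):
+        print(dataset.getId(), dataset.getName())
+    conn.close()
+else:
+    print("Could not connect; check the VPN and the exported credentials.")
+```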
+
+## Options
+
+This plugin takes four input arguments and one output argument:
+
+| Name          | Description                                                  | I/O    | Type        |
+| ------------- | ------------------------------------------------------------ | ------ | ----------- |
+| `--dataType`  | Object type to be retrieved from the OMERO server            | Input  | String      |
+| `--name`      | Name of the object                                           | Input  | String      |
+| `--objectId`  | Identifier of the object                                     | Input  | Integer     |
+| `--outDir`    | Directory to store the downloaded data                       | Output | genericData |
+| `--preview`   | Generate a JSON preview of the expected outputs              | Input  | Boolean     |
+
+
+
+## Sample docker command:
+```bash
+docker run -e OMERO_USERNAME=$OMERO_USERNAME -e OMERO_PASSWORD=$OMERO_PASSWORD -v /home/ec2-user/data/:/home/ec2-user/data/ polusai/omero-download-tool:0.1.0-dev0 --dataType="plate" --objectId=108 --outDir=/home/ec2-user/data/output
+```
diff --git a/utils/omero-download-tool/VERSION b/utils/omero-download-tool/VERSION
new file mode 100644
index 000000000..206c0852b
--- /dev/null
+++ b/utils/omero-download-tool/VERSION
@@ -0,0 +1 @@
+0.1.0-dev0
diff --git a/utils/omero-download-tool/build-docker.sh b/utils/omero-download-tool/build-docker.sh
new file mode 100644
index 000000000..4f0a100a9
--- /dev/null
+++ b/utils/omero-download-tool/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+version=$(<VERSION)
+docker build . -t polusai/omero-download-tool:${version}
diff --git a/utils/omero-download-tool/pyproject.toml b/utils/omero-download-tool/pyproject.toml
new file mode 100644
--- /dev/null
+++ b/utils/omero-download-tool/pyproject.toml
+[tool.poetry]
+version = "0.1.0-dev0"
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = "3.9.18"
+bfio = {version = "^2.3.6", extras = ["all"]}
+numpy = "1.21.0"
+typer = "^0.7.0"
+pydantic = "^2.5.3"
+
+[[tool.poetry.source]]
+name = "test"
+url = "https://test.pypi.org/simple/"
+default = false
+secondary = true
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+flake8 = "^6.0.0"
+pre-commit = "^3.2.1"
+flake8-docstrings = "^1.7.0"
+black = "^23.3.0"
+mypy = "^1.1.1"
+pytest = "^7.2.2"
+ruff = "^0.0.270"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+pythonpath = [
+  "."
+] diff --git a/utils/omero-download-tool/run-plugin.sh b/utils/omero-download-tool/run-plugin.sh new file mode 100644 index 000000000..16d2646b2 --- /dev/null +++ b/utils/omero-download-tool/run-plugin.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +version=$( None: + """Retrieve the microscopy image data from the OMERO NCATS server.""" + logger.info(f"--dataType = {data_type}") + logger.info(f"--name = {name}") + logger.info(f"--objectId = {object_id}") + logger.info(f"--outDir = {out_dir}") + + out_dir = out_dir.resolve() + + if not Path(out_dir).exists(): + out_dir.mkdir(exist_ok=True) + + if not preview: + model = od.OmeroDwonload( + data_type=data_type.value, + name=name, + object_id=object_id, + out_dir=out_dir, + ) + model.get_data() + else: + od.generate_preview(out_dir) + + +if __name__ == "__main__": + app() diff --git a/utils/omero-download-tool/src/polus/images/utils/omero_download/omero_download.py b/utils/omero-download-tool/src/polus/images/utils/omero_download/omero_download.py new file mode 100644 index 000000000..fcf5a08b8 --- /dev/null +++ b/utils/omero-download-tool/src/polus/images/utils/omero_download/omero_download.py @@ -0,0 +1,361 @@ +"""Omero Download Tool.""" + +import json +import logging +import os +from enum import Enum +from itertools import product +from pathlib import Path +from typing import Any +from typing import Optional +from typing import Union + +import numpy as np +from bfio import BioWriter +from omero.gateway import BlitzGateway +from omero.plugins.download import DownloadControl +from pydantic import BaseModel as V2BaseModel +from pydantic import model_validator + +OMERO_USERNAME = os.environ.get("OMERO_USERNAME") +OMERO_PASSWORD = os.environ.get("OMERO_PASSWORD") +HOST = "165.112.226.159" +PORT = 4064 + + +logger = logging.getLogger(__name__) +logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) + + +def generate_preview( + path: Path, +) -> None: + """Generate preview of the plugin outputs.""" + with Path.open(Path(path).joinpath("preview.json"), "w") as fw: + flist = [ + f"B{index}_f0_z0_t0_c{c}.ome.tif" for index in range(10) for c in range(2) + ] + out_files: dict[str, Union[list, str]] = { + r"filepattern": ".*.ome.tif", + "outDir": [], + } + out_files["outDir"].append(flist) # type: ignore + json.dump(out_files, fw, indent=2) + + +class DATATYPE(str, Enum): + """Objects types.""" + + PROJECT = "project" + DATASET = "dataset" + SCREEN = "screen" + PLATE = "plate" + WELL = "well" + Default = "dataset" + + +class ServerConnection(V2BaseModel): + """Establishes a connection to an OMERO server using BlitzGateway. + + Args: + username: The username for authentication. + password: The password for authentication + host: The IP address of the OMERO server. + port: Port used to establish a connection between client and server. + + Returns: + BlitzGateway: A connection object to the OMERO server. 
+ """ + + username: str + password: str + host: str + port: int + + def _authentication(self) -> BlitzGateway: + """Connection to an OMERO server using BlitzGateway.""" + return BlitzGateway( + self.username, + self.password, + host=self.host, + port=self.port, + ) + + +class CustomValidation(V2BaseModel): + """Properties with validation.""" + + data_type: str + out_dir: Path + name: Optional[str] = None + object_id: Optional[int] = None + + @model_validator(mode="before") + @classmethod + def validate_data(cls, values: Any) -> Any: # noqa: ANN401 + """Validation of Paths.""" + out_dir = values.get("out_dir") + data_type = values.get("data_type") + name = values.get("name") + object_id = values.get("object_id") + + if not out_dir.exists(): + msg = f"Output directory donot exist {out_dir}" + raise ValueError(msg) + + conn_model = ServerConnection( + username=OMERO_USERNAME, + password=OMERO_PASSWORD, + host=HOST, + port=PORT, + ) + conn = conn_model._authentication() + conn.connect() + data = conn.getObjects(data_type) + ids = [] + names = [] + for d in data: + ids.append(d.getId()) + names.append(d.getName()) + + if name is not None and name not in names: + msg = f"No such file is available {data_type}: name={name}" + raise FileNotFoundError(msg) + + if object_id is not None and object_id not in ids: + msg = f"No such file is available {data_type}: object_id={object_id}" + raise FileNotFoundError(msg) + conn.close() + + return values + + +class OmeroDwonload(CustomValidation): + """Fetch data from an Omero Server. + + Args: + data_type: The supported object types to be retreived.\ + Must be one of [project, dataset, screen, plate, well] + name: Name of the object to be downloaded. Defaults to None. + object_id: Identification of the object to be downloaded. Defaults to None. + out_dir: The directory path for the outputs. + + Returns: + microscopy image data. 
+ """ + + data_type: str + name: Optional[str] = None + object_id: Optional[int] = None + out_dir: Path + + def _create_output_directory(self, name: str) -> Path: + """Create an output directory.""" + output = Path(self.out_dir).joinpath(name) + + if not output.exists(): + output.mkdir(exist_ok=True) + + return output + + def _write_ometif(self, image: np.ndarray, out_file: Path) -> None: + """Utilizing BioWriter for writing numpy arrays.""" + with BioWriter(file_path=out_file) as bw: + bw.X = image.shape[1] + bw.Y = image.shape[0] + bw.dtype = image.dtype + bw[:, :, :, :, :] = image + + def _rename(self, x: str) -> str: + """Rename a string.""" + return x.replace(".", "_") + + def _saveimage( # noqa PLR0913 + self, + image: np.ndarray, + name: str, + dir_name: Path, + index: int, + z: int, + c: int, + t: int, + ) -> None: + """Generating a single-plane image using BioWriter.""" + name = f"{name}_f{index}_z{z}_t{t}_c{c}.ome.tif" + image_name = Path(dir_name, name) + image = np.expand_dims(image, axis=(2, 3, 4)) + self._write_ometif(image, image_name) + + def get_data(self) -> None: # noqa: PLR0912 PLR0915 C901 + """Retrieve data from the OMERO Server.""" + conn_model = ServerConnection( + username=OMERO_USERNAME, + password=OMERO_PASSWORD, + host=HOST, + port=PORT, + ) + conn = conn_model._authentication() + conn.connect() + dc = DownloadControl() + data = conn.getObjects(self.data_type) + + try: + for d in data: + if self.name is not None or self.object_id is not None: + if self.data_type == "project": # noqa: SIM102 + if d.getName() == self.name or d.getId() == self.object_id: + logger.info( + f"Downloading {self.data_type}: \ + name={d.getName()} id={d.getId()}", + ) + project_name = d.getName() + project_dir = self._create_output_directory(project_name) + for data in d.listChildren(): + dataset_name = data.getName() + dataset_path = str(project_dir.joinpath(dataset_name)) + dataset_dir = self._create_output_directory( + dataset_path, + ) + for image in data.listChildren(): + image_file = image.getFileset() + if ( + image_file.__class__.__name__ + == "_FilesetWrapper" + ): + image_file = image.getFileset() + dc.download_fileset( + conn, + image_file, + dataset_dir, + ) + + if image_file is None: + pixels = image.getPrimaryPixels().getPlane() + for t, c, z in product( + range(0, image.getSizeT()), + range(0, image.getSizeC()), + range(0, image.getSizeZ()), + ): + pixels = image.getPrimaryPixels().getPlane( + theZ=z, + theC=c, + theT=t, + ) + fimage = np.expand_dims( + pixels, + axis=(2, 3, 4), + ) + outname = f"{image.getName()}.ome.tif" + outfile = dataset_dir.joinpath(outname) + self._write_ometif(fimage, outfile) + + if self.data_type == "dataset": # noqa: SIM102 + if d.getName() == self.name or d.getId() == self.object_id: + logger.info( + f"Downloading {self.data_type}: \ + name={d.getName()} id={d.getId()}", + ) + dataset_name = d.getName() + dataset_dir = self._create_output_directory(dataset_name) + for image in d.listChildren(): + image_file = image.getFileset() + if image_file.__class__.__name__ == "_FilesetWrapper": + dc.download_fileset(conn, image_file, dataset_dir) + if image_file is None: + pixels = image.getPrimaryPixels().getPlane() + for t, c, z in product( + range(0, image.getSizeT()), + range(0, image.getSizeC()), + range(0, image.getSizeZ()), + ): + pixels = image.getPrimaryPixels().getPlane( + theZ=z, + theC=c, + theT=t, + ) + fimage = np.expand_dims(pixels, axis=(2, 3, 4)) + outname = f"{image.getName()}.ome.tif" + outfile = dataset_dir.joinpath(outname) + 
self._write_ometif(fimage, outfile) + + if self.data_type == "screen": # noqa: SIM102 + if d.getName() == self.name or d.getId() == self.object_id: + screen_name = d.getName() + screen_dir = self._create_output_directory(screen_name) + for plate in d.listChildren(): + plate_name = plate.getName() + if plate_name == "MeasurementIndex.ColumbusIDX.xml": + plate_name = self._rename(plate_name) + plate_name = screen_dir.joinpath(plate_name) + plate_dir = self._create_output_directory(plate_name) + for well in plate.listChildren(): + indicies = well.countWellSample() + well_name = well.getWellPos() + for index in range(0, indicies): + pixels = well.getImage(index).getPrimaryPixels() + for t, c, z in product( + range(0, pixels.getSizeT()), + range(0, pixels.getSizeC()), + range(0, pixels.getSizeZ()), + ): + image = pixels.getPlane( + theZ=z, + theC=c, + theT=t, + ) + self._saveimage( + image, + well_name, + plate_dir, + index, + z, + c, + t, + ) + if self.data_type == "plate": # noqa: SIM102 + if d.getName() == self.name or d.getId() == self.object_id: + plate_name = d.getName() + if plate_name == "MeasurementIndex.ColumbusIDX.xml": + plate_name = self._rename(plate_name) + plate_dir = self._create_output_directory(plate_name) + for well in d.listChildren(): + indicies = well.countWellSample() + well_name = well.getWellPos() + for index in range(0, indicies): + pixels = well.getImage(index).getPrimaryPixels() + for t, c, z in product( + range(0, pixels.getSizeT()), + range(0, pixels.getSizeC()), + range(0, pixels.getSizeZ()), + ): + image = pixels.getPlane(theZ=z, theC=c, theT=t) + self._saveimage( + image, + well_name, + plate_dir, + index, + z, + c, + t, + ) + + if self.data_type == "well" and d.getId() == self.object_id: + well_pos = d.getWellPos() + well_id = d.getId() + well_name = f"well_{well_id}_{well_pos}" + well_dir = self._create_output_directory(well_name) + pixels = d.getImage().getPrimaryPixels() + for index, (t, c, z) in enumerate( + product( + range(0, pixels.getSizeT()), + range(0, pixels.getSizeC()), + range(0, pixels.getSizeZ()), + ), + ): + image = pixels.getPlane(theZ=z, theC=c, theT=t) + self._saveimage(image, well_name, well_dir, index, z, c, t) + + conn.close() + + except ValueError: + logger.error("Invalid either object types, names or identifier") diff --git a/utils/omero-download-tool/tests/__init__.py b/utils/omero-download-tool/tests/__init__.py new file mode 100644 index 000000000..fd3549d9c --- /dev/null +++ b/utils/omero-download-tool/tests/__init__.py @@ -0,0 +1 @@ +"""Omero Download Tool.""" diff --git a/utils/omero-download-tool/tests/conftest.py b/utils/omero-download-tool/tests/conftest.py new file mode 100644 index 000000000..2fa881ca3 --- /dev/null +++ b/utils/omero-download-tool/tests/conftest.py @@ -0,0 +1,50 @@ +"""Test fixtures. + +Set up all data used in tests. 
+""" + +import shutil +import tempfile +from pathlib import Path +from typing import Union + +import pytest +from polus.images.utils.omero_download.omero_download import DATATYPE + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add options to pytest.""" + parser.addoption( + "--slow", + action="store_true", + dest="slow", + default=False, + help="run slow tests", + ) + + +def clean_directories() -> None: + """Remove all temporary directories.""" + for d in Path(".").cwd().iterdir(): + if d.is_dir() and d.name.startswith("tmp"): + shutil.rmtree(d) + + +@pytest.fixture() +def output_directory() -> Union[str, Path]: + """Create output directory.""" + return Path(tempfile.mkdtemp(dir=Path.cwd())) + + +@pytest.fixture( + params=[ + (DATATYPE.WELL, None, 1084), + (DATATYPE.DATASET, "211129_pyomero", None), + (DATATYPE.SCREEN, "Inae_timeseries", None), + (DATATYPE.PROJECT, "NCI_IMS", None), + (DATATYPE.PLATE, None, 60), + ], +) +def get_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest: + """To get the parameter of the fixture.""" + return request.param diff --git a/utils/omero-download-tool/tests/test_cli.py b/utils/omero-download-tool/tests/test_cli.py new file mode 100644 index 000000000..5163496e2 --- /dev/null +++ b/utils/omero-download-tool/tests/test_cli.py @@ -0,0 +1,33 @@ +"""Test Command line Tool.""" + +from typer.testing import CliRunner +from pathlib import Path +import pytest +from polus.images.utils.omero_download.__main__ import app +from .conftest import clean_directories +import time + + +@pytest.mark.skipif("not config.getoption('slow')") +def test_cli(output_directory: Path, get_params: pytest.FixtureRequest) -> None: + """Test the command line.""" + runner = CliRunner() + data_type, name, object_id = get_params + + result = runner.invoke( + app, + [ + "--dataType", + data_type, + "--name", + name, + "--objectId", + object_id, + "--outDir", + output_directory, + ], + ) + + assert result.exit_code == 0 + time.sleep(5) + clean_directories() diff --git a/utils/omero-download-tool/tests/test_omero_download.py b/utils/omero-download-tool/tests/test_omero_download.py new file mode 100644 index 000000000..a76b37d82 --- /dev/null +++ b/utils/omero-download-tool/tests/test_omero_download.py @@ -0,0 +1,27 @@ +"""Test Omero download Tool.""" + +from pathlib import Path + +import polus.images.utils.omero_download.omero_download as od +import pytest + +from .conftest import clean_directories + + +@pytest.mark.skipif("not config.getoption('slow')") +def test_omero_download( + output_directory: Path, + get_params: pytest.FixtureRequest, +) -> None: + """Test data from Omero Server.""" + data_type, name, object_id = get_params + model = od.OmeroDwonload( + data_type=data_type, + name=name, + object_id=object_id, + out_dir=output_directory, + ) + model.get_data() + assert any(output_directory.iterdir()) is True + + clean_directories()