Skip to content

Commit

Permalink
searching files recursively in nested directories (#501)
Browse files Browse the repository at this point in the history
* fix merge conflicts

* searching files recursively in nested directories

* fix empty directory error

* fix numpy package in toml file

* fix regex for file matching

* fix annotations

* fix ruff checks

* fix ruff checks

* fix pre-commit checks

* fix merge conflicts

* deleted file

* fix plugin manifest

* fix plugin manifest

* fix repo name in manifest
  • Loading branch information
hamshkhawar authored Aug 6, 2024
1 parent 2ed6f47 commit 014f4dd
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 58 deletions.
16 changes: 4 additions & 12 deletions formats/file-renaming-tool/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,4 @@
## File Renaming(v0.2.0)

1. This plugin is updated only to the new plugin standards and no additional functionality is added.
2. This plugin is now installable with pip.
3. Argparse package is replaced with Typer package for command line arguments.
4. `baseCommand` added to the plugin manifest.
5. `--preview` flag is added which shows outputs to be generated by this plugin along with the outFilePattern used.
6. Use `python -m polus.plugins.formats.ome_converter` to run the plugin from the command line.
7. Replaced `Unittest` with `pytest` package.
8. Code is optimized for parallel execution of tasks
9. New optional input argument `mapDirectory` implemented to include the directory name in renamed files. If `raw` is selected, the original directory name is added to the renamed files; if `map` is selected, mapped values `d0, d1, d2, ... dn` are used for the subdirectories. If no value is passed, only files in the selected directory are renamed.
10. Added support for handling nested files in nested directories.
## [0.2.4-dev0] - 2024-01-17
### Added
- Pytests to test this plugin
- Added support for recursively searching a directory and its subdirectories for files matching the specified pattern; enable it by passing either `raw` or `map` for the `mapDirectory` input argument.
2 changes: 1 addition & 1 deletion formats/file-renaming-tool/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM polusai/bfio:2.1.9
FROM polusai/bfio:2.3.6

# environment variables defined in polusai/bfio
ENV EXEC_DIR="/opt/executables"
Expand Down
2 changes: 1 addition & 1 deletion formats/file-renaming-tool/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# File Renaming(0.2.0)
# File Renaming(0.2.4-dev0)
This WIPP plugin uses supplied file naming patterns to dynamically
rename and save files in an image collection to a new image collection.

Expand Down
2 changes: 1 addition & 1 deletion formats/file-renaming-tool/plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"description": "Rename and store image collection files in a new image collection",
"author": "Melanie Parham ([email protected]), Hamdah Shafqat Abbasi ([email protected])",
"institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
"repository": "https://github.com/PolusAI/polus-plugins",
"repository": "https://github.com/PolusAI/image-tools",
"website": "https://ncats.nih.gov/preclinical/core/informatics",
"citation": "",
"containerId": "polusai/file-renaming-tool:0.2.4",
Expand Down
3 changes: 2 additions & 1 deletion formats/file-renaming-tool/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ readme = "README.md"
packages = [{include = "polus", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.9"
python = ">=3.9,<3.12"
typer = "^0.7.0"
tqdm = "^4.64.1"
numpy = "^1.26.3"

[tool.poetry.group.dev.dependencies]
bump2version = "^1.0.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Any
from typing import Optional

import numpy as np
import typer
from polus.images.formats.file_renaming import file_renaming as fr

Expand All @@ -23,7 +24,7 @@


@app.command()
def main( # noqa: PLR0913 D417 C901 PLR0912
def main( # noqa: PLR0913 D417 C901 PLR0912 PLR0915
inp_dir: pathlib.Path = typer.Option(
...,
"--inpDir",
Expand Down Expand Up @@ -104,6 +105,17 @@ def main( # noqa: PLR0913 D417 C901 PLR0912
)

elif map_directory:
file_ext = re.split("\\.", file_pattern)[-1]

subdirs = np.unique(
[
sub
for sub in subdirs
for f in pathlib.Path(sub).rglob("*")
if f.suffix == f".{file_ext}"
],
)

if len(subdirs) == 1:
logger.info(
"Renaming files in a single directory.",
Expand All @@ -122,6 +134,9 @@ def main( # noqa: PLR0913 D417 C901 PLR0912
outfile_pattern = f"d1_{out_file_pattern}"

fr.rename(subdirs[0], out_dir, file_pattern, outfile_pattern)
logger.info(
"Finished renaming files.",
)
if len(subdirs) > 1:
subnames = [pathlib.Path(sb).name for sb in subdirs]
sub_check = all(name == subnames[0] for name in subnames)
Expand Down Expand Up @@ -149,6 +164,9 @@ def main( # noqa: PLR0913 D417 C901 PLR0912
else:
outfile_pattern = f"d{i}_{out_file_pattern}"
fr.rename(sub, out_dir, file_pattern, outfile_pattern)
logger.info(
"Finished renaming files.",
)

if preview:
with pathlib.Path.open(pathlib.Path(out_dir, "preview.json"), "w") as jfile:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

from tqdm import tqdm

# Suffixes of files that are not treated as images: files with these suffixes
# are ignored when deciding whether a directory holds images and when
# collecting input file paths.
EXT = (".csv", ".txt", ".cppipe", ".yml", ".yaml", ".xml", ".json")

# Module-level logger; the level is read from the POLUS_LOG environment
# variable and falls back to INFO when the variable is not set.
logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))

Expand All @@ -31,6 +33,20 @@ class MappingDirectory(str, enum.Enum):
Default = ""


def image_directory(dirpath: pathlib.Path) -> Union[bool, None]:
    """Check whether a directory directly contains at least one image file.

    A regular file counts as an image file when its suffix is not listed in
    ``EXT``. Every direct entry of the directory is inspected, so the result
    does not depend on the (arbitrary) order in which the filesystem lists
    the entries.

    Args:
        dirpath: Path to directory.

    Returns:
        True if at least one direct entry is a file whose suffix is not in
        ``EXT``, False if the directory has entries but none of them
        qualifies, and None if the directory is empty.
    """
    is_empty = True
    for entry in dirpath.iterdir():
        is_empty = False
        # Stop at the first qualifying file; subdirectories and files with
        # an excluded suffix are skipped instead of deciding the result.
        if entry.is_file() and entry.suffix not in EXT:
            return True
    # Keep the original contract for empty directories (None, not False).
    return None if is_empty else False


def get_data(inp_dir: str) -> tuple[list[pathlib.Path], list[pathlib.Path]]:
"""Get group names from pattern. Convert patterns (c+ or dd) to regex.
Expand All @@ -46,8 +62,9 @@ def get_data(inp_dir: str) -> tuple[list[pathlib.Path], list[pathlib.Path]]:
if path.is_dir():
if path.parent in dirpaths:
dirpaths.remove(path.parent)
dirpaths.append(path)
elif path.is_file() and not path.name.startswith("."):
if image_directory(path):
dirpaths.append(path)
elif path.is_file() and not path.name.endswith(tuple(EXT)):
fpath = pathlib.Path(inp_dir).joinpath(path)
filepath.append(fpath)

Expand Down Expand Up @@ -301,6 +318,7 @@ def rename( # noqa: C901, PLR0915, PLR0912
inp_files: list[str] = [
f"{f.name}" for f in inpfiles if pathlib.Path(f).suffix == f".{file_ext}"
]

if len(inp_files) == 0:
msg = "Please check input directory again!! As it does not contain files"
raise ValueError(msg)
Expand Down
Loading

0 comments on commit 014f4dd

Please sign in to comment.