From c2b84e2655b73fd6efbee8bebb012aa246b74c45 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Thu, 18 Jan 2024 07:50:32 -0600 Subject: [PATCH] created plugin structure --- .../package-release.sh | 16 ++ .../feature-subsetting-plugin/run-docker.sh | 23 +++ .../clustering/feature_subsetting/__init__.py | 2 + .../clustering/feature_subsetting/__main__.py | 156 ++++++++++++++++++ .../feature_subsetting/feature_subset.py} | 1 + .../tests/__init__.py | 0 .../tests/conftest.py | 0 .../tests/test_cli.py | 0 .../tests/test_feature_subsetting.py | 0 9 files changed, 198 insertions(+) create mode 100644 clustering/feature-subsetting-plugin/src/polus/plugins/clustering/feature_subsetting/__init__.py create mode 100644 clustering/feature-subsetting-plugin/src/polus/plugins/clustering/feature_subsetting/__main__.py rename clustering/feature-subsetting-plugin/src/{main.py => polus/plugins/clustering/feature_subsetting/feature_subset.py} (99%) create mode 100644 clustering/feature-subsetting-plugin/tests/__init__.py create mode 100644 clustering/feature-subsetting-plugin/tests/conftest.py create mode 100644 clustering/feature-subsetting-plugin/tests/test_cli.py create mode 100644 clustering/feature-subsetting-plugin/tests/test_feature_subsetting.py diff --git a/clustering/feature-subsetting-plugin/package-release.sh b/clustering/feature-subsetting-plugin/package-release.sh index e69de29bb..8e53414b9 100644 --- a/clustering/feature-subsetting-plugin/package-release.sh +++ b/clustering/feature-subsetting-plugin/package-release.sh @@ -0,0 +1,16 @@ +# This script is designed to help package a new version of a plugin + +# Get the new version +version=$( None: + """Cluster data using HDBSCAN.""" + logger.info(f"--inpDir = {inp_dir}") + logger.info(f"--filePattern = {file_pattern}") + # Regular expression for grouping. + logger.info(f"--groupingPattern = {grouping_pattern}") + # Whether to average data for each group. + logger.info(f"--averageGroups = {average_groups}") + # Name of column to use for grouping. + logger.info(f"--labelCol = {label_col}") + # Minimum cluster size for clustering using HDBSCAN. + logger.info(f"--minClusterSize = {min_cluster_size}") + # Set outlier cluster id as 1. + logger.info(f"--incrementOutlierId = {increment_outlier_id}") + logger.info(f"--outDir = {out_dir}") + + inp_dir = inp_dir.resolve() + out_dir = out_dir.resolve() + + assert inp_dir.exists(), f"{inp_dir} does not exist!! Please check input path again" + assert ( + out_dir.exists() + ), f"{out_dir} does not exist!! Please check output path again" + + num_workers = max([cpu_count(), 2]) + + files = fp.FilePattern(inp_dir, file_pattern) + + if files is None: + msg = f"No tabular files found. Please check {file_pattern} again" + raise ValueError(msg) + + if preview: + with Path.open(Path(out_dir, "preview.json"), "w") as jfile: + out_json: dict[str, Any] = { + "filepattern": file_pattern, + "outDir": [], + } + for file in files(): + out_name = file[1][0].name.replace( + "".join(file[1][0].suffixes), + f"_hdbscan{hd.POLUS_TAB_EXT}", + ) + out_json["outDir"].append(out_name) + json.dump(out_json, jfile, indent=2) + else: + with preadator.ProcessManager( + name="Cluster data using HDBSCAN", + num_processes=num_workers, + threads_per_process=2, + ) as pm: + for file in tqdm( + files(), + total=len(files()), + desc="Clustering data", + mininterval=5, + initial=0, + unit_scale=True, + colour="cyan", + ): + pm.submit_process( + hd.hdbscan_clustering, + file[1][0], + min_cluster_size, + out_dir, + grouping_pattern, + label_col, + average_groups, + increment_outlier_id, + ) + pm.join_processes() + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/clustering/feature-subsetting-plugin/src/main.py b/clustering/feature-subsetting-plugin/src/polus/plugins/clustering/feature_subsetting/feature_subset.py similarity index 99% rename from clustering/feature-subsetting-plugin/src/main.py rename to clustering/feature-subsetting-plugin/src/polus/plugins/clustering/feature_subsetting/feature_subset.py index a942d67c2..ccc179e74 100644 --- a/clustering/feature-subsetting-plugin/src/main.py +++ b/clustering/feature-subsetting-plugin/src/polus/plugins/clustering/feature_subsetting/feature_subset.py @@ -1,3 +1,4 @@ +"""Feature Subsetting Plugin.""" import argparse, logging, subprocess, time, multiprocessing, sys import os import filepattern diff --git a/clustering/feature-subsetting-plugin/tests/__init__.py b/clustering/feature-subsetting-plugin/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/clustering/feature-subsetting-plugin/tests/conftest.py b/clustering/feature-subsetting-plugin/tests/conftest.py new file mode 100644 index 000000000..e69de29bb diff --git a/clustering/feature-subsetting-plugin/tests/test_cli.py b/clustering/feature-subsetting-plugin/tests/test_cli.py new file mode 100644 index 000000000..e69de29bb diff --git a/clustering/feature-subsetting-plugin/tests/test_feature_subsetting.py b/clustering/feature-subsetting-plugin/tests/test_feature_subsetting.py new file mode 100644 index 000000000..e69de29bb