diff --git a/.github/workflows/package-filter.yml b/.github/workflows/package-filter.yml index 9ab8f353b..070367408 100644 --- a/.github/workflows/package-filter.yml +++ b/.github/workflows/package-filter.yml @@ -20,6 +20,9 @@ on: list: description: "The list of directories containing the updated packages" value: ${{ jobs.package-filter.outputs.list }} + num_packages: + description: "The number of updated packages" + value: ${{ jobs.package-filter.outputs.num_packages }} permissions: contents: read @@ -31,6 +34,7 @@ jobs: outputs: matrix: ${{ steps.package-filter.outputs.matrix }} list: ${{ steps.package-filter.outputs.list }} + num_packages: ${{ steps.package-filter.outputs.num_packages }} steps: - name: Checkout uses: actions/checkout@v4 @@ -41,6 +45,7 @@ jobs: - name: Find Updated Packages id: package-filter run: | + NUM_PACKAGES=0 PACKAGE_DIRS="" COMPANION_FILES="VERSION .bumpversion.cfg" @@ -63,7 +68,17 @@ jobs: echo "The comparison point is ${comparison_point}" # Get the changed files - changed_files=$(git diff --name-only ${comparison_point}...) + # the `--diff-filter=ACMR` flag filters out deleted files. The filters are as follows: + # A: Added + # C: Copied + # M: Modified + # R: Renamed + # D: Deleted + # T: Type changed (for example, regular file or symlink or submodule) + # U: Unmerged + # X: Unknown + # B: Broken pairing + changed_files=$(git diff --diff-filter=ACMR --name-only ${comparison_point}...) # echo the changed files echo "The changed files are $changed_files" @@ -113,6 +128,7 @@ jobs: fi fi + NUM_PACKAGES=$((NUM_PACKAGES+1)) PACKAGE_DIRS="$PACKAGE_DIRS ${pkg_dir}" fi done @@ -124,29 +140,38 @@ jobs: echo "The updated packages are $PACKAGE_DIRS" if [ -z "$PACKAGE_DIRS" ] then - echo "::error::No updated packages were found" && exit 1 - fi + echo "::warning::No updated packages were found" - # Convert the package directories to JSON for the output matrix - JSON="{\"include\": [" - for package_dir in $PACKAGE_DIRS - do - package_name=$(basename $package_dir) - JSON_LINE="{\"package_dir\": \"${package_dir}\", \"package_name\": \"${package_name}\"}," - # Add the JSON line to the JSON string if it is not already included - if [ ! "$JSON" == *"$JSON_LINE"* ] + echo "matrix={\"include\": []}" >> $GITHUB_OUTPUT + echo "list=" >> $GITHUB_OUTPUT + echo "num_packages=0" >> $GITHUB_OUTPUT + + else + echo "The number of updated packages is $NUM_PACKAGES" + + # Convert the package directories to JSON for the output matrix + JSON="{\"include\": [" + for package_dir in $PACKAGE_DIRS + do + package_name=$(basename $package_dir) + JSON_LINE="{\"package_dir\": \"${package_dir}\", \"package_name\": \"${package_name}\"}," + # Add the JSON line to the JSON string if it is not already included + if [ ! 
"$JSON" == *"$JSON_LINE"* ] + then + JSON="$JSON$JSON_LINE" + fi + done + + # Remove trailing comma and add closing brackets + if [ "$JSON" == *"," ] then - JSON="$JSON$JSON_LINE" + JSON="${JSON%?}" fi - done + JSON="$JSON]}" - # Remove trailing comma and add closing brackets - if [ "$JSON" == *"," ] - then - JSON="${JSON%?}" - fi - JSON="$JSON]}" + # Set the output + echo "matrix=$( echo "$JSON" )" >> $GITHUB_OUTPUT + echo "list=$( echo "$PACKAGE_DIRS" )" >> $GITHUB_OUTPUT + echo "num_packages=$NUM_PACKAGES" >> $GITHUB_OUTPUT - # Set the output - echo "matrix=$( echo "$JSON" )" >> $GITHUB_OUTPUT - echo "list=$( echo "$PACKAGE_DIRS" )" >> $GITHUB_OUTPUT + fi diff --git a/.github/workflows/package-tests.yml b/.github/workflows/package-tests.yml index 44f4a1c65..b6ae538d3 100644 --- a/.github/workflows/package-tests.yml +++ b/.github/workflows/package-tests.yml @@ -27,6 +27,7 @@ jobs: pre-commit: name: Pre-commit | ${{ matrix.package_name }} needs: package-filter + if: ${{ needs.package-filter.outputs.num_packages > 0 }} strategy: fail-fast: false matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} @@ -56,6 +57,7 @@ jobs: docker: name: Docker | Build ${{ matrix.package_name }} needs: package-filter + if: ${{ needs.package-filter.outputs.num_packages > 0 }} strategy: fail-fast: false matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} @@ -106,6 +108,7 @@ jobs: tests: name: Test | ${{ matrix.package_name }} needs: package-filter + if: ${{ needs.package-filter.outputs.num_packages > 0 }} strategy: fail-fast: false matrix: ${{ fromJson(needs.package-filter.outputs.matrix) }} diff --git a/clustering/K-NN/Distributed-Memory/Dockerfile-Distributed b/clustering/K-NN/Distributed-Memory/Dockerfile-Distributed deleted file mode 100644 index 11e218a63..000000000 --- a/clustering/K-NN/Distributed-Memory/Dockerfile-Distributed +++ /dev/null @@ -1,30 +0,0 @@ -FROM ubuntu:latest - -RUN apt-get -y update && apt-get -y install g++ make wget -RUN mkdir -p /home/DistributedKNN /home/Inputs /home/Outputs - -COPY . 
/home/DistributedKNN -WORKDIR /home/DistributedKNN - -RUN wget https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.0.tar.gz -RUN tar xfz openmpi-4.0.0.tar.gz -RUN rm openmpi-4.0.0.tar.gz -WORKDIR /home/DistributedKNN/openmpi-4.0.0 -RUN ./configure -RUN make all install -ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" -ENV PATH="/usr/local/bin:${PATH}" - -WORKDIR /home/DistributedKNN -RUN wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz -RUN tar xfz boost_1_71_0.tar.gz -RUN rm boost_1_71_0.tar.gz -WORKDIR /home/DistributedKNN/boost_1_71_0 -RUN ./bootstrap.sh -RUN ./b2 -ENV LD_LIBRARY_PATH="/home/DistributedKNN/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" - -WORKDIR /home/DistributedKNN -RUN mpicxx -I/home/DistributedKNN/boost_1_71_0 KNN_Distributed_code-OpenMP.cpp -o output.exe -L/home/DistributedKNN/boost_1_71_0/stage/lib -lboost_iostreams -O2 -fopenmp -ENV OMP_NUM_THREADS=2 -ENTRYPOINT ["mpirun","-np","4","./output.exe"] diff --git a/clustering/K-NN/Distributed-Memory/KNN_Distributed_code-OpenMP.cpp b/clustering/K-NN/Distributed-Memory/KNN_Distributed_code-OpenMP.cpp deleted file mode 100644 index 336f9c56a..000000000 --- a/clustering/K-NN/Distributed-Memory/KNN_Distributed_code-OpenMP.cpp +++ /dev/null @@ -1,1411 +0,0 @@ -/** - * @author Mahdi Maghrebi - * October 2019 - * This is the Implementation of K-NN Algorithm in Distributed Systems as developed - * in "PANDA: Extreme Scale Parallel K-Nearest Neighbor on Distributed Architectures", Patwary et a., 2016 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; -/** - * Read the output of linux command execution - * @param cmd is the inux command to be executed - * @return the output from the execution of the linux command - */ -std::string exec(const char* cmd) { - std::array buffer; - std::string result; - std::unique_ptr pipe(popen(cmd, "r"), pclose); - if (!pipe) { - throw std::runtime_error("popen() failed!"); - } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - result += buffer.data(); - } - return result; -} -/** - * Defining the criteria for Sorting the data in a pair container from the biggest value to the smallest - */ -bool sortinrev(const pair &a,const pair &b) { - return (a.first > b.first); -} -/** - * Compute the variance of a sampled data over data dimensions and Sort dimensions according to their variability - * @param DataCounts Number of total data from which we take the samples - * @param nodeData0 Dataset containing the data available for sampling - * @param featureCounts Number of features in dataset (equal to number of columns in the input csv file) - * @param world_size Total number of MPI processors - * @param globalKdTreeSamples Number of Samples from dataset for computation here - * @return VectorGlobalSqrtSum A sorted pair containig the index of the dimensions with the highest variability - */ -auto findMaxVarDims(int DataCounts,double **nodeData0, int featureCounts, int world_size, int globalKdTreeSamples) { - double samplingData[globalKdTreeSamples][featureCounts]; - double localSum[featureCounts], globalSum[featureCounts]; - double localSqrtSum[featureCounts], globalSqrtSum[featureCounts]; - - for (int j=0; j> VectorGlobalSqrtSum; - - for (int j=0; j nodeDataIndex0, int globalKdTreeSamplesMedian, double Epsilon, int world_size, int world_rank, double** data) { - int randomIndex; - vector sampledDataValues, leftSampledDataValues, 
rightSampledDataValues; - sampledDataValues.reserve(globalKdTreeSamplesMedian); - leftSampledDataValues.reserve(globalKdTreeSamplesMedian); - rightSampledDataValues.reserve(globalKdTreeSamplesMedian); - - for (int i=0; i< globalKdTreeSamplesMedian; ++i){ - randomIndex=rand()%nodeDataIndex0.size(); - int index=nodeDataIndex0[randomIndex]; - sampledDataValues.push_back(data[index][maxVarDimension]); - } - - int randomRank; - double MedianCandidate; - int totalCountsData=world_size*globalKdTreeSamplesMedian; - int accumulatedLeftCounts=0; - bool whileFlag=true; - int whileCount=0; - - while(whileFlag){ - if (world_rank==0) {randomRank=rand()%world_size;} - MPI_Bcast(&randomRank,1,MPI_INT,0,MPI_COMM_WORLD); - - if (world_rank==randomRank) { - randomIndex=rand()%sampledDataValues.size(); - MedianCandidate=sampledDataValues[randomIndex]; - } - MPI_Bcast(&MedianCandidate,1,MPI_DOUBLE,randomRank,MPI_COMM_WORLD); - - int leftCounts=0; int rightCounts=0;int globalleftCounts=0; - leftSampledDataValues.clear(); - rightSampledDataValues.clear(); - - for (int i=0; i 0.5-Epsilon ) { - whileFlag=false; - return MedianCandidate ;} - else if (ratio < 0.5-Epsilon){ - accumulatedLeftCounts=globalleftCounts; - sampledDataValues.clear(); - sampledDataValues=rightSampledDataValues; - } - - ++whileCount; - // For diagnosis, the following error hints at the difficulty of finding the median - MPI_File logfile; - char line[1024]; - if (whileCount % 10000 == 0) { - printf("Too Many Trials for Global KD Tree Median, Processor = %d \n",world_rank); - sprintf(line,"Too Many Trials for Global KD Tree Median, Processor = %d \n",world_rank); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - } - } -} -/** - * Compute the median of data at a dividing node of the local Kd Tree - * @param localKdTreeSamplesMedian Number of samples used to compute the median - * @paramn sampledDataValues The coordinates of the sampled data - * @param Epsilon The acceptable buffer in estimating the median - * @param world_rank Rank of each MPI processor - * @return MedianCandidate The estimated value of median at the dividing node - */ -double localFindMedian (int localKdTreeSamplesMedian,vector sampledDataValues, double Epsilon, int world_rank) { - vector leftSampledDataValues, rightSampledDataValues; - leftSampledDataValues.reserve(localKdTreeSamplesMedian); - rightSampledDataValues.reserve(localKdTreeSamplesMedian); - - int accumulatedLeftCounts=0; - bool whileFlag=true; - int whileCount=0; - - while(whileFlag){ - int randomIndex=rand()%sampledDataValues.size(); - double MedianCandidate=sampledDataValues[randomIndex]; - int leftCounts=0; - int rightCounts=0; - leftSampledDataValues.clear(); - rightSampledDataValues.clear(); - - for (int i=0; i 0.5-Epsilon ) { - whileFlag=false; - return MedianCandidate; - } - else if (ratio < 0.5-Epsilon){ - accumulatedLeftCounts=leftCounts; - sampledDataValues.clear(); - sampledDataValues=rightSampledDataValues; - whileFlag=true; - } - else if (ratio > 0.5+Epsilon){ - whileFlag=true; - } - ++whileCount; - - if (whileCount % 10000 == 0) { - if (Epsilon<0.25) Epsilon*=2; - else return MedianCandidate; - } - } -} -/** - * Sort the max-heap data structure for a new data inserted at its index i - * @param ID The ID of the point data - * @paramn i Index of the inserted data in the Heap - * @param KNNDistanceinBuckets The values of distances for selected K-NNs - * @param KNNIDsinBuckets The IDs of the selected K-NNs - * @param KNNCounts Desired count of K-NNs to be computed in this 
program - */ -void Max_Heapify(int ID, int i, double ** KNNDistanceinBuckets, int ** KNNIDsinBuckets,int KNNCounts) { - int largest = 0; - int l = 2*i + 1; - int r = 2*i + 2; - - if ((l < KNNCounts) && (KNNDistanceinBuckets[ID][l] > KNNDistanceinBuckets[ID][i])) { - largest = l; - } - else { - largest = i; - } - - if ((r < KNNCounts) && (KNNDistanceinBuckets[ID][r] > KNNDistanceinBuckets[ID][largest])) { - largest = r; - } - - if (largest != i) { - std::swap(KNNDistanceinBuckets[ID][i], KNNDistanceinBuckets[ID][largest]); - std::swap(KNNIDsinBuckets[ID][i], KNNIDsinBuckets[ID][largest]); - Max_Heapify(ID, largest, KNNDistanceinBuckets, KNNIDsinBuckets,KNNCounts); - } -} -/** - * Build Max-Heap datat structure for the first time - * @param ID The ID of the point data - * @param KNNCounts Desired count of K-NNs to be computed in this program - * @param KNNDistanceinBuckets The values of distances for selected K-NNs - * @param KNNIDsinBuckets The IDs of the selected K-NNs - */ -void Build_Max_Heap(int ID,int KNNCounts, double** KNNDistanceinBuckets, int** KNNIDsinBuckets) { - for (int i = floor((KNNCounts - 1) / 2); i >= 0; i--) { - Max_Heapify(ID, i,KNNDistanceinBuckets, KNNIDsinBuckets,KNNCounts); - } -} -/** - * Sort the max-heap data structure for a newly inserted data - * @param k The index of the inserted point data - * @param receivingHeapArrayDistances2DCopy The values of distances for selected K-NNs - * @param receivingHeapArray2DCopy The IDs of the selected K-NNs - * @param KNNCounts Desired count of K-NNs to be computed in this program - */ -void Max_Heapify2 (int k, double * receivingHeapArrayDistances2DCopy, int * receivingHeapArray2DCopy,int KNNCounts) { - int largest = 0; - int l = 2*k + 1; - int r = 2*k + 2; - - if ((l < KNNCounts) && (receivingHeapArrayDistances2DCopy[l] > receivingHeapArrayDistances2DCopy[k])) { - largest = l; - } - else { - largest = k; - } - - if ((r < KNNCounts) && (receivingHeapArrayDistances2DCopy[r] > receivingHeapArrayDistances2DCopy[largest])) { - largest = r; - } - - if (largest != k) { - std::swap(receivingHeapArrayDistances2DCopy[k], receivingHeapArrayDistances2DCopy[largest]); - std::swap(receivingHeapArray2DCopy[k], receivingHeapArray2DCopy[largest]); - Max_Heapify2(largest,receivingHeapArrayDistances2DCopy,receivingHeapArray2DCopy,KNNCounts); - } -} -/** - * Build Max-Heap datat structure for the first time - * @param KNNCounts Desired count of K-NNs to be computed in this program - * @param receivingHeapArrayDistances2DCopy The values of distances for selected K-NNs - * @param receivingHeapArray2DCopy The IDs of the selected K-NNs - */ -void Build_Max_Heap2(int KNNCounts, double* receivingHeapArrayDistances2DCopy, int* receivingHeapArray2DCopy) { - for (int ii = floor((KNNCounts - 1) / 2); ii >= 0; ii--) { - Max_Heapify2(ii,receivingHeapArrayDistances2DCopy,receivingHeapArray2DCopy,KNNCounts); - } -} - - -/** - * Main Function of the Code - */ -int main(int argc, char * const argv[]) { - /** - * MPI Parallel Logfile - */ - MPI_File logfile; - char line[1024]; - /** - * Beginning MPI communications - */ - MPI_Init(NULL, NULL); - /** - * world_size is defined here as total number of MPI processors - */ - int world_size; - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - /** - * world_rank is defined here as the rank of MPI processors - */ - int world_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - /** - * The errors and informational messages are outputted to the log file - */ - MPI_File_open(MPI_COMM_WORLD, "Setting.txt", MPI_MODE_WRONLY | 
MPI_MODE_CREATE,MPI_INFO_NULL, &logfile); - /** - * The following arguments are passed to the code (in order) from the command line: - * fileName is the full path to the input csv dataset - * KNNCounts is the desired number of K-NNs for each data point to be computed in this code - * featureCounts is the number of columns in the input csv datastet (number of data dimensions) - */ - string fileName = argv[1]; - const int KNNCounts = atoi(argv[2]); - - int featureCounts, colIndex1, colIndex2; - if (argc == 3) { - string cmd0="head -n 1 "+ fileName + " |tr '\\,' '\\n' |wc -l "; - featureCounts = stoi(exec(cmd0.c_str())); - } else if (argc == 5) { - string cmd0="head -n 1 "+ fileName + " |tr '\\,' '\\n' |wc -l "; - featureCounts = stoi(exec(cmd0.c_str())); - colIndex1 = atoi(argv[3]); - colIndex2 = atoi(argv[4]); - } else { - printf("Wrong Input Arguments\n"); - sprintf(line,"Wrong Input Arguments\n"); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - return -1; - } - /** - * The following important parameters are used in the design of algorithm. Their values are - * initialized according to the suggested values in the referencing paper. - * globalKdTreeSamples is the number of data sampled by each processor to collaboratively compute dimensions with the highest variability. - * globalKdTreeSamplesMedian is the number of data sampled by each processor to collaboratively compute the median of the chosen dimension for each splitting node within the global Kd Tree. - * localKdTreeSamplesMedian is the number of data sampled by each processor separately to compute the median of the chosen dimension for each splitting node within the local Kd Tree. - * Epsilon is a buffer in accepting the Median value - * Parallel_IO is a flag that defines if the input csv file can be read in parallel by all the processors - * bucketSize is the size of a bucket (or a leaf) in the local Kd Tree - * estimatedExtraLayers: To limit the growing size of the local Kd Trees, the growth of the tree is limited by a number of layers defined here from the initial guess of the required buckets - */ - const int globalKdTreeSamples=256; - const int globalKdTreeSamplesMedian=256; - int localKdTreeSamplesMedian=1024; - double Epsilon=0.01; - const int Parallel_IO = 1; - const int bucketSize=32; - const int estimatedExtraLayers=1; - /** - * Seed for random number generation - */ - srand(17); - /** - * total number of MPI processors should be a power of 2 due to algorithm design for global Kd Tree. 
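- * (each layer of the global Kd Tree doubles the number of splitting nodes until there is exactly one leaf per MPI rank, so world_size must be a power of 2)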
- * Otherwise, output an error and exit the program - */ - bool powerOfTwo = !(world_size == 0) && !(world_size & (world_size - 1)); - if (powerOfTwo!=true) { - if (world_rank==0) { - printf("Number of Processors should be a power of 2\n"); - sprintf(line,"Number of Processors should be a power of 2\n"); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - } - MPI_Finalize(); - return 0; - } - int numericWidth=floor(log10(world_size) + 1); - /** - * The master processor splits the input csv file as each processor could have its own non-overlapping set of input data - */ - if (world_rank==0) { - string cmd=string("split -n l/")+to_string(world_size)+" "+ fileName+" -a "+to_string(numericWidth)+" -d tmpFile --additional-suffix=.csv"; - int returnValue=system(cmd.c_str()); - } - /** - * All procesors neeed to stop here until master processor returns - */ - MPI_Barrier(MPI_COMM_WORLD); - /** - * Each processor reads its own set of data from a unique csv file (localFileName) - */ - int worldRankWidth=floor(log10(world_rank) + 1); - std::stringstream ss; - ss << std::setw(numericWidth-worldRankWidth) << std::setfill('0') << world_rank; - std::string s = ss.str(); - string localFileName="tmpFile"+s+".csv"; - - ifstream infile; - infile.open(localFileName); - /** - * Output error in case the localFileName was not opened for reading - */ - if(infile.fail()) { - printf("error in opening the input file\n"); - sprintf(line,"error in opening the input file\n"); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - return 1; - } - /** - * Each processor finds out about the number of records in its localFileName - */ - string cmd3="wc -l "+localFileName; - string outputCmd3 = exec(cmd3.c_str()); - int tmpFileLineCounts=stoi(outputCmd3.substr(0, outputCmd3.find(" "))); - /** - * The master node needs to subtract 1 record which is for header information - */ - if (world_rank==0) { - string dummyLine; - getline(infile, dummyLine); - --tmpFileLineCounts; - } - /** - * MPI communication between the processors as they all need to know how many data the other processors have - */ - int tmpFileLineCountsArray[world_size], tmpFileLineCountsArrayCum[world_size] ; - int sendBuffer0[0]; - sendBuffer0[0]=tmpFileLineCounts; - MPI_Allgather(sendBuffer0,1,MPI_INT,tmpFileLineCountsArray,1,MPI_INT,MPI_COMM_WORLD); - /** - * All Processors make an array tmpFileLineCountsArrayCum that cummulatively stores the number of data in the other processors - */ - for (int i=0; i= colIndex1-1 && j < colIndex2) inputdata[i][j] = atof(temp2.c_str()); - temp.erase(0, temp.find(",") + 1); - } - } - } - if (argc == 5) featureCounts=colIndex2-colIndex1+1; - /** - * Remove the local input files as their data has been already parsed and read - */ - infile.close(); - string cmd2= string("rm ")+localFileName; - int returnValue=system(cmd2.c_str()); - /** - * Query about the number of available OpenMP processors and set it for OpenMP - */ - int nProcessors = omp_get_num_procs(); - omp_set_num_threads(nProcessors-1); - cout <<"Total Number of OpenMP Processes in the Parallel Region = "<< nProcessors-1 <> VectorGlobalSqrtSum; - vector nodeDataIndex[world_size]; - nodeDataIndex[0].reserve(tmpFileLineCounts); - for (int i=0; i globalMedianValuesforNodes; - vector nextLayerNodeDataIndex[world_size]; - int nodeCounts=1, nodesLayer=0; - double medianNodeData; - - while (nodeCounts!= world_size){ - if (world_rank ==0) { - printf("Constructing Global Kd Tree: Layer = %d \n",nodesLayer); - 
sprintf(line,"Constructing Global Kd Tree: Layer = %d \n",nodesLayer); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - } - int indexMaxVarDim=VectorGlobalSqrtSum[nodesLayer].second; - - for (int i=0; i0) myDATA[j]= nodeDataIndex[i][j]+tmpFileLineCountsArrayCum[world_rank-1]; - else if(world_rank==0) myDATA[j]= nodeDataIndex[i][j]; - } - - int Totalcounts=(int)nodeDataIndex[i].size(); - send_buffer[0]=nodeDataIndex[i].size(); - MPI_Gather(send_buffer,1, MPI_INT,rcount,1, MPI_INT,i,MPI_COMM_WORLD); - - if (world_rank==i){ - cnts=0; - for (int k=0; k is(mmap, std::ios::binary); - string tempString,tempString2; - int m_numLines = 0; - string dummyLine; - getline(is, dummyLine); - - for (int i=0; i is(mmap, std::ios::binary); - string tempString,tempString2; - int m_numLines = 0; - string dummyLine; - getline(is, dummyLine); - - for (int i=0; i > localNodeDataIndex; - vector tmpvector; - vector localMedianNodeData; - vector isBucket; - bool localFlag=true; - int numberofNodeSofar; - int nodeIndexofaPoint[cnts]; - tmpvector.reserve(cnts); - - for (int i=0; i featureCounts){ - printf("Error in Exceeding Dimensions, increase BucketSize\n"); - sprintf(line,"Error in Exceeding Dimensions, increase BucketSize\n"); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - } - - if (localNodeDataIndex[0].size() <= bucketSize+1) {isBucket.push_back(1); localFlag=false;} - else {isBucket.push_back(0);} - - while (localFlag){ - int indexMaxVarDim=VectorGlobalSqrtSum[localNodesLayer+nodesLayer].second; - if (localNodesLayer==0) {numberofNodeSofar=0;} - else {numberofNodeSofar=pow(2,localNodesLayer)-1;} - - for (int i=0; i()); - localNodeDataIndex.push_back(std::vector()); - - if (isBucket[globalID]==1) {isBucket.push_back(0); isBucket.push_back(0); localMedianNodeData.push_back(0); continue;} - if (localNodeDataIndex[globalID].size()==0) {isBucket.push_back(0); isBucket.push_back(0); localMedianNodeData.push_back(0); continue;} - if (localKdTreeSamplesMedian > localNodeDataIndex[globalID].size()/2) localKdTreeSamplesMedian=localNodeDataIndex[globalID].size()/2; - vector sampledDataValues; - for (int i=0; i< localKdTreeSamplesMedian; ++i){ - int randomIndex=rand()%localNodeDataIndex[globalID].size(); - int index=localNodeDataIndex[globalID][randomIndex]; - int index1=indexLookupArray[index]; - sampledDataValues.push_back(mappedData[index1][indexMaxVarDim]); - } - - double temp=localFindMedian(localKdTreeSamplesMedian,sampledDataValues,Epsilon,world_rank); - localMedianNodeData.push_back(temp); - - for (int j=0; j< localNodeDataIndex[globalID].size(); ++j){ - int index0=localNodeDataIndex[globalID][j]; - int index=indexLookupArray[index0]; - if (mappedData[index][indexMaxVarDim] < localMedianNodeData[globalID]){ - localNodeDataIndex[leftNodeGlobalIndex].push_back(index0); - ++countLeft; - } - else{ - localNodeDataIndex[rightNodeGlobalIndex].push_back(index0); - ++countRight; - } - } - - if (countLeft ==1) { - localNodeDataIndex[rightNodeGlobalIndex].push_back(localNodeDataIndex[leftNodeGlobalIndex][0]); - localNodeDataIndex[leftNodeGlobalIndex].pop_back(); - --countLeft; - } - - if (countRight ==1) { - localNodeDataIndex[leftNodeGlobalIndex].push_back(localNodeDataIndex[rightNodeGlobalIndex][0]); - localNodeDataIndex[rightNodeGlobalIndex].pop_back(); - --countRight; - } - - - if ((countLeft <= bucketSize+1 && countLeft >0)|| ((localNodesLayer == maxAllowedLayers-1) && countLeft >0) ) { - isBucket.push_back(1); - - for (int j=0; j< 
localNodeDataIndex[leftNodeGlobalIndex].size(); ++j){ - int index0=localNodeDataIndex[leftNodeGlobalIndex][j]; - int index=indexLookupArray[index0]; - nodeIndexofaPoint[index]=leftNodeGlobalIndex; - } - } - else {isBucket.push_back(0);} - - if ((countRight <= bucketSize+1 && countRight >0) || ((localNodesLayer == maxAllowedLayers-1) && countRight >0) ) { - isBucket.push_back(1); - - for (int j=0; j< localNodeDataIndex[rightNodeGlobalIndex].size(); ++j){ - int index0=localNodeDataIndex[rightNodeGlobalIndex][j]; - int index=indexLookupArray[index0]; - nodeIndexofaPoint[index]=rightNodeGlobalIndex; - } - } - else {isBucket.push_back(0);} - } - - localFlag=false; - for (int i=0; i0 ) {localFlag=true; break; - } - } - layerNodeCounts*=2; - ++localNodesLayer; - } - /** - * For performance, it is better to refer to local Kd tree later - * from the ID of the first dividing node which has been converted to a bucket - */ - int FirstBucket; - for (int i=0; i< localNodeDataIndex.size(); ++i){ - if (isBucket[i] == 1) {FirstBucket=i;break;} - } - /** - * Now, it is the time to start computing K-NNs from the data points within each bucket in the local Kd Tree - * and store them in KNNIDsinBuckets and KNNDistanceinBuckets - * To improve the performance, the data locality was considered for main arrays of localNodeDataIndex2 and mappedData2 - * and the data within the same bucket arranged close to each other in the new arrays - */ - if (world_rank==0) { - printf("Computing K-NNs for the points within the Same Bucket\n"); - sprintf(line,"Computing K-NNs for the points within the Same Bucket\n"); - MPI_File_write(logfile, line, strlen(line), MPI_CHAR, MPI_STATUS_IGNORE); - } - int KNNIDsinBucketsFilledCounts[cnts]; - int localIndexConvertor[cnts]; - int counter=0; - vector> localNodeDataIndex2; - - int **KNNIDsinBuckets = new int*[cnts]; - for (int i=0; i()); - if (isBucket[i] == 0) {continue;} - - for (int j=0; j< localNodeDataIndex[i].size(); ++j){ - localIndexConvertor[counter]=localNodeDataIndex[i][j]; - localNodeDataIndex2[i].push_back(counter); - ++counter; - } - } - - double** mappedData2=new double*[cnts]; - for (int i=0; i ScatterVlocalNodeDataIndex[world_size]; - vector ScatterVKNNIDsinBucketsFilledCounts[world_size]; - - int globalLayerID = int(log2(world_size)); - int lowestNodeID=pow(2,globalLayerID)-1; - int highestNodeID=lowestNodeID+world_size-1; - int NeighboringNodes[cnts][world_size-1]; - - if (world_size != 1){ - for (int i=0; i> globalStack; - globalStack.push(make_pair(0,0)); - /** - * C1NodeID is the closer child, and C2NodeID is the other child - */ - int C1NodeID,C2NodeID; - int jcounts=0; - - while (!globalStack.empty()){ - pair topPairinStack=globalStack.top(); - int nodeID=topPairinStack.first; - double dValue=topPairinStack.second; - globalStack.pop(); - int nodesLayer0=int(log2(nodeID+1)); - int indexMaxVarDim=VectorGlobalSqrtSum[nodesLayer0].second; - - if (dValue < rPrime){ - double dPrime= mappedData2[index1][indexMaxVarDim] - globalMedianValuesforNodes[nodeID]; - if (dPrime < 0) { - C1NodeID=2*nodeID+1; - C2NodeID=2*nodeID+2; - } - else{ - C1NodeID=2*nodeID+2; - C2NodeID=2*nodeID+1; - } - - dPrime=sqrt(dValue*dValue+dPrime*dPrime); - if (dPrime= lowestNodeID && (C2NodeID-lowestNodeID)!=world_rank) { - NeighboringNodes[index1][jcounts]=C2NodeID-lowestNodeID; - ScatterVlocalNodeDataIndex[C2NodeID-lowestNodeID].push_back(index1); - ScatterVKNNIDsinBucketsFilledCounts[C2NodeID-lowestNodeID].push_back(KNNIDsinBucketsFilledCounts[index1]); - ++jcounts; - } - } - } - - if (C1NodeID 
<= highestNodeID) { - globalStack.push(make_pair(C1NodeID,dValue)); - if (C1NodeID >= lowestNodeID && (C1NodeID-lowestNodeID)!=world_rank) { - NeighboringNodes[index1][jcounts]=C1NodeID-lowestNodeID; - ScatterVlocalNodeDataIndex[C1NodeID-lowestNodeID].push_back(index1); - ScatterVKNNIDsinBucketsFilledCounts[C1NodeID-lowestNodeID].push_back(KNNIDsinBucketsFilledCounts[index1]); - ++jcounts; - } - } - } - } - } - } - } - /** - * Now, send the data of the given point to the neighboring processors identified above - * for further computation of possible K-NNs in those processors - */ - int displ[world_size],displ2[world_size],displ3[world_size]; - int bufferCounts[world_size],bufferCounts2[world_size],bufferCounts3[world_size]; - bufferCounts[world_rank]=0; - bufferCounts2[world_rank]=0; - bufferCounts3[world_rank]=0; - - if (world_size != 1){ - for (int i=0; i> globalStack; - globalStack.push(make_pair(0,0)); - - while (!globalStack.empty()){ - pair topPairinStack=globalStack.top(); - int nodeID=topPairinStack.first; - double dValue=topPairinStack.second; - globalStack.pop(); - int nodesLayer0=int(log2(nodeID+1)); - int indexMaxVarDim=VectorGlobalSqrtSum[nodesLayer0+nodesLayer].second; - - if (isBucket[nodeID] == 1) { - for (int kk=0; kk> setContainer; - int pointID=localIndexConvertor[i]; - /** - * Insert into Set container the K-NNs initially computed from the points within the same bucket - */ - for (int j=0; j::iterator it = std::find(ScatterVlocalNodeDataIndex[neighborID].begin(), ScatterVlocalNodeDataIndex[neighborID].end(), i); - int index = std::distance(ScatterVlocalNodeDataIndex[neighborID].begin(), it); - - for (int k=0; k>::iterator pairIt; - pairIt=setContainer.begin(); - int outputCounter=0; - for (int ii=0; ii KNN_Indices.csv"); - int returnValue=system(cmd4.c_str()); - string cmd5= string("cat KNN_Distances_*.csv > KNN_Distances.csv"); - returnValue=system(cmd5.c_str()); - - string cmd6= string("rm KNN_Indices_*"); - returnValue=system(cmd6.c_str()); - string cmd7= string("rm KNN_Distances_*"); - returnValue=system(cmd7.c_str()); - } - - MPI_File_close(&logfile); - MPI_Finalize(); - return 0; -} - diff --git a/clustering/K-NN/README.rst b/clustering/K-NN/README.rst deleted file mode 100644 index 085fa3688..000000000 --- a/clustering/K-NN/README.rst +++ /dev/null @@ -1,196 +0,0 @@ -=================================== -K-NN Code for Shared-Memory Systems -=================================== - -The K-NN code for the Shared-Memory systems was implemented according to the algorithm developed by Dong et al., 2012, titled "Efficient K-Nearest Neighbor Graph Construction for Generic Similarity Measures". The full description of the algorithm is available -`Here `_. - ------------------------- -Installing Boost Library ------------------------- - -Both K-NN codes for Shared-Memory and Distributed-Memory use Boost library for mapping data into memory and reading inputs from the command line. The steps for installing Boost library in Linux are displayed below. - -.. code:: bash - - wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz - tar xfz boost_1_71_0.tar.gz - cd boost_1_71_0/ - ./bootstrap.sh - ./b2 - export LD_LIBRARY_PATH=currentpath/stage/lib:$LD_LIBRARY_PATH - -It is recommended to include the last line into .bashrc file at home directory. - ------------------ -Runtime Arguments ------------------ - -The code requires the following parameters as the input. - -1- ``filePath``: The full path to the input csv file containig the dataset. 
Please ensure there are no other csv files in this path. Please note that the code assumes that the first line in the input csv file is the header and ignores it. - -2- ``K``: The desired number of Nearest Neighbours to be computed. - -3- ``sampleRate``: The rate at which we do sampling. This parameter plays a key role in the performance. This parameter is a trades-off between the performance and the accuracy of the results. The values closer to 1 provides more accurate results but the execution time instead takes longer. - -4- ``convThreshold``: An integer that defines the threshold for the convergence of the model. A fixed integer is used here instead of the expression delta*N*K which is given in the reference paper. - -5- ``outputPath``: The full path to the output csv files. - -6,7- ``colIndex1`` and ``colIndex2`` (Optional): The indices of columns from the input csv file where raw data exists continuously in between. If these two arguments were left blank, the code assumes that the entire input csv file is raw data and automatically computes the number of columns in the input csv file. The numbering for these 2 indices begin from 1 (and not 0). - ------------- -Code Outputs ------------- - -The code produces the following output files: - -1- ``KNN_Indices.csv``: The indices of K-NNs for the entire dataset. The order of data here is the same as the order of data at the input csv file. - -2- ``KNN_Distances.csv``: The corresponding distances of K-NNs which was saved at KNN_Indices.csv. - -3- ``Setting.txt``: The logging file containing the errors and messages. - --------------------------------- -An Example of Executing the Code --------------------------------- - -.. code:: bash - - ulimit -s unlimited - g++ -I/Path_To_Boost_Library/boost_1_71_0 KNN_Serial_Code.cpp -o output.exe -L/Path_To_Boost_Library/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -O2 - time ./output.exe --inputPath . --K 10 --sampleRate 0.99 --convThreshold 5 --outputPath . - time ./output.exe --inputPath . --K 10 --sampleRate 0.99 --convThreshold 5 --outputPath . --colIndex1 3 --colIndex2 26 - -Please note that the multi-threaded version of Shared-Memory K-NN can be compiled and run as follows. The number of threads in the OpenMP parallelized region of the code is automatically set equal to the number of threads in the machine minus 1. - -.. code:: bash - - ulimit -s unlimited - g++ -I/Path_To_Boost_Library/boost_1_71_0 KNN_Serial_Code.cpp -o output.exe -L/Path_To_Boost_Library/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -O2 -fopenmp - time ./output.exe --inputPath . --K 10 --sampleRate 0.99 --convThreshold 5 --outputPath . - time ./output.exe --inputPath . --K 10 --sampleRate 0.99 --convThreshold 5 --outputPath . --colIndex1 3 --colIndex2 26 - ---------------------------- -An Advise About Performance ---------------------------- - -The parameter ``sampleRate`` has a significant impact on the performance. It is advised that its optimal value to be determined for each specific project. - -------------------- -Install WIPP Plugin -------------------- -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of plugin.json into the pop-up window and submit. - ------------------------------------------- -An Example of Running the Docker Container ------------------------------------------- - -.. 
code:: bash - - docker run -v /path/to/data:/home/Inputs -v /path/to/outputs:/home/Outputs \ - containername --inputPath /home/Inputs --K 10 --sampleRate 0.9 \ - --convThreshold 5 --outputPath /home/Outputs - - -================================================== -GPU-Enabled K-NN Code for Shared-Memory Systems -================================================== - -Alternatively, the performance of K-NN code for Shared-Memory Systems was improved by adding CUDA directives. The computation loads are then automatically switched between GPU and CPU. - -.. code:: bash - - ulimit -s unlimited - nvcc -I/Path_To_Boost_Library/boost_1_71_0 KNN_GPU_Code.cu -o output.exe -L/Path_To_Boost_Library/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -O2 -arch=sm_75 - time ./output.exe --inputPath . --K 10 --sampleRate 0.99 --convThreshold 5 --outputPath . - -The following parameters are GPU-specific parameters. Their values might need to be adjusted for any given device. - -1- ``MAXTPB``: The Max number of Threads per Block. It is by deafult 1024. - -2- ``MinimumThreads``: The Minimum number of computations that is needed to switch the computation to GPU device (Otherwise stay in host). This parameter might have considerable impact on the performance. - -3- ``arch=sm_75``: This compilation flag should represent the GPU specificiation of the given machine. - - -The docker for GPU-Enabled K-NN code can also be run using the following command. - -.. code:: bash - - docker run --gpus all -v /path/to/data:/home/Inputs -v /path/to/outputs:/home/Outputs \ - containername --inputPath /home/Inputs --K 10 --sampleRate 0.9 \ - --convThreshold 5 --outputPath /home/Outputs - -======================================== -K-NN Code for Distributed-Memory Systems -======================================== - -The K-NN code for the Distributed-Memory systems was implemented according to the algorithm developed by Patwary et a., 2016, titled "PANDA: Extreme Scale Parallel K-Nearest Neighbor on Distributed Architectures". The full description of the algorithm is available -`Here `_. - ------------------ -Runtime Arguments ------------------ - -The code requires the following input parameters. - -1- ``Number of Processors``: Due to the special design for global Kd Tree, the number of processors should be a power of 2 (e.g., 1,2,4,8,16,...). - -2- ``filePath``: The full path to the input csv file containig the raw dataset. Please note that the code assumes that the first line in the input csv file is the header and ignores it. - -3- ``KNNCounts``: The desired number of Nearest Neighbours to be computed. - -4- ``colIndex1`` and ``colIndex2`` (Optional): The index of columns from the input csv file where raw data exists continuously in between. If these two arguments were left blank, the code assumes that the entire input csv file is raw data and automatically computes the number of columns in the input csv file. The numbering for these 2 indices begin from 1 (and not 0). - -Please note that the performance has been improved by adding OpenMP directives (multi-threading) in addition to the current MPI directives (multi-node). The number of threads in the OpenMP parallelized region of the code is set using an environment variable as shown below. - -.. code:: bash - - export OMP_NUM_THREADS=2 - --------------------------------- -An Example of Executing the code --------------------------------- - -.. 
code:: bash - - ulimit -s unlimited - export OMP_NUM_THREADS=2 - mpicxx -I/Path_To_Boost_Library/boost_1_71_0 KNN_Distributed_code-OpenMP.cpp -o output.exe -L/Path_To_Boost_Library/boost_1_71_0/stage/lib -lboost_iostreams -O2 -fopenmp - time mpirun -np 4 ./output.exe /fullPath/inputfile.csv 15 - time mpirun -np 4 ./output.exe /fullPath/inputfile.csv 15 3 26 - ------------- -Code Outputs ------------- - -Similar to the Shared-Memory code, the Distributed-Memory code produces the following output files: - -1- ``KNN_Indices.csv``: The indices of K-NNs for the entire dataset. The first entry in each row contains the index of that point according to the index from the input csv file. - -2- ``KNN_Distances.csv``: The corresponding distances of K-NNs which were saved at KNN_Indices.csv. Similarly, the first entry in each row contains the index of that point according to the index from the input csv file. - -3- ``Setting.txt``: The logging file containing the error and messages. - -------------------------------------- -Description of Some Design Parameters -------------------------------------- - -The code also has a few other parameters that are a part of the Kd Tree design. These parameters were initialized in the code to the values suggested in the reference paper (Patwary et al., 2016). For the complicated cases, these values might need to be adjusted for the optimal performance. - -1- ``globalKdTreeSamples``: The number of data sampled by each processor to collaboratively compute the dimensions with the highest variability. - -2- ``globalKdTreeSamplesMedian``: The number of data sampled by each processor to collaboratively compute the median of the chosen dimension at each splitting node of the global Kd Tree. - -3- ``Parallel_IO``: A flag that defines if the input csv file can be read in parallel by all the processors. - -4-``Epsilon``: The error in estimating the Median value. - -5- ``localKdTreeSamplesMedian``: The number of data sampled by each processor separately to compute the median of the chosen dimension at each splitting node of the local Kd Tree. - -6- ``bucketSize``: The size of a bucket (or a leaf) in the local Kd Tree. - -7- ``estimatedExtraLayers``: To limit the growing size of the local Kd Trees, the growth of the tree is limited by a cerain number of layers using this parameter. - diff --git a/clustering/K-NN/Shared-Memory-GPU/Dockerfile b/clustering/K-NN/Shared-Memory-GPU/Dockerfile deleted file mode 100644 index cb9c83c72..000000000 --- a/clustering/K-NN/Shared-Memory-GPU/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -FROM ktaletsk/gpu:bionic-20190612-cuda-10-2 - -# Update apps on the base image -RUN apt-get -y update && apt-get install -y g++ wget make - -#Create new directory -RUN mkdir -p /home/GPU_KNN /home/Inputs /home/Outputs - -# Specify the working directory -WORKDIR /home/GPU_KNN - -# Install Boost Library -RUN wget https://dl.bintray.com/boostorg/release/1.71.0/source/boost_1_71_0.tar.gz -RUN tar xfz boost_1_71_0.tar.gz -RUN rm boost_1_71_0.tar.gz -WORKDIR /home/GPU_KNN/boost_1_71_0 -RUN ./bootstrap.sh -RUN ./b2 -ENV LD_LIBRARY_PATH="/home/GPU_KNN/boost_1_71_0/stage/lib:${LD_LIBRARY_PATH}" - -# Copy the current folder to the docker image -COPY . 
/home/GPU_KNN - -# Compile the source file -WORKDIR /home/GPU_KNN -RUN nvcc -I/home/GPU_KNN/boost_1_71_0 KNN_GPU_Code.cu -o Out.exe -L/home/GPU_KNN/boost_1_71_0/stage/lib -lboost_iostreams -lboost_system -lboost_filesystem -arch=sm_75 -O2 - -# Run the output program from the previous step -ENTRYPOINT ["./Out.exe"] - - - diff --git a/clustering/K-NN/Shared-Memory-GPU/KNN_GPU_Code.cu b/clustering/K-NN/Shared-Memory-GPU/KNN_GPU_Code.cu deleted file mode 100644 index cbba53145..000000000 --- a/clustering/K-NN/Shared-Memory-GPU/KNN_GPU_Code.cu +++ /dev/null @@ -1,727 +0,0 @@ -/** - * This code is an implementation of the algorithm presented by Dong et al., 2012, - *"Efficient K-Nearest Neighbor Graph Construction for Generic Similarity Measures" - * and the performance has been improved by CUDA (GPU) directives. - * @author: Mahdi Maghrebi - * March 2020 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using boost::iostreams::mapped_file_source; -using boost::iostreams::stream; -using namespace std; - -/** - * The Max number of Threads per Block. This is one of GPU hardware characteristics. - */ -#define MAXTPB 1024 - -/** - * The Minimum number of computations that is needed to switch to GPU device (Otherwise stay in host) - */ -#define MinimumThreads 10 - -/** - * Error handling for GPU Code - */ -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) -{ - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } -} - -/** - * GPU Kernel definition - */ -__global__ void ComputeDistancesKernel(int * device_New_Final_List_1D, int * device_New_Final_List_Index, int Dim, double * device_New_Final_List_Dist_1D, double * device_dataPointsGPU, int * device_New_Final_List_Dist_Index){ - - int localDim=Dim; - double localvalue=0; - int Cnts=device_New_Final_List_Index[blockIdx.x+1]-device_New_Final_List_Index[blockIdx.x]; - int Cnts_Dist=device_New_Final_List_Dist_Index[blockIdx.x+1]-device_New_Final_List_Dist_Index[blockIdx.x]; - int par1, par2; - int cnt=0; - int flag=0; - - if (threadIdx.x < Cnts_Dist){ - for (int i=0; i < Cnts; ++i){ - if (flag ==1) break; - for (int j=i+1; j < Cnts; ++j){ - if (threadIdx.x == cnt) { - par1 = device_New_Final_List_1D[i + device_New_Final_List_Index[blockIdx.x]]; - par2 = device_New_Final_List_1D[j + device_New_Final_List_Index[blockIdx.x]]; - flag=1; - break; - } - ++cnt; - } - } - - for (int i=0; i buffer; - std::string result; - std::unique_ptr pipe(popen(cmd, "r"), pclose); - if (!pipe) { - throw std::runtime_error("popen() failed!"); - } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - result += buffer.data(); - } - return result; -} - -/** - * Replace the farthest point in B_Index (for u1) with u2 if u2 is closer - *
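- * If all K entries of B_Index[u1] are already filled, the candidate u2 replaces the entry with the largest value in B_Dist[u1], so the list always keeps the K closest points seen so far.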

- * This method corresponds to UPDATENN(B[u1], <u2, l, true>) in the paper - *

- * @param Dist represents B_Dist - * @param Index represents B_Index - * @param IsNew represents B_IsNew - * @param u1 the indice of point that we want to potentially update its K-NN with the point u2 - * @param u2 the indice of potential K-NN fpr point u1 - * @param distance the spatial distance between u1 and u2 - * @param flag updates B_IsNew - * @return 1 if B_Index[u1][.] is updated, 0 otherwise - */ -int UpdateNN (int** B_Index, double ** B_Dist, short** B_IsNew, short* allEntriesFilled, int K, int u1, int u2, double distance, int flag = 1) { - - if(allEntriesFilled[u1]==0){ - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] < 0) { - - for (int jj = 0; jj < j; jj++) {if (B_Index[u1][jj] == u2) return 0;} - - B_Dist[u1][j] = distance; - B_Index[u1][j] = u2; - B_IsNew[u1][j] = flag; - if (j==K-1) allEntriesFilled[u1]=1; - return 1;} - } - } - - else{ - for (int j = 0; j < K; j++) { - if (B_Index[u1][j] == u2) return 0; - } - - double max = DBL_MIN; - int index = -1; - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] > max) { - max = B_Dist[u1][j]; - index = j; - } - } - if (index == -1) { cout << "Error"<path().extension() == ext){ - fileFound = true; - filePath = it->path().string(); - break; - } - ++it; - } - if (!fileFound){ - logFile << "CSV file is not found in the input path"; - cout << "CSV file is not found in the input path"; - return 1; - } - } - else if (string(argv[i])=="--K") K=atoi(argv[i+1]); - else if (string(argv[i])=="--sampleRate") sampleRate=stof(argv[i+1]); - else if (string(argv[i])=="--convThreshold") convThreshold=stof(argv[i+1]); - else if (string(argv[i])=="--outputPath"){ - boost::filesystem::path p(argv[i+1]); - - if(!boost::filesystem::exists(p) || !boost::filesystem::is_directory(p)) - { - logFile << "Incorrect output path"; - cout << "Incorrect output path"; - return 1; - } - - LogoutputPath=argv[i+1]; - boost::filesystem::path joinedPath = p / boost::filesystem::path("KNN_Indices.csv"); - outputPath = joinedPath.string(); - boost::filesystem::path joinedPath2 = p / boost::filesystem::path("KNN_Distances.csv"); - outputPath2 = joinedPath2.string(); - } - else if (string(argv[i])=="--colIndex1") { - colIndex1=stof(argv[i+1]); - if (colIndex1<1) { - logFile << "colIndex1 should be greater than 1"; - cout << "colIndex1 should be greater than 1"; - return 1; - } - } - else if (string(argv[i])=="--colIndex2") { - colIndex2=stof(argv[i+1]); - if (colIndex2<1) { - logFile << "colIndex2 should be greater than 1"; - cout << "colIndex2 should be greater than 1"; - return 1; - } - } - } - - logFile<<"------------The following Input Arguments were read------------"< *New_Index = new std::vector[N]; - /** - * Data structure for REVERSE(new[v]) or new' - */ - vector *Reverse_New_Index = new vector[N]; - /** - * Data Structure for SAMPLE(new'[v],pk) - */ - vector *Sampled_Reverse_New_Index = new vector[N]; - /** - * Data Structure for new[v] U SAMPLE(new'[v],pk) - */ - vector *New_Final_List = new vector[N]; - /** - * An approximation of zero in computing distances. Two points with the distance - * smaller than epsilon are considered as one point. 
- */ - double epsilon = 1e-10; // - short* allEntriesFilled = new short[N]; - /** - * At first, let's Read Dataset from Input File Using Memory Mapping - */ - mapped_file_source mmap(filePath); - stream is(mmap, std::ios::binary); - if (is.fail()) - { - logFile << "error in Opening Input File" << endl; - cout << "error in Opening Input File" << endl; - return ; - } - /** - * Remove the header info - */ - string dummyLine; - getline(is, dummyLine); - /** - * Reading the Entire Dataset - */ - if (argc==11){ - for (int i = 0; i < N; ++i) { - string temp, temp2; - getline(is, temp); - for (int j = 0; j < Dim; ++j) { - temp2 = temp.substr(0, temp.find(",")); - double tempV=atof(temp2.c_str()); - dataPoints[i][j] = tempV; - dataPointsGPU[i*Dim+j] = tempV; - temp.erase(0, temp.find(",") + 1); - } - } - } else { - for (int i = 0; i < N; ++i) { - string temp, temp2; - getline(is, temp); - for (int j = 0; j < Dim; ++j) { - temp2 = temp.substr(0, temp.find(",")); - if (j >= colIndex1-1 && j < colIndex2) { - double tempV=atof(temp2.c_str()); - dataPoints[i][j] = tempV; - dataPointsGPU[i*Dim+j] = tempV; - } - temp.erase(0, temp.find(",") + 1); - } - } - } - mmap.close(); - - if (colIndex1 != -1) Dim=colIndex2-colIndex1+1; - if (Dim <1) { - logFile << "Error in Computing the Dimension of input csv file" << endl; - cout << "Error in Computing the Dimension of input csv file" << endl; - return 1; - } - //Convert Pagged Memory to the Pinned Memory for better performance - cudaHostRegister(dataPointsGPU,N*Dim*sizeof(double),0); - /** - * Copy the GPU version of input data (dataPointsGPU) to GPU memory (device_dataPointsGPU) - */ - cudaStream_t stream; - cudaStreamCreate(&stream); - - double * device_dataPointsGPU; - cudaMalloc ((void **) &device_dataPointsGPU, N*Dim*sizeof(double)); - cudaMemcpyAsync (device_dataPointsGPU, dataPointsGPU, N*Dim*sizeof(double),cudaMemcpyHostToDevice, stream); - gpuErrchk(cudaPeekAtLastError()); - - /** - * define a seed for random generator. Using a constant value produces - * the same set of random numbers and is good for debugging. 
Alternatively, - * we can select the seed number randomly as srand(time(NULL)) - */ - srand(17); - /** - * Initialization of Arrays B_IsNew and B_Dist - */ - for (int i = 0; i < N; ++i) { - allEntriesFilled[i]=0; - for (int j = 0; j < K; ++j) { - B_IsNew[i][j] = 1; - B_Dist[i][j] = -1.0; - } - } - /** - * Random Initialization of B_Index - */ - int randomIndex, iter; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - iter = 1; - while (iter) { - randomIndex = rand() % N; - if (randomIndex != i) { - B_Index[i][j] = randomIndex; - iter = 0; - } - } - } - } - - /** - * Main Loop of the Algorithm - */ - bool iterate = true; - while (iterate) { - int c_criteria = 0; - int abort=0; - /** - * Create "New" for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - if (float(rand() % 100) < sampleRate*100) { - if (B_IsNew[i][j] == 1) { - New_Index[i].push_back(B_Index[i][j]); - B_IsNew[i][j] = 0; - } - } - } - } - /** - * Create "New'"(or REVERSE("New")) for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - Reverse_New_Index[New_Index[i][j]].push_back(i); - } - } - /** - * Random Sampling from "New'" - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < Reverse_New_Index[i].size(); ++j) { - if (float(rand() % 100) < sampleRate*100) { - Sampled_Reverse_New_Index[i].push_back(Reverse_New_Index[i][j]); - } - } - } - /** - * "New"= "New" U SAMPLE("New'", pK) - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - New_Final_List[i].push_back(New_Index[i][j]); - } - for (int j = 0; j < Sampled_Reverse_New_Index[i].size(); ++j) { - New_Final_List[i].push_back(Sampled_Reverse_New_Index[i][j]); - } - } - /** - * Remove duplicates from New_Final_List - */ - for (int i = 0; i < N; ++i) { - sort(New_Final_List[i].begin(), New_Final_List[i].end()); - auto last = std::unique(New_Final_List[i].begin(), New_Final_List[i].end()); - New_Final_List[i].erase(last, New_Final_List[i].end()); - } - - /** - * Max_New_Final_List_Length is the maximum length of New_Final_List array - */ - int Max_New_Final_List_Length=0; - - for (int i = 0; i < N; ++i) { - if (New_Final_List[i].size()> Max_New_Final_List_Length) Max_New_Final_List_Length=New_Final_List[i].size(); - } - /** - * ThreadsPerBlockNeeded is the required number of threads per block to compute the longest array of New_Final_List - */ - int ThreadsPerBlockNeeded=0; - for (int i = 0; i < Max_New_Final_List_Length; ++i) { - for (int j = i+1; j < Max_New_Final_List_Length; ++j) { - ++ThreadsPerBlockNeeded; - } - } - - /** - * Switch to GPU computations if the following conditions met. Otherwise proceed to CPU computations. 
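- * The GPU path is taken only when ThreadsPerBlockNeeded is below MAXTPB (one block can then cover the longest candidate list) and above MinimumThreads (the work is large enough to justify a kernel launch); otherwise the same pairwise distances are computed on the host.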
- */ - if (ThreadsPerBlockNeeded < MAXTPB && ThreadsPerBlockNeeded > MinimumThreads) { - /** - * TotalCounts is the total number of elements in New_Final_List - */ - int TotalCounts=0; - for (int i = 0; i < N; ++i) { - TotalCounts += New_Final_List[i].size(); - } - /** - * New_Final_List_1D is the 1D representation of New_Final_List for transferring to GPU - */ - int * New_Final_List_1D = new int [TotalCounts]; - int cnt=0; - - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Final_List[i].size(); ++j) { - New_Final_List_1D[cnt] = New_Final_List[i][j]; - ++cnt; - } - } - /** - * device_New_Final_List_1D is on the GPU memory and contains New_Final_List_1D - */ - int *device_New_Final_List_1D; - cudaMalloc ((void **) &device_New_Final_List_1D, TotalCounts*sizeof(int)); - gpuErrchk(cudaMemcpy (device_New_Final_List_1D, New_Final_List_1D, TotalCounts* sizeof(int),cudaMemcpyHostToDevice)); - /** - * New_Final_List_Index is the index of New_Final_List[i] data. It is needed as New_Final_List has variable size in each row of data. - */ - int * New_Final_List_Index = new int [N+1]; - New_Final_List_Index[0] = 0; - for (int i = 1; i < N+1; ++i) { - New_Final_List_Index[i] = New_Final_List[i-1].size()+New_Final_List_Index[i-1]; - } - /** - * device_New_Final_List_Index is on the GPU memory and contains New_Final_List_Index - */ - int *device_New_Final_List_Index; - cudaMalloc ((void **) &device_New_Final_List_Index, (N+1)*sizeof(int)); - gpuErrchk(cudaMemcpy (device_New_Final_List_Index, New_Final_List_Index, (N+1)* sizeof(int),cudaMemcpyHostToDevice)); - /** - * New_Final_List_Dist_Index is the index of pairs of distances computed in GPU. - */ - int * New_Final_List_Dist_Index = new int [N+1]; - int TotalCounts_Dist=0; - - for (int i = 0; i < N; ++i) { - New_Final_List_Dist_Index[i]=TotalCounts_Dist; - for (int j = 0; j < New_Final_List[i].size(); ++j) { - for (int k = j+1; k < New_Final_List[i].size(); ++k) { - ++TotalCounts_Dist; - } - } - } - New_Final_List_Dist_Index[N]=TotalCounts_Dist; - /** - * device_New_Final_List_Dist_Index is on the GPU memory and contains New_Final_List_Dist_Index - */ - int * device_New_Final_List_Dist_Index; - cudaMalloc ((void **) &device_New_Final_List_Dist_Index, (N+1)*sizeof(int)); - gpuErrchk(cudaMemcpy (device_New_Final_List_Dist_Index, New_Final_List_Dist_Index, (N+1) * sizeof(int),cudaMemcpyHostToDevice)); - /** - * device_New_Final_List_Dist_1D is on the GPU memory and contains 1D array of pairs of distances computed in GPU. - */ - double *device_New_Final_List_Dist_1D; - cudaMalloc ((void **) &device_New_Final_List_Dist_1D, TotalCounts_Dist*sizeof(double)); - /** - * Launch the Kernel to compute the distance computations for all pairs of the points. - * cudaDeviceSynchronize is required to ensure data transfer to GPU memory is already finished. 
- */ - gpuErrchk(cudaDeviceSynchronize()); - - logFile<< "Number of Blocks = "<> aggregateResults; - for (int j=0; j - * August 2019 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using boost::iostreams::mapped_file_source; -using boost::iostreams::stream; - -using namespace std; - -/** - * Read the output of linux command execution - * @param cmd is the linux command to be executed - * @return the output from the execution of the linux command - */ -std::string exec(const char* cmd) { - std::array buffer; - std::string result; - std::unique_ptr pipe(popen(cmd, "r"), pclose); - if (!pipe) { - throw std::runtime_error("popen() failed!"); - } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - result += buffer.data(); - } - return result; -} - -int main(int argc, char * const argv[]) { - /** - * The errors and informational messages are outputted to the log file - */ - ofstream logFile; - string logFileName="Setting.txt"; - logFile.open(logFileName); - - /** - * The input parameters are read from command line which are as follow. - * inputPath: The full path to the input file containig the dataset. - * outputPath: The full path to the output csv files. - * K: K in K-NN that means the desired number of Nearest Neighbours to be computed. - * sampleRate: The rate at which we do sampling. This parameter plays a key role in the performance. - * This parameter is a trades-off between the performance and the accuracy of the results. - * Values closer to 1 provides more accurate results but the execution takes longer. - * convThreshold: Convergance Threshold. A fixed integer is used here instead of delta*N*K. - * colIndex1 and colIndex2 (optional): The indices of columns from the input csv file where raw data exists continuously in between. - * If these two arguments were left blank, the code assumes that the entire input csv file is raw data - * and automatically computes the number of columns in the input csv file. 
- */ - string filePath, outputPath, outputPath2, inputPath,LogoutputPath; - int K,convThreshold, colIndex1=-1, colIndex2=-1; - float sampleRate; - - for (int i=1; ipath().extension() == ext){ - fileFound = true; - filePath = it->path().string(); - break; - } - ++it; - } - if (!fileFound){ - logFile << "CSV file is not found in the input path"; - cout << "CSV file is not found in the input path"; - return 1; - } - } - else if (string(argv[i])=="--K") K=atoi(argv[i+1]); - else if (string(argv[i])=="--sampleRate") sampleRate=stof(argv[i+1]); - else if (string(argv[i])=="--convThreshold") convThreshold=stof(argv[i+1]); - else if (string(argv[i])=="--outputPath"){ - boost::filesystem::path p(argv[i+1]); - - if(!boost::filesystem::exists(p) || !boost::filesystem::is_directory(p)) - { - logFile << "Incorrect output path"; - cout << "Incorrect output path"; - return 1; - } - - LogoutputPath=argv[i+1]; - boost::filesystem::path joinedPath = p / boost::filesystem::path("KNN_Indices.csv"); - outputPath = joinedPath.string(); - boost::filesystem::path joinedPath2 = p / boost::filesystem::path("KNN_Distances.csv"); - outputPath2 = joinedPath2.string(); - } - else if (string(argv[i])=="--colIndex1") { - colIndex1=stof(argv[i+1]); - if (colIndex1<1) { - logFile << "colIndex1 should be greater than 1"; - cout << "colIndex1 should be greater than 1"; - return 1; - } - } - else if (string(argv[i])=="--colIndex2") { - colIndex2=stof(argv[i+1]); - if (colIndex2<1) { - logFile << "colIndex2 should be greater than 1"; - cout << "colIndex2 should be greater than 1"; - return 1; - } - } - } - - logFile<<"------------The following Input Arguments were read------------"<= colIndex1-1 && j < colIndex2) dataPoints[i][j] = atof(temp2.c_str()); - temp.erase(0, temp.find(",") + 1); - } - } - } - mmap.close(); - - if (colIndex1 != -1) Dim=colIndex2-colIndex1+1; - if (Dim <1) { - logFile << "Error in Computing the Dimension of input csv file" << endl; - cout << "Error in Computing the Dimension of input csv file" << endl; - return 1; - } - /** - * define a seed for random generator. Using a constant value produces - * the same set of random numbers and is good for debugging. Alternatively, - * we can select the seed number randomly as srand(time(NULL)) - */ - srand(17); - /** - * Initialization of Arrays B_IsNew and B_Dist - */ - for (int i = 0; i < N; ++i) { - allEntriesFilled[i]=0; - for (int j = 0; j < K; ++j) { - B_IsNew[i][j] = 1; - B_Dist[i][j] = -1.0; - } - } - /** - * Random Initialization of B_Index - */ - int randomIndex, iter; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - iter = 1; - while (iter) { - randomIndex = rand() % N; - if (randomIndex != i) { - B_Index[i][j] = randomIndex; - iter = 0; - } - } - } - } - /** - * Replace the farthest point in B_Index (for u1) with u2 if u2 is closer - *

- * This method corresponds to UPDATENN(B[u1], <u2, l(u1,u2)>) in the paper

- * @param Dist represents B_Dist - * @param Index represents B_Index - * @param IsNew represents B_IsNew - * @param u1 the indice of point that we want to potentially update its K-NN with the point u2 - * @param u2 the indice of potential K-NN fpr point u1 - * @param distance the spatial distance between u1 and u2 - * @param flag updates B_IsNew - * @return 1 if B_Index[u1][.] is updated, 0 otherwise - */ - auto UpdateNN = [&](int u1, int u2, double distance, int flag = 1) { - - if(allEntriesFilled[u1]==0){ - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] < 0) { - - for (int jj = 0; jj < j; jj++) {if (B_Index[u1][jj] == u2) return 0;} - - B_Dist[u1][j] = distance; - B_Index[u1][j] = u2; - B_IsNew[u1][j] = flag; - if (j==K-1) allEntriesFilled[u1]=1; - return 1;} - } - } - - else{ - for (int j = 0; j < K; j++) { - if (B_Index[u1][j] == u2) return 0; - } - - double max = DBL_MIN; - int index = -1; - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] > max) { - max = B_Dist[u1][j]; - index = j; - } - } - if (index == -1) { cout << "Error"; } - if (distance < max) { - B_Dist[u1][index] = distance; - B_Index[u1][index] = u2; - B_IsNew[u1][index] = flag; - return 1; - } - else { return 0; } - } - }; - /** - * Main Loop of the Algorithm - */ - bool iterate = true; - while (iterate) { - /** - * Create "New" for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - if (float(rand() % 100) < sampleRate*100) { - if (B_IsNew[i][j] == 1) { - New_Index[i].push_back(B_Index[i][j]); - B_IsNew[i][j] = 0; - } - } - } - } - /** - * Create "New'"(or REVERSE("New")) for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - Reverse_New_Index[New_Index[i][j]].push_back(i); - } - } - /** - * Random Sampling from "New'" - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < Reverse_New_Index[i].size(); ++j) { - if (float(rand() % 100) < sampleRate*100) { - Sampled_Reverse_New_Index[i].push_back(Reverse_New_Index[i][j]); - } - } - } - /** - * "New"= "New" U SAMPLE("New'", pK) - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - New_Final_List[i].push_back(New_Index[i][j]); - } - for (int j = 0; j < Sampled_Reverse_New_Index[i].size(); ++j) { - New_Final_List[i].push_back(Sampled_Reverse_New_Index[i][j]); - } - } - /** - * c=c+UPDATENN(B[u1],) - */ - int c_criteria = 0; - int abort=0; - - for (int i = 0; i < N; ++i) { - if (abort != 0) break; - - #pragma omp parallel for schedule(dynamic) - for (int it = 0; it < New_Final_List[i].size(); ++it) { - int par1= New_Final_List[i][it]; - - for (int it2 = it+1; it2 < New_Final_List[i].size(); ++it2) { - int par2= New_Final_List[i][it2]; - if (par1 != par2 && abort ==0) { - - double dist = 0; - for (int j = 0; j < Dim; ++j) { - dist += pow((dataPoints[par1][j] - dataPoints[par2][j]), 2); - } - double dista = sqrt(dist); - if (dista < epsilon) { - logFile << "Found Duplicate Data for Points "<< par1 << " and " << par2 <> aggregateResults; - for (int j=0; j - * August 2019 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using boost::iostreams::mapped_file_source; -using boost::iostreams::stream; -using namespace std; - -/** - * Read the output of linux command execution - * @param cmd is the linux command to be executed - * @return the output from the execution of the linux command - */ -std::string exec(const char* cmd) { - std::array buffer; - std::string result; - std::unique_ptr 
pipe(popen(cmd, "r"), pclose); - if (!pipe) { - throw std::runtime_error("popen() failed!"); - } - while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { - result += buffer.data(); - } - return result; -} - -int main(int argc, char * const argv[]) { - /** - * The errors and informational messages are outputted to the log file - */ - ofstream logFile; - string logFileName="Setting.txt"; - logFile.open(logFileName); - - /** - * The input parameters are read from command line which are as follow. - * inputPath: The full path to the input file containig the dataset. - * outputPath: The full path to the output csv files. - * K: K in K-NN that means the desired number of Nearest Neighbours to be computed. - * sampleRate: The rate at which we do sampling. This parameter plays a key role in the performance. - * This parameter is a trades-off between the performance and the accuracy of the results. - * Values closer to 1 provides more accurate results but the execution takes longer. - * convThreshold: Convergance Threshold. A fixed integer is used here instead of delta*N*K. - * colIndex1 and colIndex2 (optional): The indices of columns from the input csv file where raw data exists continuously in between. - * If these two arguments were left blank, the code assumes that the entire input csv file is raw data - * and automatically computes the number of columns in the input csv file. - */ - string filePath, outputPath, outputPath2, inputPath,LogoutputPath; - int K,convThreshold, colIndex1=-1, colIndex2=-1; - float sampleRate; - - for (int i=1; ipath().extension() == ext){ - fileFound = true; - filePath = it->path().string(); - break; - } - ++it; - } - if (!fileFound){ - logFile << "CSV file is not found in the input path"; - cout << "CSV file is not found in the input path"; - return 1; - } - } - else if (string(argv[i])=="--K") K=atoi(argv[i+1]); - else if (string(argv[i])=="--sampleRate") sampleRate=stof(argv[i+1]); - else if (string(argv[i])=="--convThreshold") convThreshold=stof(argv[i+1]); - else if (string(argv[i])=="--outputPath"){ - boost::filesystem::path p(argv[i+1]); - - if(!boost::filesystem::exists(p) || !boost::filesystem::is_directory(p)) - { - logFile << "Incorrect output path"; - cout << "Incorrect output path"; - return 1; - } - - LogoutputPath=argv[i+1]; - boost::filesystem::path joinedPath = p / boost::filesystem::path("KNN_Indices.csv"); - outputPath = joinedPath.string(); - boost::filesystem::path joinedPath2 = p / boost::filesystem::path("KNN_Distances.csv"); - outputPath2 = joinedPath2.string(); - } - else if (string(argv[i])=="--colIndex1") { - colIndex1=stof(argv[i+1]); - if (colIndex1<1) { - logFile << "colIndex1 should be greater than 1"; - cout << "colIndex1 should be greater than 1"; - return 1; - } - } - else if (string(argv[i])=="--colIndex2") { - colIndex2=stof(argv[i+1]); - if (colIndex2<1) { - logFile << "colIndex2 should be greater than 1"; - cout << "colIndex2 should be greater than 1"; - return 1; - } - } - } - - logFile<<"------------The following Input Arguments were read------------"< *New_Index = new std::vector[N]; - /** - * Data structure for REVERSE(new[v]) or new' - */ - vector *Reverse_New_Index = new vector[N]; - /** - * Data Structure for SAMPLE(new'[v],pk) - */ - vector *Sampled_Reverse_New_Index = new vector[N]; - /** - * Data Structure for new[v] U SAMPLE(new'[v],pk) - */ - list *New_Final_List = new list[N]; - /** - * Iterators to access data stored in the list - */ - list::iterator it, it2, it_temp; - /** - * An approximation of zero 
in computing distances. Two points with the distance - * smaller than epsilon are considered as one point. - */ - double epsilon = 1e-10; // - short* allEntriesFilled = new short[N]; - /** - * At first, let's Read Dataset from Input File Using Memory Mapping - */ - mapped_file_source mmap(filePath); - stream is(mmap, std::ios::binary); - if (is.fail()) - { - logFile << "error in Opening Input File" << endl; - cout << "error in Opening Input File" << endl; - return 1; - } - /** - * Remove the header info - */ - string dummyLine; - getline(is, dummyLine); - /** - * Reading the Entire Dataset - */ - if (argc==11){ - for (int i = 0; i < N; ++i) { - string temp, temp2; - getline(is, temp); - for (int j = 0; j < Dim; ++j) { - temp2 = temp.substr(0, temp.find(",")); - dataPoints[i][j] = atof(temp2.c_str()); - temp.erase(0, temp.find(",") + 1); - } - } - } else { - for (int i = 0; i < N; ++i) { - string temp, temp2; - getline(is, temp); - for (int j = 0; j < Dim; ++j) { - temp2 = temp.substr(0, temp.find(",")); - if (j >= colIndex1-1 && j < colIndex2) dataPoints[i][j] = atof(temp2.c_str()); - temp.erase(0, temp.find(",") + 1); - } - } - } - mmap.close(); - - if (colIndex1 != -1) Dim=colIndex2-colIndex1+1; - if (Dim <1) { - logFile << "Error in Computing the Dimension of input csv file" << endl; - cout << "Error in Computing the Dimension of input csv file" << endl; - return 1; - } - /** - * define a seed for random generator. Using a constant value produces - * the same set of random numbers and is good for debugging. Alternatively, - * we can select the seed number randomly as srand(time(NULL)) - */ - srand(17); - /** - * Initialization of Arrays B_IsNew and B_Dist - */ - for (int i = 0; i < N; ++i) { - allEntriesFilled[i]=0; - for (int j = 0; j < K; ++j) { - B_IsNew[i][j] = 1; - B_Dist[i][j] = -1.0; - } - } - /** - * Random Initialization of B_Index - */ - int randomIndex, iter; - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - iter = 1; - while (iter) { - randomIndex = rand() % N; - if (randomIndex != i) { - B_Index[i][j] = randomIndex; - iter = 0; - } - } - } - } - /** - * Replace the farthest point in B_Index (for u1) with u2 if u2 is closer - *

- * This method corresponds to UPDATENN(B[u1], <u2, l(u1,u2)>) in the paper

- * @param Dist represents B_Dist - * @param Index represents B_Index - * @param IsNew represents B_IsNew - * @param u1 the indice of point that we want to potentially update its K-NN with the point u2 - * @param u2 the indice of potential K-NN fpr point u1 - * @param distance the spatial distance between u1 and u2 - * @param flag updates B_IsNew - * @return 1 if B_Index[u1][.] is updated, 0 otherwise - */ - auto UpdateNN = [&](int u1, int u2, double distance, int flag = 1) { - - if(allEntriesFilled[u1]==0){ - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] < 0) { - - for (int jj = 0; jj < j; jj++) {if (B_Index[u1][jj] == u2) return 0;} - - B_Dist[u1][j] = distance; - B_Index[u1][j] = u2; - B_IsNew[u1][j] = flag; - if (j==K-1) allEntriesFilled[u1]=1; - return 1;} - } - } - - else{ - for (int j = 0; j < K; j++) { - if (B_Index[u1][j] == u2) return 0; - } - - double max = DBL_MIN; - int index = -1; - for (int j = 0; j < K; j++) { - if (B_Dist[u1][j] > max) { - max = B_Dist[u1][j]; - index = j; - } - } - if (index == -1) { cout << "Error"; } - if (distance < max) { - B_Dist[u1][index] = distance; - B_Index[u1][index] = u2; - B_IsNew[u1][index] = flag; - return 1; - } - else { return 0; } - } - return 0; - }; - /** - * Main Loop of the Algorithm - */ - bool iterate = true; - while (iterate) { - /** - * Create "New" for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < K; ++j) { - if (float(rand() % 100) < sampleRate*100) { - if (B_IsNew[i][j] == 1) { - New_Index[i].push_back(B_Index[i][j]); - B_IsNew[i][j] = 0; - } - } - } - } - /** - * Create "New'"(or REVERSE("New")) for each Datapoint - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - Reverse_New_Index[New_Index[i][j]].push_back(i); - } - } - /** - * Random Sampling from "New'" - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < Reverse_New_Index[i].size(); ++j) { - if (float(rand() % 100) < sampleRate*100) { - Sampled_Reverse_New_Index[i].push_back(Reverse_New_Index[i][j]); - } - } - } - /** - * "New"= "New" U SAMPLE("New'", pK) - */ - for (int i = 0; i < N; ++i) { - for (int j = 0; j < New_Index[i].size(); ++j) { - New_Final_List[i].push_back(New_Index[i][j]); - } - for (int j = 0; j < Sampled_Reverse_New_Index[i].size(); ++j) { - New_Final_List[i].push_back(Sampled_Reverse_New_Index[i][j]); - } - } - /** - * c=c+UPDATENN(B[u1],) - */ - int c_criteria = 0; - for (int i = 0; i < N; ++i) { - for (it = New_Final_List[i].begin(); it != New_Final_List[i].end(); it++) { - it_temp = it; - advance(it_temp, 1); - for (it2 = it_temp; it2 != New_Final_List[i].end(); it2++) { - if (*it != *it2) { - double dist = 0; - for (int i = 0; i < Dim; ++i) { - dist += pow((dataPoints[*it][i] - dataPoints[*it2][i]), 2); - } - double dista = sqrt(dist); - if (dista < epsilon) { - logFile << "Found Duplicate Data for Points "<< *it << " and " << *it2<> aggregateResults; - for (int j=0; j\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
-serialize = - {major}.{minor}.{patch}-{release}{dev} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = _ -first_value = dev -values = - dev - _ - -[bumpversion:part:dev] - -[bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bumpversion:file:plugin.json] - -[bumpversion:file:VERSION] - -[bumpversion:file:README.md] - -[bumpversion:file:CHANGELOG.md] - -[bumpversion:file:src/polus/images/clustering/k_means/__init__.py] diff --git a/clustering/k-means-clustering-tool/CHANGELOG.md b/clustering/k-means-clustering-tool/CHANGELOG.md deleted file mode 100644 index 5689120f4..000000000 --- a/clustering/k-means-clustering-tool/CHANGELOG.md +++ /dev/null @@ -1,13 +0,0 @@ -# K-Means Clustering(0.3.5) - -1. This plugin is updated only to the new plugin standards -2. Before plugin support only `.csv` as an input files supported `.csv` and `.feather` file formats. Now this plugin support other vaex supported file formats both as inputs and outputs. -3. Some additional input arguments added `filePattern`, `fileExtension` -4. Implemented latest updated filepattern package -5. This plugin is now installable with pip. -6. Argparse package is replaced with Typer package for command line arguments. -7. `baseCommand` added in a plugin manifiest. -8. `--preview` flag is added which shows outputs to be generated by this plugin. -9. Use `python -m python -m polus.plugins.clustering.k_means` to run plugin from command line. -10. No unnitests before and new pytests added for testing. -11. Implemented parallel processing diff --git a/clustering/k-means-clustering-tool/Dockerfile b/clustering/k-means-clustering-tool/Dockerfile deleted file mode 100644 index 1d0d53a9e..000000000 --- a/clustering/k-means-clustering-tool/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM polusai/bfio:2.1.9 - -# environment variables defined in polusai/bfio -ENV EXEC_DIR="/opt/executables" -ENV POLUS_IMG_EXT=".ome.tif" -ENV POLUS_TAB_EXT=".arrow" -ENV POLUS_LOG="INFO" - -# Work directory defined in the base container -WORKDIR ${EXEC_DIR} - -# TODO: Change the tool_dir to the tool directory -ENV TOOL_DIR="clustering/k-means-clustering-tool" - -# Copy the repository into the container -RUN mkdir image-tools -COPY . ${EXEC_DIR}/image-tools - -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir - -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint -ENTRYPOINT ["python3", "-m", "polus.images.clustering.k_means_clustering"] -CMD ["--help"] diff --git a/clustering/k-means-clustering-tool/README.md b/clustering/k-means-clustering-tool/README.md deleted file mode 100644 index 5dd8ec300..000000000 --- a/clustering/k-means-clustering-tool/README.md +++ /dev/null @@ -1,65 +0,0 @@ -# K-Means Clustering(v0.3.5) - -The K-Means Clustering plugin clusters the data using Scikit-learn K-Means clustering algorithm and outputs csv file. Each instance(row) in the input csv file is assigned to one of the clusters. The output csv file contains the column 'Cluster' that shows which cluster the instance belongs to. - -## Inputs: - -### Input data: -The input tabular data that need to be clustered. This plugin supports `.csv` and `.arrow` file formats - -### Methods: -Choose any one of the method mentioned to determine the k-value and cluster the data. 
- -#### Elbow method -The elbow method runs k-means clustering for a range of values of k and for each k value it calculates the within cluster sum of squared errors (WSS). The idea behind this method is that SSE tends to decrease towards 0 as k-value increases. The goal here is to choose a k-value that has low WSS and the elbow represents where there is diminishing returns by increasing k. - -#### Calinski-Harabasz index -The Calinski-Harabasz index is defined as the ratio of the sum of between-cluster dispersion to the sum of within-cluster dispersion. To choose k, pick maximum number of clusters to be considered and then choose the value of k with the highest score. - -#### Davies-Bouldin index -The Davies-Bouldin index is defined as the average similarity measure of each cluster with its most similar one, where similarity is a ratio of within-cluster distances to between-cluster distances. To choose k value, pick maximum number of clusters to be considered and choose the value of k with lowest value for DB_index. - -### Manual -Select manual method only when you know the number of clusters required to cluster the data. - -### Minimum range: -Enter starting number of sequence in range function to determine k-value. This parameter is required only when elbow or Calinski Harabasz or Davies Bouldin methods are selected. - -### Maximum range: -Enter ending number of sequence in range function to determine k-value. This parameter is required only when elbow or Calinski Harabasz or Davies Bouldin methods are selected. - -### Number of clusters: -Enter k-value if you already know how many clusters are required. This parameter is required only when manual method is selected. - -## Note: -1. If 'Manual' method is selected, enter number of clusters required. -2. If 'Elbow' or 'CalinskiHarabasz' or 'DaviesBouldin' methods are selected, then you should enter values for both 'maximumrange' and 'minimumrange'. -3. The 'minimumrange'value should be >1. - -## Output: -The output is a tabular file containing the cluster data to which each instance in the data belongs to. - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). 
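
The k-selection strategies described above (elbow, Calinski-Harabasz, Davies-Bouldin) can be illustrated with a short, self-contained scikit-learn sketch. This is illustrative only and is not the plugin's exact implementation; the synthetic data, the k range, and the line-distance elbow heuristic used here are assumptions.

```python
# Illustrative sketch of the three automatic k-selection methods described above.
# Not the plugin's exact code: the synthetic data and the elbow heuristic are assumptions.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

rng = np.random.default_rng(0)
data = rng.random((500, 4))           # hypothetical numeric-only dataset
k_range = range(2, 8)                 # corresponds to minimumrange..maximumrange

wss, ch_scores, db_scores = [], [], []
for k in k_range:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(data)
    labels = kmeans.labels_
    wss.append(kmeans.inertia_)       # within-cluster sum of squared errors
    ch_scores.append(calinski_harabasz_score(data, labels))
    db_scores.append(davies_bouldin_score(data, labels))

# Elbow: pick the k whose WSS point lies farthest from the straight line joining
# the first and last points of the curve (one common heuristic).
pts = np.column_stack([np.arange(len(wss), dtype=float), wss])
line = (pts[-1] - pts[0]) / np.linalg.norm(pts[-1] - pts[0])
vecs = pts - pts[0]
dists = np.linalg.norm(vecs - np.outer(vecs @ line, line), axis=1)
k_elbow = k_range[int(np.argmax(dists))]

k_ch = k_range[int(np.argmax(ch_scores))]   # Calinski-Harabasz: highest score wins
k_db = k_range[int(np.argmin(db_scores))]   # Davies-Bouldin: lowest index wins
print(k_elbow, k_ch, k_db)
```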
- -## Options - -This plugin takes seven input arguments and one output argument: - -| Name | Description | I/O | Type | -| ---------------- | --------------------------------------------------------------------------- | ------ | ------------- | -| `--inpdir` | Input tabular data | Input | genericData | -| `--filePattern` | Pattern to parse tabular files | Input | string | -| `--methods` | Select either Elbow or Calinski Harabasz or Davies Bouldin or Manual method | Input | enum | -| `--minimumrange` | Enter minimum k-value | Input | integer | -| `--maximumrange` | Enter maximum k-value | Input | integer | -| `--numofclus` | Enter number of clusters | Input | integer | -| `--outdir` | Output collection | Output | genericData | -| `--preview` | Generate JSON file with outputs | Output | JSON | diff --git a/clustering/k-means-clustering-tool/VERSION b/clustering/k-means-clustering-tool/VERSION deleted file mode 100644 index c2c0004f0..000000000 --- a/clustering/k-means-clustering-tool/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.3.5 diff --git a/clustering/k-means-clustering-tool/build-docker.sh b/clustering/k-means-clustering-tool/build-docker.sh deleted file mode 100644 index 4dedab2f6..000000000 --- a/clustering/k-means-clustering-tool/build-docker.sh +++ /dev/null @@ -1,23 +0,0 @@ - -#!/bin/bash - -# Change the name of the tool here -tool_dir="clustering" -tool_name="k-means-clustering-tool" - -# The version is read from the VERSION file -version=$(", -"Kelechi Nina Mezu ", -"hamshkhawar " -] -readme = "README.md" -packages = [{include = "polus", from = "src"}] - -[tool.poetry.dependencies] -python = "^3.9" -filepattern = "^2.0.0" -typer = "^0.7.0" -nyxus = "^0.5.0" -vaex = "^4.7.0" -scikit_learn="^1.0.2" -numpy = "<2.0.0" - -[tool.poetry.group.dev.dependencies] -bump2version = "^1.0.1" -pre-commit = "^3.0.4" -black = "^23.1.0" -flake8 = "^6.0.0" -mypy = "^1.0.0" -pytest = "^7.2.1" -ipykernel = "^6.21.2" -requests = "^2.28.2" -pandas = "^2.0.1" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/clustering/k-means-clustering-tool/run-plugin.sh b/clustering/k-means-clustering-tool/run-plugin.sh deleted file mode 100644 index 18c8bea46..000000000 --- a/clustering/k-means-clustering-tool/run-plugin.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -version=$( None: - """K-means clustering plugin.""" - logger.info(f"inpDir = {inp_dir}") - logger.info(f"filePattern = {file_pattern}") - logger.info(f"minimumRange = {minimum_range}") - logger.info(f"maximumRange = {maximum_range}") - logger.info(f"numOfClus = {num_of_clus}") - logger.info(f"outDir = {out_dir}") - - assert inp_dir.exists(), f"{inp_dir} doesnot exist!! Please check input path again" - assert out_dir.exists(), f"{out_dir} doesnot exist!! Please check output path again" - assert file_pattern in [ - ".csv", - ".arrow", - ], f"{file_pattern} tabular files are not supported by this plugin" - - num_threads = max([cpu_count(), 2]) - - pattern = ".*" + file_pattern - fps = fp.FilePattern(inp_dir, pattern) - print(pattern) - - if not fps: - msg = f"No {file_pattern} files found." 
- raise ValueError(msg) - - if preview: - with open(pathlib.Path(out_dir, "preview.json"), "w") as jfile: - out_json: dict[str, Any] = { - "filepattern": pattern, - "outDir": [], - } - for file in fps(): - out_name = str(file[1][0].stem) + POLUS_TAB_EXT - out_json["outDir"].append(out_name) - json.dump(out_json, jfile, indent=2) - - flist = [f[1][0] for f in fps()] - - with multiprocessing.Pool(processes=num_threads) as executor: - executor.map( - partial( - km.clustering, - file_pattern=pattern, - methods=methods, - minimum_range=minimum_range, - maximum_range=maximum_range, - num_of_clus=num_of_clus, - out_dir=out_dir, - ), - flist, - ) - executor.close() - executor.join() - - -if __name__ == "__main__": - app() diff --git a/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/k_means.py b/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/k_means.py deleted file mode 100644 index 3e58f8a33..000000000 --- a/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/k_means.py +++ /dev/null @@ -1,216 +0,0 @@ -"""K_means clustering.""" -import logging -import os -import pathlib -from typing import Optional - -import numpy -import numpy as np -import numpy.matlib -import vaex -from sklearn.cluster import KMeans -from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score - -from .utils import Methods - -# Initialize the logger -logging.basicConfig( - format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", - datefmt="%d-%b-%y %H:%M:%S", -) -logger = logging.getLogger("main") -logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) -POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".arrow") - - -def elbow(data_array: np.array, minimum_range: int, maximum_range: int) -> np.array: - """Determine k value and cluster data using elbow method. - - Args: - data_array : Input data. - minimum_range : Starting number of sequence in range function to determine k-value. - maximum_range : Ending number of sequence in range function to determine k-value. - - Returns: - Labeled data. 
- """ - sse = [] - label_value = [] - logger.info("Starting Elbow Method...") - K = range(minimum_range, maximum_range + 1) - for k in K: - kmeans = KMeans(n_clusters=k, random_state=9).fit(data_array) - centroids = kmeans.cluster_centers_ - pred_clusters = kmeans.predict(data_array) - curr_sse = 0 - - # calculate square of Euclidean distance of each point from its cluster center and add to current WSS - logger.info("Calculating Euclidean distance...") - for i in range(len(data_array)): - curr_center = centroids[pred_clusters[i]] - curr_sse += np.linalg.norm(data_array[i] - np.array(curr_center)) ** 2 - sse.append(curr_sse) - labels = kmeans.labels_ - label_value.append(labels) - - logger.info("Finding elbow point in curve...") - # Find the elbow point in the curve - points = len(sse) - # Get coordinates of all points - coord = np.vstack((range(points), sse)).T - # First point - f_point = coord[0] - # Vector between first and last point - linevec = coord[-1] - f_point - # Normalize the line vector - linevecn = linevec / np.sqrt(np.sum(linevec**2)) - # Vector between all point and first point - vecf = coord - f_point - # Parallel vector - prod = np.sum(vecf * numpy.matlib.repmat(linevecn, points, 1), axis=1) - vecfpara = np.outer(prod, linevecn) - # Perpendicular vector - vecline = vecf - vecfpara - # Distance from curve to line - dist = np.sqrt(np.sum(vecline**2, axis=1)) - # Maximum distance point - k_cluster = np.argmax(dist) + minimum_range - logger.info("k cluster: %s", k_cluster) - logger.info("label value: %s", label_value) - logger.info("Setting label_data") - label_data = label_value[k_cluster] - return label_data - - -def calinski_davies( - data_array: np.array, methods: Methods, minimum_range: int, maximum_range: int -) -> np.array: - """Determine k value and cluster data using Calinski Harabasz Index method or Davies Bouldin based on method selection. - - Args: - data: Input data. - methods: Select either Calinski Harabasz or Davies Bouldin method. - minimum_range: Starting number of sequence in range function to determine k-value. - maximum_range:Ending number of sequence in range function to determine k-value. - - Returns: - Labeled data. - """ - K = range(minimum_range, maximum_range + 1) - chdb = [] - label_value = [] - for k in K: - kmeans = KMeans(n_clusters=k, random_state=9).fit(data_array) - labels = kmeans.labels_ - label_value.append(labels) - if f"{methods}" == "CalinskiHarabasz": - ch_db = calinski_harabasz_score(data_array, labels) - else: - ch_db = davies_bouldin_score(data_array, labels) - chdb.append(ch_db) - if f"{methods}" == "CalinskiHarabasz": - score = max(chdb) - else: - score = min(chdb) - k_cluster = chdb.index(score) - label_data = label_value[k_cluster] - return label_data - - -def clustering( - file: pathlib.Path, - file_pattern: str, - methods: Methods, - minimum_range: int, - maximum_range: int, - num_of_clus: int, - out_dir: pathlib.Path, -): - """K-means clustering methods to find clusters of similar or more related objects. - - Args: - file: Input path. - file_pattern: Pattern to parse tabular files. - methods: Select either Calinski Harabasz or Davies Bouldin method or Manual. - minimum_range: Starting number of sequence in range function to determine k-value. - maximum_range:Ending number of sequence in range function to determine k-value. 
- """ - # Get file name - filename = file.stem - logger.info("Started reading the file " + file.name) - with open(file, encoding="utf-8", errors="ignore") as fr: - ncols = len(fr.readline().split(",")) - chunk_size = max([2**24 // ncols, 1]) - - if f"{file_pattern}" == ".csv": - df = vaex.read_csv(file, convert=True, chunk_size=chunk_size) - else: - df = vaex.open(file) - # Get list of column names - cols = df.get_column_names() - - # Separate data by categorical and numerical data types - numerical = [] - categorical = [] - for col in cols: - if df[col].dtype == str: - categorical.append(col) - else: - numerical.append(col) - # Remove label field - if "label" in numerical: - numerical.remove("label") - - if numerical is None: - raise ValueError("There are no numerical features in the data.") - else: - data = df[numerical] - - if categorical: - cat_array = df[categorical] - else: - logger.info("No categorical features found in the data") - - if f"{methods}" != "Manual": - # Check whether minimum range and maximum range value is entered - if methods and not (minimum_range or maximum_range): - raise ValueError( - "Enter both minimumrange and maximumrange to determine k-value." - ) - if minimum_range <= 1: - raise ValueError("Minimumrange should be greater than 1.") - logger.info( - "Determining k-value using " + methods + " and clustering the data." - ) - if f"{methods}" == "CalinskiHarabasz": - label_data = calinski_davies(data, methods, minimum_range, maximum_range) - if f"{methods}" == "DaviesBouldin": - label_data = calinski_davies(data, methods, minimum_range, maximum_range) - if f"{methods}" == "Elbow": - label_data = elbow(data, minimum_range, maximum_range) - else: - # Check whether numofclus is entered - if not num_of_clus: - raise ValueError("Enter number of clusters") - kvalue = num_of_clus - kmeans = KMeans(n_clusters=kvalue).fit(data) - label_data = kmeans.labels_ - - # Cluster data using K-Means clustering - logger.info("Adding Cluster Data") - data["Cluster"] = label_data - - # Add Categorical Data back to data processed - if categorical: - logger.info("Adding categorical data") - for col in categorical: - data[col] = cat_array[col].values - - # Save dataframe to feather file or to csv file - out_file = pathlib.Path(out_dir, (filename + POLUS_TAB_EXT)) - - if f"{POLUS_TAB_EXT}" in [".feather", ".arrow"]: - data.export_feather(out_file) - else: - logger.info("Saving csv file") - data.export_csv(out_file, chunk_size=chunk_size) diff --git a/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/utils.py b/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/utils.py deleted file mode 100644 index 91bb81bf8..000000000 --- a/clustering/k-means-clustering-tool/src/polus/images/clustering/k_means/utils.py +++ /dev/null @@ -1,12 +0,0 @@ -"""K_means clustering.""" -import enum - - -class Methods(str, enum.Enum): - """Clustering methods to determine k-value.""" - - ELBOW = "Elbow" - CALINSKIHARABASZ = "CalinskiHarabasz" - DAVIESBOULDIN = "DaviesBouldin" - MANUAL = "Manual" - Default = "Elbow" diff --git a/clustering/k-means-clustering-tool/tests/__init__.py b/clustering/k-means-clustering-tool/tests/__init__.py deleted file mode 100644 index 36f89f937..000000000 --- a/clustering/k-means-clustering-tool/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""K_means clustering.""" diff --git a/clustering/k-means-clustering-tool/tests/conftest.py b/clustering/k-means-clustering-tool/tests/conftest.py deleted file mode 100644 index 58dce0fef..000000000 --- 
a/clustering/k-means-clustering-tool/tests/conftest.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Test Fixtures.""" - -import pathlib -import shutil -import tempfile - -import numpy as np -import pandas as pd -import pytest - - -class Generatedata: - """Generate tabular data with several different file format.""" - - def __init__(self, file_pattern: str, size: int, outname: str) -> None: - """Define instance attributes.""" - self.dirpath = pathlib.Path.cwd() - self.inp_dir = tempfile.mkdtemp(dir=self.dirpath) - self.out_dir = tempfile.mkdtemp(dir=self.dirpath) - self.file_pattern = file_pattern - self.size = size - self.outname = outname - self.x = self.create_dataframe() - - def get_inp_dir(self) -> pathlib.Path: - """Get input directory.""" - return pathlib.Path(self.inp_dir) - - def get_out_dir(self) -> pathlib.Path: - """Get output directory.""" - return pathlib.Path(self.out_dir) - - def create_dataframe(self) -> pd.core.frame.DataFrame: - """Create Pandas dataframe.""" - rng = np.random.default_rng() - diction_1 = { - "A": np.linspace(0.0, 4.0, self.size, dtype="float32", endpoint=False), - "B": np.linspace(0.0, 6.0, self.size, dtype="float32", endpoint=False), - "C": np.linspace(0.0, 8.0, self.size, dtype="float32", endpoint=False), - "D": np.linspace(0.0, 10.0, self.size, dtype="float32", endpoint=False), - "label": rng.integers(low=1, high=4, size=self.size), - } - - return pd.DataFrame(diction_1) - - def csv_func(self) -> None: - """Convert pandas dataframe to csv file format.""" - self.x.to_csv(pathlib.Path(self.inp_dir, self.outname), index=False) - - def arrow_func(self) -> None: - """Convert pandas dataframe to Arrow file format.""" - self.x.to_feather(pathlib.Path(self.inp_dir, self.outname)) - - def __call__(self) -> None: - """To make a class callable.""" - data_ext = { - ".csv": self.csv_func, - ".arrow": self.arrow_func, - } - - return data_ext[self.file_pattern]() - - def clean_directories(self) -> None: - """Remove files.""" - for d in self.dirpath.iterdir(): - if d.is_dir() and d.name.startswith("tmp"): - shutil.rmtree(d) - - -def pytest_addoption(parser: pytest.Parser) -> None: - """Add options to pytest.""" - parser.addoption( - "--slow", - action="store_true", - dest="slow", - default=False, - help="run slow tests", - ) - - -@pytest.fixture( - params=[ - ("CalinskiHarabasz", 500, ".csv", 2, 5), - ("DaviesBouldin", 250, ".arrow", 2, 7), - ("Elbow", 500, ".arrow", 2, 10), - ("Manual", 200, ".arrow", 2, 5), - ], -) -def get_params(request: pytest.FixtureRequest) -> pytest.FixtureRequest: - """To get the parameter of the fixture.""" - return request.param diff --git a/clustering/k-means-clustering-tool/tests/test_main.py b/clustering/k-means-clustering-tool/tests/test_main.py deleted file mode 100644 index 7c516c759..000000000 --- a/clustering/k-means-clustering-tool/tests/test_main.py +++ /dev/null @@ -1,142 +0,0 @@ -"""K_means clustering.""" - -import shutil - -import filepattern as fp -import pytest -import vaex -from polus.images.clustering.k_means import k_means as km -from polus.images.clustering.k_means.__main__ import app -from typer.testing import CliRunner - -from .conftest import Generatedata - -runner = CliRunner() - - -@pytest.mark.parametrize( - ("ext", "minrange", "maxrange"), - [(".arrow", 2, 5), (".csv", 2, 7)], -) -@pytest.mark.skipif("not config.getoption('slow')") -def test_elbow(ext: str, minrange: int, maxrange: int) -> None: - """Testing elbow function.""" - d = Generatedata(ext, outname=f"data_1{ext}", size=10000) - d() - pattern = f".*{ext}" - fps = 
fp.FilePattern(d.get_inp_dir(), pattern) - - for file in fps(): - if f"{pattern}" == ".csv": - df = vaex.read_csv(file[1][0], convert=True) - else: - df = vaex.open(file[1][0]) - - label_data = km.elbow( - data_array=df[:, :4].values, - minimum_range=minrange, - maximum_range=maxrange, - ) - - assert label_data is not None - - d.clean_directories() - - -@pytest.mark.parametrize( - ("method", "datasize", "ext", "minrange", "maxrange"), - [ - ("CalinskiHarabasz", 500, ".arrow", 2, 5), - ("DaviesBouldin", 600, ".csv", 2, 7), - ], -) -@pytest.mark.skipif("not config.getoption('slow')") -def test_calinski_davies( - method: str, - datasize: int, - ext: str, - minrange: int, - maxrange: int, -) -> None: - """Testing calinski_davies and davies_bouldin methods.""" - d = Generatedata(ext, outname=f"data_1{ext}", size=datasize) - d() - pattern = f".*{ext}" - fps = fp.FilePattern(d.get_inp_dir(), pattern) - - for file in fps(): - if f"{pattern}" == ".csv": - df = vaex.read_csv(file[1][0], convert=True) - else: - df = vaex.open(file[1][0]) - - label_data = km.calinski_davies( - data_array=df[:, :4].values, - methods=method, - minimum_range=minrange, - maximum_range=maxrange, - ) - - assert label_data is not None - - d.clean_directories() - - -@pytest.mark.skipif("not config.getoption('slow')") -def test_clustering(get_params: pytest.FixtureRequest) -> None: - """Test clustering function.""" - method, datasize, ext, minrange, maxrange = get_params - d = Generatedata(ext, outname=f"data_1{ext}", size=datasize) - d() - pattern = f".*{ext}" - numclusters = 3 - fps = fp.FilePattern(d.get_inp_dir(), pattern) - for file in fps(): - km.clustering( - file=file[1][0], - file_pattern=ext, - methods=method, - minimum_range=minrange, - maximum_range=maxrange, - num_of_clus=numclusters, - out_dir=d.get_out_dir(), - ) - assert d.get_out_dir().joinpath("data_1.arrow") - df = vaex.open(d.get_out_dir().joinpath("data_1.arrow")) - assert "Cluster" in df.columns - d.clean_directories() - - -def test_cli(get_params: pytest.FixtureRequest) -> None: - """Test Cli.""" - method, data_size, inpext, minrange, maxrange = get_params - d = Generatedata(inpext, outname=f"data_1{inpext}", size=data_size) - d() - shutil.copy( - d.get_inp_dir().joinpath(f"data_1{inpext}"), - d.get_inp_dir().joinpath(f"data_2{inpext}"), - ) - numclusters = 3 - - result = runner.invoke( - app, - [ - "--inpDir", - d.get_inp_dir(), - "--filePattern", - inpext, - "--methods", - method, - "--minimumRange", - minrange, - "--maximumRange", - maxrange, - "--numOfClus", - numclusters, - "--outDir", - d.get_out_dir(), - ], - ) - assert result.exit_code == 0 - - d.clean_directories() diff --git a/clustering/outlier-removal-tool/.bumpversion.cfg b/clustering/outlier-removal-tool/.bumpversion.cfg deleted file mode 100644 index 106859eb1..000000000 --- a/clustering/outlier-removal-tool/.bumpversion.cfg +++ /dev/null @@ -1,29 +0,0 @@ -[bumpversion] -current_version = 0.2.7 -commit = True -tag = False -parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
-serialize = - {major}.{minor}.{patch}-{release}{dev} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = _ -first_value = dev -values = - dev - _ - -[bumpversion:part:dev] - -[bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bumpversion:file:VERSION] - -[bumpversion:file:README.md] - -[bumpversion:file:plugin.json] - -[bumpversion:file:src/polus/images/clustering/outlier_removal/__init__.py] diff --git a/clustering/outlier-removal-tool/.dockerignore b/clustering/outlier-removal-tool/.dockerignore deleted file mode 100644 index 7c603f814..000000000 --- a/clustering/outlier-removal-tool/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -.venv -out -tests -__pycache__ diff --git a/clustering/outlier-removal-tool/.gitignore b/clustering/outlier-removal-tool/.gitignore deleted file mode 100644 index 9ed1c3775..000000000 --- a/clustering/outlier-removal-tool/.gitignore +++ /dev/null @@ -1,23 +0,0 @@ -# Jupyter Notebook -.ipynb_checkpoints -poetry.lock -../../poetry.lock -# Environments -.env -.myenv -.venv -env/ -venv/ -# test data directory -data -# yaml file -.pre-commit-config.yaml -# hidden files -.DS_Store -.ds_store -# flake8 -.flake8 -../../.flake8 -__pycache__ -.mypy_cache -requirements.txt diff --git a/clustering/outlier-removal-tool/CHANGELOG.md b/clustering/outlier-removal-tool/CHANGELOG.md deleted file mode 100644 index 59c463e1e..000000000 --- a/clustering/outlier-removal-tool/CHANGELOG.md +++ /dev/null @@ -1,15 +0,0 @@ -# [0.2.6-dev0] - 2024-01-12 - -## Added - -- Pytests to test this plugin -- This plugin is now installable with pip. -- Added support for arrow file format in addition to csv - -## Changed - -- Updated dependencies (bfio, filepattern, preadator) to latest -- Argparse package is replaced with Typer package for command line arguments -- Replaced docker base image with latest container image with pre-installed bfio -- Replaced pandas with vaex -- Seperating descriptive from numerical features for outlier detection if present in the tabular data diff --git a/clustering/outlier-removal-tool/Dockerfile b/clustering/outlier-removal-tool/Dockerfile deleted file mode 100644 index 3aacb378b..000000000 --- a/clustering/outlier-removal-tool/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM polusai/bfio:2.3.3 - -# environment variables defined in polusai/bfio -ENV EXEC_DIR="/opt/executables" -ENV POLUS_IMG_EXT=".ome.tif" -ENV POLUS_TAB_EXT=".arrow" -ENV POLUS_LOG="INFO" - -# Work directory defined in the base container -WORKDIR ${EXEC_DIR} - -# TODO: Change the tool_dir to the tool directory -ENV TOOL_DIR="clustering/outlier-removal-tool" - -# Copy the repository into the container -RUN mkdir image-tools -COPY . ${EXEC_DIR}/image-tools - -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir - -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint -ENTRYPOINT ["python3", "-m", "polus.images.clustering.outlier_removal"] -CMD ["--help"] diff --git a/clustering/outlier-removal-tool/README.md b/clustering/outlier-removal-tool/README.md deleted file mode 100644 index 453ca1d54..000000000 --- a/clustering/outlier-removal-tool/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Outlier removal (v0.2.7) - -The outlier removal plugin removes the outliers from the data based on the method selected and outputs csv file. The output will have separate csv files for inliers and outliers. The input file should be in csv format. 
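
As a quick orientation before the method details below, the following is a condensed, illustrative sketch of the inlier/outlier split this plugin produces. It is not the plugin's implementation (see the deleted `outlier_removal.py` further down in this diff); the synthetic DataFrame and parameter values are assumptions.

```python
# Condensed sketch of the inlier/outlier split described in this README.
# Illustrative only; the synthetic DataFrame and parameters are assumptions.
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(1000, 3)), columns=["a", "b", "c"])

features = StandardScaler().fit_transform(df)     # standardize numeric columns
labels = IsolationForest(n_estimators=200, random_state=19).fit_predict(features)

df["anomaly"] = labels                             # +1 = inlier, -1 = outlier
inliers = df[df["anomaly"] == 1].drop(columns="anomaly")
outliers = df[df["anomaly"] == -1].drop(columns="anomaly")
print(f"{len(inliers)} inliers, {len(outliers)} outliers")
```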
- -The plugin support vaex supported input csv file that need outliers to be removed. The file should be in csv format. This is a required parameter for the plugin. - -## Methods - -Choose any one of the methods mentioned to remove outliers from the data. - -### Isolation Forest - -Ensemble-based unsupervised method for outlier detection. The algorithm isolates outliers instead of normal instances. It works based on the principle that outliers are few and different and hence, the outliers can be identified easier than the normal points. The score is calculated as the path length to isolate the observation. These two methods can be selected to detect outliers> - -1. `IsolationForest` Detect outliers globally that deviates significantly from the rest of the datapoints -2. `IForest` Detect local outliers that are distinct when compared to those of its neighbors. - - -### Global - - - -### Local - - - -## Outputs: - -Select the output file by passing value to `outputType`. User can select from following options `inlier`, `oulier` or `combined`. The combined file contains `anomaly` column which score each datapoint if it is inlier or outlier. - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh` - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Options - -This plugin takes three input arguments and one output argument: - -| Name | Description | I/O | Type | -| ----------- | ------------------------------------- | ------ | ------------- | -| `--inpDir` | Input directory containing tabular files | Input | genericData | -| `--filePattern` | Pattern to parse tabular file names | Input | string | -| `--methods` | Select methods for outlier removal | Input | enum | -| `--outputType` | Select type of output file | Input | enum | -| `--outdir` | Output collection | Output | genericData | -| `--preview` | Generate a JSON file with outputs | Output | JSON | diff --git a/clustering/outlier-removal-tool/VERSION b/clustering/outlier-removal-tool/VERSION deleted file mode 100644 index b0032849c..000000000 --- a/clustering/outlier-removal-tool/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.7 diff --git a/clustering/outlier-removal-tool/build-docker.sh b/clustering/outlier-removal-tool/build-docker.sh deleted file mode 100644 index 3d1dfe86e..000000000 --- a/clustering/outlier-removal-tool/build-docker.sh +++ /dev/null @@ -1,23 +0,0 @@ - -#!/bin/bash - -# Change the name of the tool here -tool_dir="clustering" -tool_name="outlier-removal-tool" - -# The version is read from the VERSION file -version=$(\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
-serialize = - {major}.{minor}.{patch}-{release}{dev} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = _ -first_value = dev -values = - dev - _ - -[bumpversion:part:dev] - -[bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bumpversion:file:plugin.json] - -[bumpversion:file:VERSION] - -[bumpversion:file:src/polus/plugins/clustering/outlier_removal/__init__.py] diff --git a/clustering/outlier-removal-tool/ict.yaml b/clustering/outlier-removal-tool/ict.yaml deleted file mode 100644 index c2627dd35..000000000 --- a/clustering/outlier-removal-tool/ict.yaml +++ /dev/null @@ -1,77 +0,0 @@ -author: -- Jayapriya Nagarajan -contact: hamdahshafqat.abbasi@nih.gov -container: polusai/outlier-removal-tool:0.2.7-dev0 -description: Remove outliers from the data. -entrypoint: python3 -m polus.images.clustering.outlier_removal -inputs: -- description: Input tabular data. - format: - - genericData - name: inpDir - required: true - type: path -- description: Filename pattern used to separate data. - format: - - string - name: filePattern - required: false - type: string -- description: Select methods for outlier removal - format: - - enum - name: method - required: false - type: string -- description: Select type of output file - format: - - enum - name: outputType - required: false - type: string -- description: Generate an output preview. - format: - - boolean - name: preview - required: false - type: boolean -name: polusai/OutlierRemoval -outputs: -- description: Output collection. - format: - - genericData - name: outDir - required: true - type: path -repository: https://github.com/PolusAI/polus-plugins -specVersion: 1.0.0 -title: Outlier Removal -ui: -- description: Input tabular data to be processed by this plugin. - key: inputs.inpDir - title: Input tabular data - type: path -- description: Filename pattern used to separate data. - key: inputs.filePattern - title: Filename pattern - type: text -- description: Select method for outlier removal. - fields: - - IsolationForest - - IForest - key: inputs.method - title: method - type: select -- description: Select output type. - fields: - - inlier - - outlier - - combined - key: inputs.outputType - title: outputType - type: select -- description: Generate an output preview. - key: inputs.preview - title: Preview - type: checkbox -version: 0.2.7-dev0 diff --git a/clustering/outlier-removal-tool/images/Global.PNG b/clustering/outlier-removal-tool/images/Global.PNG deleted file mode 100644 index c4be3b484..000000000 Binary files a/clustering/outlier-removal-tool/images/Global.PNG and /dev/null differ diff --git a/clustering/outlier-removal-tool/images/Local.PNG b/clustering/outlier-removal-tool/images/Local.PNG deleted file mode 100644 index 4a1580ca0..000000000 Binary files a/clustering/outlier-removal-tool/images/Local.PNG and /dev/null differ diff --git a/clustering/outlier-removal-tool/outlierremoval.cwl b/clustering/outlier-removal-tool/outlierremoval.cwl deleted file mode 100644 index 718c486d3..000000000 --- a/clustering/outlier-removal-tool/outlierremoval.cwl +++ /dev/null @@ -1,40 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.2 -inputs: - filePattern: - inputBinding: - prefix: --filePattern - type: string? - inpDir: - inputBinding: - prefix: --inpDir - type: Directory - method: - inputBinding: - prefix: --method - type: string? - outDir: - inputBinding: - prefix: --outDir - type: Directory - outputType: - inputBinding: - prefix: --outputType - type: string? 
- preview: - inputBinding: - prefix: --preview - type: boolean? -outputs: - outDir: - outputBinding: - glob: $(inputs.outDir.basename) - type: Directory -requirements: - DockerRequirement: - dockerPull: polusai/outlier-removal-tool:0.2.7-dev0 - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true - InlineJavascriptRequirement: {} diff --git a/clustering/outlier-removal-tool/package-release.sh b/clustering/outlier-removal-tool/package-release.sh deleted file mode 100644 index f833f6557..000000000 --- a/clustering/outlier-removal-tool/package-release.sh +++ /dev/null @@ -1,16 +0,0 @@ -# This script is designed to help package a new version of a plugin - -# Get the new version -version=$(", - "Hamdah Shafqat abbasi " - ] -readme = "README.md" -packages = [{include = "polus", from = "src"}] - -[tool.poetry.dependencies] -python = ">=3.9,<3.12" -filepattern = "^2.0.4" -typer = "^0.7.0" -tqdm = "^4.64.1" -preadator="0.4.0.dev2" -vaex = "^4.17.0" -scikit-learn = "^1.3.2" -pyod = "^1.1.2" - -[tool.poetry.group.dev.dependencies] -pre-commit = "^3.3.3" -bump2version = "^1.0.1" -pytest = "^7.3.2" -pytest-xdist = "^3.3.1" -pytest-sugar = "^0.9.7" -ipykernel = "^6.28.0" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/clustering/outlier-removal-tool/run-docker.sh b/clustering/outlier-removal-tool/run-docker.sh deleted file mode 100644 index f2c347263..000000000 --- a/clustering/outlier-removal-tool/run-docker.sh +++ /dev/null @@ -1,19 +0,0 @@ -version=$( None: - """Remove outliers from the data.""" - logger.info(f"--inpDir = {inp_dir}") - logger.info(f"--filePattern = {file_pattern}") - logger.info(f"--method = {method}") - logger.info(f"--outputType = {output_type}") - logger.info(f"--outDir = {out_dir}") - - inp_dir = inp_dir.resolve() - out_dir = out_dir.resolve() - - assert inp_dir.exists(), f"{inp_dir} does not exist!! Please check input path again" - assert ( - out_dir.exists() - ), f"{out_dir} does not exist!! 
Please check output path again" - - files = fp.FilePattern(inp_dir, file_pattern) - - if preview: - with Path.open(Path(out_dir, "preview.json"), "w") as jfile: - out_json: dict[str, Any] = { - "filepattern": file_pattern, - "outDir": [], - } - for file in files(): - outname = file[1][0].name.replace( - "".join(file[1][0].suffixes), - f"_{output_type}{rm.POLUS_TAB_EXT}", - ) - - out_json["outDir"].append(outname) - json.dump(out_json, jfile, indent=2) - - else: - with preadator.ProcessManager( - name="Cluster data using HDBSCAN", - num_processes=num_workers, - threads_per_process=2, - ) as pm: - for file in files(): - pm.submit_process( - rm.outlier_detection, - file[1][0], - method, - output_type, - out_dir, - ) - pm.join_processes() - - -if __name__ == "__main__": - app() diff --git a/clustering/outlier-removal-tool/src/polus/images/clustering/outlier_removal/outlier_removal.py b/clustering/outlier-removal-tool/src/polus/images/clustering/outlier_removal/outlier_removal.py deleted file mode 100644 index cb7364b3f..000000000 --- a/clustering/outlier-removal-tool/src/polus/images/clustering/outlier_removal/outlier_removal.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Outlier Removal Plugin.""" -import enum -import logging -import os -from pathlib import Path - -import numpy as np -import vaex -from pyod.models.iforest import IForest -from sklearn.ensemble import IsolationForest -from sklearn.preprocessing import StandardScaler - -logger = logging.getLogger(__name__) -logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) -POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".arrow") - -CHUNK_SIZE = 10000 - - -class Methods(str, enum.Enum): - """Available outlier detection methods.""" - - ISOLATIONFOREST = "IsolationForest" - IFOREST = "IForest" - DEFAULT = "IsolationForest" - - -class Outputs(str, enum.Enum): - """Output Files.""" - - INLIER = "inlier" - OUTLIER = "outlier" - COMBINED = "combined" - DEFAULT = "inlier" - - -def write_outputs(data: vaex.DataFrame, outname: Path) -> None: - """Write outputs in either arrow or csv file formats. - - Args: - data: vaex dataframe. - outname: Name of output file. - """ - if POLUS_TAB_EXT == ".arrow": - data.export_feather(outname) - logger.info(f"Saving outputs: {outname}") - if POLUS_TAB_EXT == ".csv": - data.export_csv(outname, chunk_size=CHUNK_SIZE) - logger.info(f"Saving outputs: {outname}") - - -def isolationforest(data_set: np.ndarray, method: Methods) -> np.ndarray: - """Isolation Forest algorithm. - - Args: - data_set: Input data. - method: Type of method to remove outliers. - - Returns: - ndarray whether or not the data point should be considered as an inlier. - - """ - if method == Methods.ISOLATIONFOREST: - clf = IsolationForest(random_state=19, n_estimators=200) - - if method == Methods.IFOREST: - clf = IForest(random_state=10, n_estimators=200) - - if method == Methods.DEFAULT: - clf = IsolationForest(random_state=19, n_estimators=200) - - clf.fit(data_set) - return clf.predict(data_set) - - -def outlier_detection( - file: Path, - method: Methods, - output_type: Outputs, - out_dir: Path, -) -> None: - """Detects outliers using Isolation Forest algorithm. - - Args: - file: Input tabular data. - method: Select a method to remove outliers. - output_type: Select type of output file. - out_dir: Path to output directory. 
- """ - if Path(file.name).suffix == ".csv": - data = vaex.from_csv(file, convert=True, chunk_size=CHUNK_SIZE) - else: - data = vaex.open(file) - - int_columns = [ - feature - for feature in data.get_column_names() - if data.data_type(feature) == int or data.data_type(feature) == float - ] - - if len(int_columns) == 0: - msg = "Features with integer datatype do not exist" - raise ValueError(msg) - - # Standardize the data - df = StandardScaler().fit_transform(data[int_columns]) - - # Detect outliers - logger.info("Detecting outliers using " + method) - rem_out = isolationforest(df, method) - - data["anomaly"] = rem_out - - if method == Methods.ISOLATIONFOREST or method == Methods.DEFAULT: - inliers = data[data["anomaly"] == 1] - outliers = data[data["anomaly"] == -1] - - if method == Methods.IFOREST: - inliers = data[data["anomaly"] == 0] - outliers = data[data["anomaly"] == 1] - - # Drop 'anomaly' column - inliers = inliers.drop("anomaly", inplace=True) - outliers = outliers.drop("anomaly", inplace=True) - - outname = Path(out_dir, f"{Path(file.name).stem}_{output_type}{POLUS_TAB_EXT}") - - if output_type == Outputs.INLIER: - write_outputs(inliers, outname) - if output_type == Outputs.OUTLIER: - write_outputs(outliers, outname) - if output_type == Outputs.COMBINED: - write_outputs(data, outname) - if output_type == Outputs.DEFAULT: - write_outputs(inliers, outname) diff --git a/clustering/outlier-removal-tool/tests/__init__.py b/clustering/outlier-removal-tool/tests/__init__.py deleted file mode 100644 index 727cdca8d..000000000 --- a/clustering/outlier-removal-tool/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Test Outlier Removal Plugin.""" diff --git a/clustering/outlier-removal-tool/tests/conftest.py b/clustering/outlier-removal-tool/tests/conftest.py deleted file mode 100644 index 1829c1afa..000000000 --- a/clustering/outlier-removal-tool/tests/conftest.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Test fixtures. - -Set up all data used in tests. 
-""" -import tempfile -from pathlib import Path - -import numpy as np -import pandas as pd -import pytest - - -@pytest.fixture( - params=[ - (5000, ".csv", "IsolationForest", "combined"), - (100000, ".arrow", "IForest", "inlier"), - (500000, ".csv", "IsolationForest", "outlier"), - ], -) -def get_params(request: pytest.FixtureRequest) -> tuple[int, str]: - """To get the parameter of the fixture.""" - return request.param - - -@pytest.fixture() -def generate_synthetic_data( - get_params: tuple[int, str, str, str], -) -> tuple[Path, Path, str, str, str]: - """Generate tabular data.""" - nrows, file_extension, method, output_type = get_params - - input_directory = Path(tempfile.mkdtemp(prefix="inputs_")) - output_directory = Path(tempfile.mkdtemp(prefix="out_")) - rng = np.random.default_rng() - tabular_data = { - "sepal_length": rng.random(nrows).tolist(), - "sepal_width": rng.random(nrows).tolist(), - "petal_length": rng.random(nrows).tolist(), - "petal_width": rng.random(nrows).tolist(), - "species": rng.choice( - ["Iris-setosa", "Iris-versicolor", "Iris-virginica"], - nrows, - ).tolist(), - } - - df = pd.DataFrame(tabular_data) - if file_extension == ".csv": - outpath = Path(input_directory, "data.csv") - df.to_csv(outpath, index=False) - if file_extension == ".arrow": - outpath = Path(input_directory, "data.arrow") - df.to_feather(outpath) - - return input_directory, output_directory, file_extension, method, output_type diff --git a/clustering/outlier-removal-tool/tests/test_cli.py b/clustering/outlier-removal-tool/tests/test_cli.py deleted file mode 100644 index c303a5d54..000000000 --- a/clustering/outlier-removal-tool/tests/test_cli.py +++ /dev/null @@ -1,59 +0,0 @@ -"""Test Command line Tool.""" -from typer.testing import CliRunner -from polus.images.clustering.outlier_removal.__main__ import app -import shutil -from pathlib import Path - - -def test_cli(generate_synthetic_data: tuple[Path, Path, str, str, str]) -> None: - """Test the command line.""" - inp_dir, out_dir, file_extension, method, output_type = generate_synthetic_data - file_pattern = f".*{file_extension}" - - runner = CliRunner() - result = runner.invoke( - app, - [ - "--inpDir", - inp_dir, - "--filePattern", - file_pattern, - "--method", - method, - "--outputType", - output_type, - "--outDir", - out_dir, - ], - ) - - assert result.exit_code == 0 - shutil.rmtree(inp_dir) - shutil.rmtree(out_dir) - - -def test_short_cli(generate_synthetic_data: tuple[Path, Path, str, str, str]) -> None: - """Test short command line.""" - inp_dir, out_dir, file_extension, method, output_type = generate_synthetic_data - file_pattern = f".*{file_extension}" - - runner = CliRunner() - result = runner.invoke( - app, - [ - "-i", - inp_dir, - "-f", - file_pattern, - "-m", - method, - "-ot", - output_type, - "-o", - out_dir, - ], - ) - - assert result.exit_code == 0 - shutil.rmtree(inp_dir) - shutil.rmtree(out_dir) diff --git a/clustering/outlier-removal-tool/tests/test_outlier_removal.py b/clustering/outlier-removal-tool/tests/test_outlier_removal.py deleted file mode 100644 index 5f90fb01c..000000000 --- a/clustering/outlier-removal-tool/tests/test_outlier_removal.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Test Outlier Removal Plugin.""" -import shutil -from pathlib import Path - -import filepattern as fp -import numpy as np -import polus.images.clustering.outlier_removal.outlier_removal as rm -import vaex - - -def test_outlier_detection( - generate_synthetic_data: tuple[Path, Path, str, str, str], -) -> None: - """Test outlier detection of 
tabular data.""" - inp_dir, out_dir, file_extension, method, output_type = generate_synthetic_data - - file_pattern = f".*{file_extension}" - files = fp.FilePattern(inp_dir, file_pattern) - for file in files(): - rm.outlier_detection( - file=file[1][0], - method=method, - output_type=output_type, - out_dir=out_dir, - ) - out_ext = [Path(f.name).suffix for f in out_dir.iterdir()] - assert all(out_ext) is True - shutil.rmtree(inp_dir) - shutil.rmtree(out_dir) - - -def test_isolationforest( - generate_synthetic_data: tuple[Path, Path, str, str, str], -) -> None: - """Test isolationforest method.""" - inp_dir, out_dir, file_extension, method, output_type = generate_synthetic_data - file_pattern = f".*{file_extension}" - files = fp.FilePattern(inp_dir, file_pattern) - for file in files(): - df = vaex.open(file[1][0]) - data = df[df.column_names[:-1]].values - prediction = rm.isolationforest(data, method) - assert len(prediction) != 0 - assert type(prediction) == np.ndarray - shutil.rmtree(inp_dir) - shutil.rmtree(out_dir) diff --git a/clustering/polus-feature-subsetting-plugin/Dockerfile b/clustering/polus-feature-subsetting-plugin/Dockerfile deleted file mode 100644 index babcd2385..000000000 --- a/clustering/polus-feature-subsetting-plugin/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ - -FROM polusai/bfio:2.1.9 - -# from bfio container -# ENV POLUS_EXT=".ome.tif" -# ENV POLUS_LOG="INFO" -# ENV EXEC_DIR="/opt/executables" -# ENV DATA_DIR="/data" - -COPY VERSION / - -ARG EXEC_DIR="/opt/executables" -ARG DATA_DIR="/data" - -RUN mkdir -p ${EXEC_DIR} \ - && mkdir -p ${DATA_DIR}/inputs \ - && mkdir ${DATA_DIR}/outputs - -COPY src ${EXEC_DIR}/ -WORKDIR ${EXEC_DIR} - -RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir - -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file diff --git a/clustering/polus-feature-subsetting-plugin/README.md b/clustering/polus-feature-subsetting-plugin/README.md deleted file mode 100644 index 24ccba663..000000000 --- a/clustering/polus-feature-subsetting-plugin/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Feature Data Subset - -This WIPP plugin subsets data based on a given feature. It works in conjunction with the `polus-feature-extraction-plugin`, where the feature extraction plugin can be used to extract the features such as the mean intensity of every image in the input image collection. - -# Usage -The details and usage of the plugin inputs is provided in the section below. In addition to the subsetted data, the output directory also consists of a `summary.txt` file which has information as to what images were kept and their new filename if they were renamed. - -### Explanation of inputs -Some of the inputs are pretty straighforward and are used commonly across most WIPP plugins. This section is used to provide some details and examples of the inputs that may be a little complicated. The image collection with the following pattern will be used as an example : `r{r+}_t{t+}_p{p+}_z{z+}_c{c+}.ome.tif`, where r,t,p,z,c stand for replicate, timepoint, positon,z-positon, and channel respectively. Consider we have 5 replicates, 3 timepoints, 50 positions, 10 z-planes and 4 channels. - -1. `inpDir` - This contains the path to the input image collection to subset data from. -2. `filePattern` - Filepattern of the input images -3. `groupVar` - This is a mandatory input across which to subset data. 
This can take either 1 or 2 variables as input and if 2 variables are provided then the second variable will be treated as the minor grouping variable. In our example, if `z` is provided as input, then within a subcollection, the mean of the feature value will be taken for all images with the same z. Then the z positions will be filtered out based on the input of the `percentile` and `removeDirection` variables. Now if `z,c` are provided as input, then `c` will be treated as the minor grouping variable, which means that the mean will be taken for all images with the same z for each channel. Also, the plugin will ensure that the same values of z positions are filtered out across c. -4. `csvDir` - This contains the path to the csv collection containing the feature values for each image. This can be the output of the feature extraction plugin. -5. `feature` - The column name from the csv file that will be used to filter images. -6. `percentile` and `removeDirection` - These two variables denote the criteria with which images are filtered. For example, if percentile is `0.1` and removeDirection is set to `Below`, then images with a feature value below the 10th percentile will be removed. On the other hand, if removeDirection is set to `Above`, then all images with a feature value greater than the 10th percentile will be removed. This enables data subsetting from both `brightfield` and `darkfield` microscopy images (a minimal sketch of this filtering rule is shown after the install instructions below). - - **Optional Arguments** - -7. `sectionVar` - This is an optional input to segregate the input image collection into sub-collections. The analysis will be done separately for each sub-collection. In our example, if the user enters `r,t` as the sectionVar, then we will have 15 sub-collections (5*3), 1 for each combination of timepoint and replicate. If the user enters `r` as sectionVar, then we will have 5 sub-collections, 1 for each replicate. If the user wants to consider the whole image collection as a single section, then no input is required. NOTE: As a post-processing step, the same number of images will be subsetted across different sections. -8. `padding` - This is an optional variable with a default value of 0. A padding of 3 means that 3 additional planes will be captured on either side of the subsetted data. This can be used as a sanity check to ensure that the subsetted data captures the images we want. For example, if the following z values were filtered out initially - 5,6,7 - then a padding of 3 means that the output dataset will have z positions 2,3,4,5,6,7,8,9,10 if all of them exist. -9. `writeOutput` - This is an optional argument with default value `True`. If it is set to true, then both the output image collection and the `summary.txt` file will be created. If it is set to false, then the output directory will only consist of `summary.txt`. This option enables the user to tune hyperparameters such as percentile, removeDirection, and feature without actually creating the output image collection. - - - -Contact [Gauhar Bains](mailto:gauhar.bains@labshare.org) for more information. - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit.
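Below is a minimal, hypothetical sketch of the `percentile`/`removeDirection` rule described in the inputs section above. It is not the plugin's source code (the deleted `main.py` implements this inside `filter_planes`); the `keep_planes` helper, its inputs, and the example values are illustrative assumptions only.

```python
# Sketch of the percentile cutoff: keep or drop z-planes based on the mean
# feature value of each plane and the removeDirection setting.
import numpy as np

def keep_planes(mean_feature_by_z: dict, percentile: float, remove_direction: str) -> set:
    """Return the set of z-planes that survive the percentile cutoff."""
    values = np.array(list(mean_feature_by_z.values()))
    threshold = np.quantile(values, percentile)  # e.g. 0.1 -> 10th percentile
    if remove_direction == "Below":
        # discard planes whose mean feature value falls below the cutoff
        return {z for z, v in mean_feature_by_z.items() if v >= threshold}
    # otherwise discard planes whose mean feature value lies above the cutoff
    return {z for z, v in mean_feature_by_z.items() if v <= threshold}

# Planes 5-7 are much brighter than the rest, so only they survive a
# "remove everything below the 70th percentile" run.
means = {z: (10.0 if z in (5, 6, 7) else 1.0) for z in range(1, 11)}
print(keep_planes(means, percentile=0.7, remove_direction="Below"))  # {5, 6, 7}
```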
- -## Options - -This plugin takes one input argument and one output argument: - -| Name | Description | I/O | Type | -| ------------------- | ----------------------------------------------------- | ------ | ------------- | -| `--csvDir` | CSV collection containing features | Input | csvCollection | -| `--padding` | Number of images to capture outside the cutoff | Input | int | -| `--feature` | Feature to use to subset data | Input | string | -| `--filePattern` | Filename pattern used to separate data | Input | string | -| `--groupVar` | variables to group by in a section | Input | string | -| `--inpDir` | Input image collection to be processed by this plugin | Input | collection | -| `--percentile` | Percentile to remove | Input | int | -| `--removeDirection` | remove direction above or below percentile | Input | string | -| `--sectionVar` | variables to divide larger sections | Input | string | -| `--writeOutput` | write output image collection or not | Input | boolean | -| `--outDir` | Output collection | Output | collection | - diff --git a/clustering/polus-feature-subsetting-plugin/VERSION b/clustering/polus-feature-subsetting-plugin/VERSION deleted file mode 100644 index a34eaa5d0..000000000 --- a/clustering/polus-feature-subsetting-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.1.11 \ No newline at end of file diff --git a/clustering/polus-feature-subsetting-plugin/build-docker.sh b/clustering/polus-feature-subsetting-plugin/build-docker.sh deleted file mode 100644 index d9ad13705..000000000 --- a/clustering/polus-feature-subsetting-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$(= thresh] - else: - keep_planes = [z for z in planes if feature_dict[z] <= thresh] - - return set(keep_planes) - -def make_uniform(planes_dict, uniques, padding): - """ Ensure each section has the same number of images - - This function makes the output collection uniform in - the sense that it preserves same number of planes across - sections. It also captures additional planes based - on the value of the padding variable - - Args: - planes_dict (dict): planes to keep in different sections - uniques (list): unique values for the major grouping variable - padding (int): additional images to capture outside cutoff - - Returns: - dictionary: dictionary containing planes to keep - """ - - # max no. 
of planes - max_len = max([len(i) for i in planes_dict.values()]) - - # max planes that can be added on each side - min_ind = min([min(planes_dict[k]) for k in planes_dict]) - max_ind = max([max(planes_dict[k]) for k in planes_dict]) - max_add_left = uniques.index(min_ind) - max_add_right = len(uniques) - (uniques.index(max_ind)+1) - - # add planes in each section based on padding and max number of planes - for section_id, planes in planes_dict.items(): - len_to_add = max_len - len(planes) - len_add_left = min(int(len_to_add)/2+padding, max_add_left) - len_add_right = min(len_to_add - len_add_left+padding, max_add_right) - left_ind = int(uniques.index(min(planes)) - len_add_left) - right_ind = int(uniques.index(max(planes)) + len_add_right)+1 - planes_dict[section_id] = uniques[left_ind:right_ind] - return planes_dict - -def main(inpDir,csvDir,outDir,filePattern,groupVar,percentile, - removeDirection,sectionVar,feature,padding,writeOutput): - """Function containing the main login to subset data - - Args: - inpDir (string): path to input image collection - csvDir (string): path to csv file containing features - outDir (string): path to output collection - filePattern (string): input image filepattern - groupVar (string): grouping variables - percentile (float): cutoff feature percentile - removeDirection (string): subset above or below percentile - sectionVar (string): sectioning variable - feature (string): feature to subset using - padding (int): capture additional images outside of cutoff - writeOutput (boolean): write output image collection or not - """ - - # Get all file names in csvDir image collection - csvDir_files = [f.name for f in Path(csvDir).iterdir() if f.is_file() and "".join(f.suffixes)=='.csv'] - - # Get all file names in inpDir image collection - inpDir_files = [f.name for f in Path(inpDir).iterdir() if f.is_file() and "".join(f.suffixes)=='.ome.tif'] - - # read and concat all csv files - for ind, file in enumerate(csvDir_files): - if ind == 0: - feature_df = pd.read_csv(os.path.join(csvDir, file), header=0) - else: - feature_df = pd.concat([feature_df, pd.read_csv(os.path.join(csvDir, file), header=0)]) - - # store image name and its feature value - feature_dict = {k:v for k,v in zip(feature_df['Image'], feature_df[feature])} - - # seperate filepattern variables into different categories - _,var = filepattern.get_regex(filePattern) - grouping_variables = groupVar.split(',') - section_variables = sectionVar.split(',') - sub_section_variables = [v for v in var if v not in grouping_variables+section_variables] - - # initialize filepattern object - fp = filepattern.FilePattern(inpDir, pattern=filePattern) - uniques = fp.uniques - - [maj_grouping_var, min_grouping_var] = grouping_variables if len(grouping_variables)>1 else grouping_variables+[None] - keep_planes = {} - - logger.info('Iterating over sections...') - # single iteration of this loop gives all images in one section - for file in fp(group_by=sub_section_variables+grouping_variables): - - section_feat_dict = {} - section_keep_planes = [] - section_id = tuple([file[0][i] for i in section_variables]) if section_variables[0] else 1 - - # iterate over files in one section - for f in file: - if min_grouping_var == None: - f[min_grouping_var] = None - - # stote feature values for images - if f[min_grouping_var] not in section_feat_dict: - section_feat_dict[f[min_grouping_var]] = {} - - if f[maj_grouping_var] not in section_feat_dict[f[min_grouping_var]]: - section_feat_dict[f[min_grouping_var]][f[maj_grouping_var]] = [] - 
- section_feat_dict[f[min_grouping_var]][f[maj_grouping_var]].append(feature_dict[f['file'].name]) - - # average feature value by grouping variable - for key1 in section_feat_dict: - for key2 in section_feat_dict[key1]: - section_feat_dict[key1][key2] = sum(section_feat_dict[key1][key2])/len(section_feat_dict[key1][key2]) - - # find planes to keep based on specified criteria - section_keep_planes.append(filter_planes(section_feat_dict[key1],removeDirection, percentile)) - - # keep same planes within a section, across the minor grouping variable - section_keep_planes = list(section_keep_planes[0].union(*section_keep_planes)) - section_keep_planes = [i for i in range(min(section_keep_planes), max(section_keep_planes)+1) if i in uniques[maj_grouping_var]] - keep_planes[section_id] = section_keep_planes - - # keep same number of planes across different sections - keep_planes = make_uniform(keep_planes, uniques[maj_grouping_var], padding) - - # start writing summary.txt - summary = open(os.path.join(outDir, 'metadata_files', 'summary.txt'), 'w') - - logger.info('renaming subsetted data') - - # reinitialize filepattern object - fp = filepattern.FilePattern(inpDir, pattern=filePattern) - - # rename subsetted data - for file in fp(group_by=sub_section_variables+grouping_variables): - section_id = tuple([file[0][i] for i in section_variables]) if section_variables[0] else 1 - section_keep_planes = keep_planes[section_id] - rename_map = {k:v for k,v in zip(keep_planes[section_id], uniques[maj_grouping_var])} - - # update summary.txt with section renaming info - summary.write('------------------------------------------------ \n') - if sectionVar.strip(): - summary.write('Section : {} \n'.format({k:file[0][k] for k in section_variables})) - logger.info('Renaming files from section : {} \n'.format({k:file[0][k] for k in section_variables})) - summary.write('\nThe following values of "{}" variable have been renamed: \n'.format(maj_grouping_var)) - for k,v in rename_map.items(): - summary.write('{} ---> {} \n'.format(k,v)) - summary.write('\n Files : \n \n') - - # rename and write output - for f in file: - if f[maj_grouping_var] not in keep_planes[section_id]: - continue - - # old and new file name - old_file_name = f['file'].name - file_name_dict = {k.upper():v for k,v in f.items() if k!='file'} - file_name_dict[maj_grouping_var.upper()] = rename_map[f[maj_grouping_var]] - new_file_name = fp.get_matching(**file_name_dict)[0]['file'].name - - # if write output collection - if writeOutput: - shutil.copy2(os.path.join(inpDir, old_file_name),os.path.join(outDir, 'images', new_file_name)) - - summary.write('{} -----> {} \n'.format(old_file_name, new_file_name)) - summary.close() - -if __name__=="__main__": - # Initialize the logger - logging.basicConfig(format='%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') - logger = logging.getLogger("main") - logger.setLevel(logging.INFO) - - ''' Argument parsing ''' - logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Subset data using a given feature') - - # Input arguments - parser.add_argument('--csvDir', dest='csvDir', type=str, - help='CSV collection containing features', required=True) - parser.add_argument('--padding', dest='padding', type=str, - help='Number of images to capture outside the cutoff', required=False) - parser.add_argument('--feature', dest='feature', type=str, - help='Feature to use to subset data', required=True) - parser.add_argument('--filePattern', 
dest='filePattern', type=str, - help='Filename pattern used to separate data', required=True) - parser.add_argument('--groupVar', dest='groupVar', type=str, - help='variables to group by in a section', required=True) - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input image collection to be processed by this plugin', required=True) - parser.add_argument('--percentile', dest='percentile', type=str, - help='Percentile to remove', required=True) - parser.add_argument('--removeDirection', dest='removeDirection', type=str, - help='remove direction above or below percentile', required=True) - parser.add_argument('--sectionVar', dest='sectionVar', type=str, - help='variables to divide larger sections', required=False) - parser.add_argument('--writeOutput', dest='writeOutput', type=str, - help='write output image collection or not', required=False) - # Output arguments - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) - - # Parse the arguments - args = parser.parse_args() - csvDir = args.csvDir - logger.info('csvDir = {}'.format(csvDir)) - padding = args.padding - padding = 0 if padding==None else int(padding) - logger.info('padding = {}'.format(padding)) - feature = args.feature - logger.info('feature = {}'.format(feature)) - filePattern = args.filePattern - logger.info('filePattern = {}'.format(filePattern)) - groupVar = args.groupVar - logger.info('groupVar = {}'.format(groupVar)) - inpDir = args.inpDir - if (Path.is_dir(Path(args.inpDir).joinpath('images'))): - # switch to images folder if present - fpath = str(Path(args.inpDir).joinpath('images').absolute()) - logger.info('inpDir = {}'.format(inpDir)) - percentile = float(args.percentile) - logger.info('percentile = {}'.format(percentile)) - removeDirection = args.removeDirection - logger.info('removeDirection = {}'.format(removeDirection)) - sectionVar = args.sectionVar - sectionVar = '' if sectionVar is None else sectionVar - logger.info('sectionVar = {}'.format(sectionVar)) - writeOutput = True if args.writeOutput==None else args.writeOutput == 'true' - logger.info('writeOutput = {}'.format(writeOutput)) - outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) - - # create metadata and images folder in outDir - if not os.path.isdir(os.path.join(outDir, 'images')): - os.mkdir(os.path.join(outDir, 'images')) - if not os.path.isdir(os.path.join(outDir, 'metadata_files')): - os.mkdir(os.path.join(outDir, 'metadata_files')) - - # Surround with try/finally for proper error catching - try: - main(inpDir=inpDir, - csvDir=csvDir, - outDir=outDir, - filePattern=filePattern, - groupVar=groupVar, - percentile=percentile, - removeDirection=removeDirection, - sectionVar=sectionVar, - feature=feature, - padding=padding, - writeOutput=writeOutput) - - except Exception: - traceback.print_exc() - - finally: - logger.info('exiting workflow..') - # Exit the program - sys.exit() \ No newline at end of file diff --git a/clustering/polus-feature-subsetting-plugin/src/requirements.txt b/clustering/polus-feature-subsetting-plugin/src/requirements.txt deleted file mode 100644 index b7e965ece..000000000 --- a/clustering/polus-feature-subsetting-plugin/src/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -filepattern>=1.4.5 -pandas>=1.1.3 \ No newline at end of file diff --git a/clustering/polus-hdbscan-clustering-plugin/Dockerfile b/clustering/polus-hdbscan-clustering-plugin/Dockerfile deleted file mode 100644 index 37129b3fd..000000000 --- 
a/clustering/polus-hdbscan-clustering-plugin/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM polusai/bfio:2.1.9 - -COPY VERSION / -COPY src ${EXEC_DIR}/. - -RUN apt --no-install-recommends -y autoremove --purge python3.9-minimal python3.9\ - && apt-get update && apt-get install --no-install-recommends -y build-essential python3.9-dev\ - && pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir - -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file diff --git a/clustering/polus-hdbscan-clustering-plugin/README.md b/clustering/polus-hdbscan-clustering-plugin/README.md deleted file mode 100644 index 2169be013..000000000 --- a/clustering/polus-hdbscan-clustering-plugin/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# Hierarchical Density-Based Spatial Clustering of Applications with Noise(HDBSCAN) Clustering - -The HDBSCAN Clustering plugin clusters the data using [HDBSCAN clustering](https://pypi.org/project/hdbscan/) library. The input and output for this plugin is a CSV file. Each observation (row) in the input CSV file is assigned to one of the clusters. The output CSV file contains the column `cluster` that identifies the cluster to which each observation belongs. A user can supply a regular expression with capture groups if they wish to cluster each group independently, or if they wish to average the numerical features across each group and treat them as a single observation. - -## Inputs: - -### Input CSV collection: -The input file(s) that need to be clustered. The file should be in CSV format. This is a required parameter for the plugin. - -### Grouping pattern: -The input for this parameter is a regular expression with capture group. This input splits the data into groups based on the matched pattern. A new column `group` is created in the output CSV file that has the group based on the given pattern. Unless `averageGroups` is set to `true`, providing a grouping pattern will cluster each group independently. - -### Average groups: -Setting this equal to `true` will use the supplied `groupingPattern` to average the numerical features and produce a single row per group which is then clustered. The resulting cluster is assigned to all observations belonging in that group. - -### Label column: -This is the name of the column containing the labels to be used with `groupingPattern`. - -### Minimum cluster size: -This parameter defines the smallest number of points that should be considered as cluster. This is a required parameter. The input should be an integer and the value should be greater than 1. - -### Increment outlier ID: -This parameter sets the ID of the outlier cluster to `1`, otherwise it will be 0. This is useful for visualization purposes if the resulting cluster IDs are turned into image annotations. - -## Output: -The output is a CSV file containing the clustered data. - -## Building -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Options - -This plugin takes four input arguments and one output argument: - -| Name | Description | I/O | Type | -| ---------------------- | ---------------------------------------------------------------------------------------------- | ------ | ------------- | -| `--inpDir` | Input csv collection. 
| Input | csvCollection | -| `--groupingPattern` | Regular expression to group rows. Clustering will be applied across capture groups by default. | Input | string | -| `--averageGroups` | If set to `true`, will average data across groups. Requires capture groups | Input | string | -| `--labelCol` | Name of the column containing labels for grouping pattern. | Input | string | -| `--minClusterSize` | Minimum cluster size. | Input | integer | -| `--incrementOutlierId` | Increments outlier ID to 1. | Input | string | -| `--outDir` | Output collection | Output | csvCollection | diff --git a/clustering/polus-hdbscan-clustering-plugin/VERSION b/clustering/polus-hdbscan-clustering-plugin/VERSION deleted file mode 100644 index 5546bd2c5..000000000 --- a/clustering/polus-hdbscan-clustering-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.4.7 \ No newline at end of file diff --git a/clustering/polus-hdbscan-clustering-plugin/build-docker.sh b/clustering/polus-hdbscan-clustering-plugin/build-docker.sh deleted file mode 100755 index 7a7f44fe8..000000000 --- a/clustering/polus-hdbscan-clustering-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$( typing.List[str]: - """List all the .csv files in the directory. - - Args: - csv_directory (str): Path to the directory containing the csv files. - - Returns: - The path to directory, list of names of the subdirectories in dirpath (if any) and the filenames of .csv files. - - """ - list_of_files = [os.path.join(dirpath, file_name) - for dirpath, dirnames, files in os.walk(csv_directory) - for file_name in fnmatch.filter(files, '*.csv')] - return list_of_files - - -def clustering(data: np.ndarray, min_cluster_size: int, increment_outlier_id: bool) -> np.ndarray: - """Cluster data using HDBSCAN. - - Args: - data (array): Data that need to be clustered. - min_cluster_size (int): Smallest size grouping that should be considered as a cluster. - increment_outlier_id (bool) : Increment outlier ID to unity. - - Returns: - Cluster labels for each row of data. - """ - clusters = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size).fit(data) - labels = clusters.labels_.flatten().astype(np.uint16) + 1 - labels = labels + 1 if increment_outlier_id else labels - - return labels - - -# Setup the argument parsing -def main(inpDir, grouping_pattern, avg_groups, label_col, min_cluster_size, increment_outlier_id, outDir): - # Get list of .csv files in the directory including sub folders for clustering - input_csvs = list_files(inpDir) - if input_csvs is None: - raise ValueError('No .csv files found.') - - for csv in input_csvs: - # Get the full path and split to get only the filename. - split_file = os.path.normpath(csv) - file_name = os.path.split(split_file)[-1] - file_prefix, _ = file_name.split('.', 1) - - logger.info('Reading the file ' + file_name) - - # Read csv file - df = pd.read_csv(csv) - - # If user provided a regular expression. - if grouping_pattern is not None: - df = df[df[label_col].str.match(grouping_pattern)].copy() - if df.empty: - logger.warning(f"Could not find any files matching the pattern {grouping_pattern} in file {csv}. Skipping...") - continue - - #Create a column group with matching string - df['group'] = df[label_col].str.extract(grouping_pattern, expand=True).apply(','.join, axis=1) - - # Get column(s) containing data. - df_data = df.select_dtypes(exclude='object').copy() - df_data['group'] = df['group'] - - # If we want to average features for each group. 
- if avg_groups: - df_grouped = df_data.groupby('group').apply(lambda x: x.sort_values('group').mean(numeric_only=True)) - - # Cluster data using HDBSCAN clustering. - logger.info('Clustering the data') - cluster_ids = clustering(df_grouped.values, min_cluster_size, increment_outlier_id) - - df_grouped['cluster'] = cluster_ids - df = df.merge(df_grouped['cluster'], left_on='group', right_index=True) - else: # We want separate clustering results for each group. - dfs = [] - for group, df_ss in df_data.groupby('group'): - # Cluster data using HDBSCAN clustering. - logger.info(f'Clustering data in group {group}') - - cluster_ids = clustering(df_ss.values, min_cluster_size, increment_outlier_id) - df_ss['cluster'] = cluster_ids - dfs.append(df_ss) - - df_grouped = pd.concat(dfs) - df = df.merge(df_grouped['cluster'], left_index=True, right_index=True) - - # No grouping. Vanilla clustering. - else: - # Get column(s) containing data. - df_data = df.select_dtypes(exclude='object').copy() - - #Cluster data using HDBSCAN clustering - logger.info('Clustering the data') - cluster_ids = clustering(df_data.values, min_cluster_size, increment_outlier_id) - df['cluster'] = cluster_ids - - df.to_csv(os.path.join(outDir, f'{file_prefix}.csv'), index=None, header=True, encoding='utf-8-sig') - logger.info("Finished all processes!") - -if __name__ == "__main__": - logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='HDBSCAN clustering plugin') - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Input collection-Data need to be clustered', required=True) - parser.add_argument('--groupingPattern', dest='groupingPattern', type=str, - help='Regular expression to group rows. Clustering will be applied across capture groups.', required=False) - parser.add_argument('--averageGroups', dest='averageGroups', type=str, - help='Whether to average data across groups. Requires capture groups.', default='false', required=False) - parser.add_argument('--labelCol', dest='labelCol', type=str, - help='Name of column containing labels. Required only for grouping operations.', required=False) - parser.add_argument('--minClusterSize', dest='minClusterSize', type=int, - help='Minimum cluster size', required=True) - parser.add_argument('--incrementOutlierId', dest='incrementOutlierId', type=str, - help='Increments outlier ID to 1.', default='false', required=False) - parser.add_argument('--outDir', dest='outDir', type=str, - help='Output collection', required=True) - - # Parse the arguments. - args = parser.parse_args() - - # Path to csvfile directory. - inpDir = args.inpDir - logger.info('inpDir = {}'.format(inpDir)) - - # Regular expression for grouping. - grouping_pattern = args.groupingPattern - logger.info('grouping_pattern = {}'.format(grouping_pattern)) - - # Whether to average data for each group. - avg_groups = args.averageGroups.lower() != 'false' - logger.info('avg_groups = {}'.format(avg_groups)) - - # Name of column to use for grouping. - label_col = args.labelCol - logger.info('label_col = {}'.format(label_col)) - - # Minimum cluster size for clustering using HDBSCAN. - min_cluster_size = args.minClusterSize - logger.info('min_cluster_size = {}'.format(min_cluster_size)) - - # Set outlier cluster id as 1. - increment_outlier_id = args.incrementOutlierId.lower() != 'false' - logger.info('increment_outlier_id = {}'.format(increment_outlier_id)) - - # Path to save output csvfiles. 
- outDir = args.outDir - logger.info('outDir = {}'.format(outDir)) - - main( - inpDir, - grouping_pattern, - avg_groups, - label_col, - min_cluster_size, - increment_outlier_id, - outDir - ) \ No newline at end of file diff --git a/clustering/polus-hdbscan-clustering-plugin/src/requirements.txt b/clustering/polus-hdbscan-clustering-plugin/src/requirements.txt deleted file mode 100644 index ffd72e039..000000000 --- a/clustering/polus-hdbscan-clustering-plugin/src/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -hdbscan==0.8.27 -pandas>=1.2.4 diff --git a/features/polus-csv-statistics-plugin/Dockerfile b/features/polus-csv-statistics-plugin/Dockerfile deleted file mode 100644 index d6b8f9f20..000000000 --- a/features/polus-csv-statistics-plugin/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM polusai/bfio:2.1.9 - -COPY VERSION / - -ARG EXEC_DIR="/opt/executables" -ARG DATA_DIR="/data" - -RUN mkdir -p ${EXEC_DIR} \ - && mkdir -p ${DATA_DIR}/inputs \ - && mkdir ${DATA_DIR}/outputs - -COPY src ${EXEC_DIR}/ -WORKDIR ${EXEC_DIR} - -RUN pip3 install -r ${EXEC_DIR}/requirements.txt - -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file diff --git a/features/polus-csv-statistics-plugin/README.md b/features/polus-csv-statistics-plugin/README.md deleted file mode 100644 index 51ac1c4a9..000000000 --- a/features/polus-csv-statistics-plugin/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# CSV Statistics - -This WIPP plugin performs statistics on values in each column of a csv file if the data is numeric. Rows of data are grouped together by rows that have a matching value in a column with header named `file`. If no columns have the `file` header, then this plugin throws and error. - -Available statistics are: - -1. [mean (arithmetic mean)](https://en.wikipedia.org/wiki/Mean#Arithmetic_mean_(AM)) -2. [median](https://en.wikipedia.org/wiki/Median#The_sample_median) -3. [std (standard deviation)](https://en.wikipedia.org/wiki/Standard_deviation) -4. [var (variance)](https://en.wikipedia.org/wiki/Variance) -5. [skew (Fisher-Pearson skewness)](https://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm) -6. [kurt (excess kurtosis)](https://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm) -7. count (number of rows sampled) -8. [iqr (Interquartile_range)](https://en.wikipedia.org/wiki/Interquartile_range) - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. 
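As a rough illustration of the statistics listed above (not the plugin's own implementation, which relies on `filepattern` and is mostly elided from this diff), the same per-`file` grouping and measures could be computed with pandas as follows; the column names and values here are made up:

```python
# Group rows by the required `file` column and compute the listed statistics.
import pandas as pd

df = pd.DataFrame({
    "file": ["img1.ome.tif"] * 4 + ["img2.ome.tif"] * 4,
    "intensity": [1.0, 2.0, 3.0, 8.0, 2.0, 4.0, 6.0, 20.0],
})

grouped = df.groupby("file")["intensity"]
stats = pd.DataFrame({
    "mean": grouped.mean(),
    "median": grouped.median(),
    "std": grouped.std(),
    "var": grouped.var(),
    "skew": grouped.skew(),                 # Fisher-Pearson skewness
    "kurt": grouped.apply(pd.Series.kurt),  # excess kurtosis
    "count": grouped.count(),
    "iqr": grouped.quantile(0.75) - grouped.quantile(0.25),
})
print(stats)
```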
- -## Options - -This plugin takes two input argument and one output argument: - -| Name | Description | I/O | Type | -| --------------- | --------------------------------------------------- | ------ | ------------- | -| `--statistics` | Types of statistics to calculate | Input | array | -| `--inpDir` | Input csv collection to be processed by this plugin | Input | csvCollection | -| `--filePattern` | The filePattern of the images in represented in csv | Input | string | -| `--groupBy` | The variable(s) of how the images should be grouped | Input | string | -| `--outDir` | Output collection | Output | csvCollection | diff --git a/features/polus-csv-statistics-plugin/VERSION b/features/polus-csv-statistics-plugin/VERSION deleted file mode 100644 index 7dff5b892..000000000 --- a/features/polus-csv-statistics-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.1 \ No newline at end of file diff --git a/features/polus-csv-statistics-plugin/build-docker.sh b/features/polus-csv-statistics-plugin/build-docker.sh deleted file mode 100755 index ff8f13c78..000000000 --- a/features/polus-csv-statistics-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$( fcheck: - fcheck += 1 - logger.info('Unique Files parsed: {}'.format(fnum)) \ No newline at end of file diff --git a/features/polus-csv-statistics-plugin/src/requirements.txt b/features/polus-csv-statistics-plugin/src/requirements.txt deleted file mode 100644 index 6dd96c62d..000000000 --- a/features/polus-csv-statistics-plugin/src/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -filepattern==1.4.7 \ No newline at end of file diff --git a/formats/arrow-to-tabular-tool/.bumpversion.cfg b/formats/arrow-to-tabular-tool/.bumpversion.cfg deleted file mode 100644 index 7a2f0851d..000000000 --- a/formats/arrow-to-tabular-tool/.bumpversion.cfg +++ /dev/null @@ -1,27 +0,0 @@ -[bumpversion] -current_version = 0.2.3 -commit = True -tag = False -parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? -serialize = - {major}.{minor}.{patch}-{release}{dev} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = _ -first_value = dev -values = - dev - _ - -[bumpversion:part:dev] - -[bumpversion:file:pyproject.toml] -search = version = "{current_version}" -replace = version = "{new_version}" - -[bumpversion:file:plugin.json] - -[bumpversion:file:VERSION] - -[bumpversion:file:src/polus/images/formats/arrow_to_tabular/__init__.py] diff --git a/formats/arrow-to-tabular-tool/.gitignore b/formats/arrow-to-tabular-tool/.gitignore deleted file mode 100644 index c4aa6d8e4..000000000 --- a/formats/arrow-to-tabular-tool/.gitignore +++ /dev/null @@ -1,175 +0,0 @@ - #Byte-compiled / optimized / DLL files - __pycache__/ - *.py[cod] - *$py.class - - # C extensions - *.so - - # Distribution / packaging - .Python - build/ - develop-eggs/ - dist/ - downloads/ - eggs/ - .eggs/ - lib/ - lib64/ - parts/ - sdist/ - var/ - wheels/ - share/python-wheels/ - *.egg-info/ - .installed.cfg - *.egg - MANIFEST - - # PyInstaller - # Usually these files are written by a python script from a template - # before PyInstaller builds the exe, so as to inject date/other infos into it. 
- *.manifest - *.spec - - # Installer logs - pip-log.txt - pip-delete-this-directory.txt - - # Unit test / coverage reports - htmlcov/ - .tox/ - .nox/ - .coverage - .coverage.* - .cache - nosetests.xml - coverage.xml - *.cover - *.py,cover - .hypothesis/ - .pytest_cache/ - cover/ - - # Translations - *.mo - *.pot - - # Django stuff: - *.log - local_settings.py - db.sqlite3 - db.sqlite3-journal - - # Flask stuff: - instance/ - .webassets-cache - - # Scrapy stuff: - .scrapy - - # Sphinx documentation - docs/_build/ - - # PyBuilder - .pybuilder/ - target/ - - # Jupyter Notebook - .ipynb_checkpoints - - # IPython - profile_default/ - ipython_config.py - - # pyenv - # For a library or package, you might want to ignore these files since the code is - # intended to run in multiple environments; otherwise, check them in: - # .python-version - - # pipenv - # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. - # However, in case of collaboration, if having platform-specific dependencies or dependencies - # having no cross-platform support, pipenv may install dependencies that don't work, or not - # install all needed dependencies. - #Pipfile.lock - - # poetry - # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. - # This is especially recommended for binary packages to ensure reproducibility, and is more - # commonly ignored for libraries. - # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control - poetry.lock - ../../poetry.lock - - # pdm - # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. - #pdm.lock - # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it - # in version control. - # https://pdm.fming.dev/#use-with-ide - .pdm.toml - - # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm - __pypackages__/ - - # Celery stuff - celerybeat-schedule - celerybeat.pid - - # SageMath parsed files - *.sage.py - - # Environments - .env - .venv - env/ - venv/ - ENV/ - env.bak/ - venv.bak/ - - # Spyder project settings - .spyderproject - .spyproject - - # Rope project settings - .ropeproject - - # mkdocs documentation - /site - - # mypy - .mypy_cache/ - .dmypy.json - dmypy.json - - # Pyre type checker - .pyre/ - - # pytype static type analyzer - .pytype/ - - # Cython debug symbols - cython_debug/ - - # PyCharm - # JetBrains specific template is maintained in a separate JetBrains.gitignore that can - # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore - # and can be added to the global gitignore or merged into this file. For a more nuclear - # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
- #.idea/ - - # vscode - .vscode - - # test data directory - data - # yaml file - .pre-commit-config.yaml - - # hidden files - .DS_Store - .ds_store - # flake8 - .flake8 diff --git a/formats/arrow-to-tabular-tool/Dockerfile b/formats/arrow-to-tabular-tool/Dockerfile deleted file mode 100644 index bba2b8535..000000000 --- a/formats/arrow-to-tabular-tool/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM polusai/bfio:2.1.9 - -# environment variables defined in polusai/bfio -ENV EXEC_DIR="/opt/executables" -ENV POLUS_IMG_EXT=".ome.tif" -ENV POLUS_TAB_EXT=".csv" -ENV POLUS_LOG="INFO" - -# Work directory defined in the base container -WORKDIR ${EXEC_DIR} - -# TODO: Change the tool_dir to the tool directory -ENV TOOL_DIR="formats/arrow-to-tabular-tool" - -# Copy the repository into the container -RUN mkdir image-tools -COPY . ${EXEC_DIR}/image-tools - -# Install the tool -RUN pip3 install "${EXEC_DIR}/image-tools/${TOOL_DIR}" --no-cache-dir - -# Set the entrypoint -# TODO: Change the entrypoint to the tool entrypoint -ENTRYPOINT ["python3", "-m", "polus.images.formats.arrow_to_tabular"] -CMD ["--help"] diff --git a/formats/arrow-to-tabular-tool/README.md b/formats/arrow-to-tabular-tool/README.md deleted file mode 100644 index 5b9d36e25..000000000 --- a/formats/arrow-to-tabular-tool/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Arrow to Tabular (v0.2.0) -This WIPP plugin allows analysts to convert Arrow Feather File Format (V2) into the following file formats for researchers: \ - - `.parquet` \ - - `.csv` - -Contact [Kelechi Nina Mezu](mailto:nina.mezu@nih.gov), [Hamdah Shafqat Abbasi](mailto:hamdahshafqat.abbasi@nih.gov) for more information. - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Building - -To build the Docker image for the conversion plugin, run -`bash build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the -contents of `plugin.json` into the pop-up window and submit. 
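For context, the conversion this tool performs boils down to a few calls to vaex, the library it depends on; the snippet below is an illustrative sketch with assumed file names, not the tool's CLI:

```python
# Open a Feather V2 / Arrow file with vaex and export it to csv or parquet.
import vaex

df = vaex.open("data.arrow")                  # memory-maps the Arrow file
df.export_csv("data.csv", chunk_size=10_000)  # stream out as CSV, or ...
df.export_parquet("data.parquet")             # ... write a Parquet file
```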
- -## Options - -This plugin takes two input arguments and one output argument: - -| Name | Description | I/O | Type | -| --------------- | ------------------------------------------------------------ | ------ | ---------- | -| `--inpDir` | Input generic data collection to be processed by this plugin | Input | collection | -| `--fileFormat` | Filename pattern to convert | Input | string | -| `--outDir` | Output collection | Output | collection | -| `--preview` | Generate a JSON file with outputs | Output | JSON | diff --git a/formats/arrow-to-tabular-tool/VERSION b/formats/arrow-to-tabular-tool/VERSION deleted file mode 100644 index 717903969..000000000 --- a/formats/arrow-to-tabular-tool/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.3 diff --git a/formats/arrow-to-tabular-tool/arrowtotabular.cwl b/formats/arrow-to-tabular-tool/arrowtotabular.cwl deleted file mode 100644 index df3754cbc..000000000 --- a/formats/arrow-to-tabular-tool/arrowtotabular.cwl +++ /dev/null @@ -1,28 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.2 -inputs: - fileFormat: - inputBinding: - prefix: --fileFormat - type: string - inpDir: - inputBinding: - prefix: --inpDir - type: Directory - outDir: - inputBinding: - prefix: --outDir - type: Directory -outputs: - outDir: - outputBinding: - glob: $(inputs.outDir.basename) - type: Directory -requirements: - DockerRequirement: - dockerPull: polusai/arrow-to-tabular-tool:0.2.3-dev0 - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true - InlineJavascriptRequirement: {} diff --git a/formats/arrow-to-tabular-tool/build-docker.sh b/formats/arrow-to-tabular-tool/build-docker.sh deleted file mode 100755 index f75e537e8..000000000 --- a/formats/arrow-to-tabular-tool/build-docker.sh +++ /dev/null @@ -1,23 +0,0 @@ - -#!/bin/bash - -# Change the name of the tool here -tool_dir="formats" -tool_name="arrow-to-tabular-tool" - -# The version is read from the VERSION file -version=$(","Hamdah Shafqat abbasi "] -readme = "README.md" -packages = [{include = "polus", from = "src"}] - -[tool.poetry.dependencies] -python = ">=3.9,<3.12" -filepattern = "^2.0.4" -typer = "^0.7.0" -tqdm = "^4.64.1" -blake3 = "^0.3.3" -fcsparser = "^0.2.4" -llvmlite = "^0.39.1" -fastapi = "^0.92.0" -vaex = "^4.7.0" - - -[tool.poetry.group.dev.dependencies] -bump2version = "^1.0.1" -pre-commit = "^3.1.0" -black = "^23.1.0" -flake8 = "^6.0.0" -mypy = "^1.0.1" -pytest = "^7.2.1" -pandas = "^1.5.3" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/formats/arrow-to-tabular-tool/run-plugin.sh b/formats/arrow-to-tabular-tool/run-plugin.sh deleted file mode 100755 index 22f347eb2..000000000 --- a/formats/arrow-to-tabular-tool/run-plugin.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -#!/bin/bash -version=$( None: - """Execute Main function.""" - logger.info(f"inpDir = {inp_dir}") - logger.info(f"outDir = {out_dir}") - logger.info(f"fileFormat = {file_format}") - - inp_dir = inp_dir.resolve() - out_dir = out_dir.resolve() - - assert inp_dir.exists(), f"{inp_dir} doesnot exists!! Please check input path again" - assert ( - out_dir.exists() - ), f"{out_dir} doesnot exists!! 
Please check output path again" - FILE_EXT = os.environ.get("POLUS_TAB_EXT", ".csv") - - if file_format == Format.Default: - file_format = FILE_EXT - elif file_format == Format.CSV: - file_format = ".csv" - elif file_format == Format.PARQUET: - file_format = ".parquet" - elif file_format is None: - file_format = FILE_EXT - - assert file_format in [ - ".csv", - ".parquet", - ], f"This tabular file format: {file_format} is not supported by this plugin! Choose either the CSV or Parquet file format" - - pattern_list = [".feather", ".arrow"] - pattern = [f.suffix for f in inp_dir.iterdir() if f.suffix in pattern_list][0] - assert ( - pattern in pattern_list - ), f"This input file extension {pattern} is not supported by this plugin! The input should be either a .feather or an .arrow file" - filepattern = {".feather": ".*.feather", ".arrow": ".*.arrow"} - - featherPattern = filepattern[pattern] - - fps = fp.FilePattern(inp_dir, featherPattern) - - if preview: - with open(pathlib.Path(out_dir, "preview.json"), "w") as jfile: - out_json: dict[str, Any] = { - "filepattern": featherPattern, - "outDir": [], - } - for file in fps(): - out_name = str(file[1][0].stem) + file_format - out_json["outDir"].append(out_name) - json.dump(out_json, jfile, indent=2) - - with ProcessPoolExecutor(max_workers) as executor: - processes = [] - for files in fps: - file = files[1][0] - processes.append(executor.submit(arrow_tabular, file, file_format, out_dir)) - - for process in tqdm( - as_completed(processes), desc="Arrow --> Tabular", total=len(processes) - ): - process.result() - - logger.info("Finished all processes!") - - -if __name__ == "__main__": - typer.run(main) diff --git a/formats/arrow-to-tabular-tool/src/polus/images/formats/arrow_to_tabular/arrow_to_tabular.py b/formats/arrow-to-tabular-tool/src/polus/images/formats/arrow_to_tabular/arrow_to_tabular.py deleted file mode 100644 index 719f324bb..000000000 --- a/formats/arrow-to-tabular-tool/src/polus/images/formats/arrow_to_tabular/arrow_to_tabular.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Arrow to Tabular.""" -import logging -import pathlib - -from enum import Enum -import vaex - -logger = logging.getLogger(__name__) - - - -class Format(str, Enum): - """Extension types to be converted.""" - CSV = ".csv" - PARQUET = ".parquet" - Default = "default" - - -def arrow_tabular(file: pathlib.Path, file_format: str, out_dir: pathlib.Path) -> None: - """Convert Arrow file into tabular file. - This plugin uses vaex to open an arrow file and convert it into csv or parquet tabular data. - - Args: - file : Path to input file. - file_format : Desired file extension of the tabular output file. - out_dir: Path to output directory.
- """ - file_name = pathlib.Path(file).stem - logger.info("Arrow Conversion: Copy ${file_name} into outDir for processing...") - - output_file = pathlib.Path(out_dir, (file_name + file_format)) - - logger.info("Arrow Conversion: Converting file into PyArrow Table") - - data = vaex.open(file) - logger.info("Arrow Conversion: table converted") - ncols = len(data) - chunk_size = max([2**24 // ncols, 1]) - - logger.info("Arrow Conversion: checking for file format") - - if file_format == ".csv": - logger.info("Arrow Conversion: Converting PyArrow Table into .csv file") - # Streaming contents of Arrow Table into csv - return data.export_csv(output_file, chunksize=chunk_size) - - elif file_format == ".parquet": - logger.info("Arrow Conversion: Converting PyArrow Table into .parquet file") - return data.export_parquet(output_file) - else: - logger.error( - "Arrow Conversion Error: This format is not supported in this plugin" - ) diff --git a/formats/arrow-to-tabular-tool/tests/__init__.py b/formats/arrow-to-tabular-tool/tests/__init__.py deleted file mode 100644 index d7bcf679b..000000000 --- a/formats/arrow-to-tabular-tool/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Arrow to Tabular.""" diff --git a/formats/arrow-to-tabular-tool/tests/test_main.py b/formats/arrow-to-tabular-tool/tests/test_main.py deleted file mode 100644 index 9dd214714..000000000 --- a/formats/arrow-to-tabular-tool/tests/test_main.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Testing of Arrow to Tabular plugin.""" -import os -import pathlib -import random -import string - -import filepattern as fp -import numpy as np -import pandas as pd -import pytest -from polus.images.formats.arrow_to_tabular.arrow_to_tabular import arrow_tabular - - -@pytest.fixture() -def generate_arrow(): - """Create pandas dataframe and convert into to arrow file format.""" - dirpath = os.path.abspath(os.path.join(__file__, "../..")) - inpDir = pathlib.Path(dirpath, "data/input") - outDir = pathlib.Path(dirpath, "data/output") - if not inpDir.exists(): - inpDir.mkdir(parents=True, exist_ok=True) - if not outDir.exists(): - outDir.mkdir(exist_ok=True, parents=True) - - df = pd.DataFrame( - { - "A": [random.choice(string.ascii_letters) for i in range(100)], - "B": np.random.randint(low=1, high=100, size=100), - "C": np.random.normal(0.0, 1.0, size=100), - }, - ) - df.to_feather(pathlib.Path(inpDir, "data.arrow")) - df.to_feather(pathlib.Path(inpDir, "data1.arrow")) - - return inpDir, outDir - - -def test_arrow_tabular(generate_arrow): - """Test of Arrow to Parquet file format.""" - pattern = ".parquet" - filePattern = {".csv": ".*.csv", ".parquet": ".*.parquet"} - out_pattern = filePattern[pattern] - in_pattern = ".*.arrow" - fps = fp.FilePattern(generate_arrow[0], in_pattern) - for file in fps(): - arrow_tabular(file[1][0], pattern, generate_arrow[1]) - - assert ( - all( - file[1][0].suffix - for file in fp.FilePattern(generate_arrow[1], out_pattern)() - ) - is True - ) - [os.remove(f) for f in generate_arrow[1].iterdir() if f.name.endswith(pattern)] - - pattern = ".csv" - out_pattern = filePattern[pattern] - fps = fp.FilePattern(generate_arrow[0], in_pattern) - for file in fps(): - arrow_tabular(file[1][0], pattern, generate_arrow[1]) - - assert ( - all( - file[1][0].suffix - for file in fp.FilePattern(generate_arrow[1], out_pattern)() - ) - is True - ) diff --git a/formats/polus-fcs-to-csv-converter-plugin/Dockerfile b/formats/polus-fcs-to-csv-converter-plugin/Dockerfile deleted file mode 100644 index 78be1a4e0..000000000 --- 
a/formats/polus-fcs-to-csv-converter-plugin/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM polusai/bfio:2.1.9 - -COPY VERSION ${EXEC_DIR} -COPY src ${EXEC_DIR}/ - -RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir - -ENTRYPOINT ["python3", "/opt/executables/main.py"] \ No newline at end of file diff --git a/formats/polus-fcs-to-csv-converter-plugin/README.md b/formats/polus-fcs-to-csv-converter-plugin/README.md deleted file mode 100644 index fd4dc62a3..000000000 --- a/formats/polus-fcs-to-csv-converter-plugin/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Fcs to Csv file converter - -The fcs to csv file converter plugin converts fcs file to csv file.The input file should be in .fcs file format and output will be .csv file format. - -## Input: -The input should be a file in fcs format. - -## Output: -The output is a csv file. - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. - -## Options - -This plugin takes eight input argument and one output argument: - -| Name | Description | I/O | Type | -| ---------- | ------------------------- | ------ | ------------- | -| `--inpDir` | Input fcs file collection | Input | collection | -| `--outDir` | Output collection | Output | csvCollection | - - diff --git a/formats/polus-fcs-to-csv-converter-plugin/VERSION b/formats/polus-fcs-to-csv-converter-plugin/VERSION deleted file mode 100644 index 28af839c0..000000000 --- a/formats/polus-fcs-to-csv-converter-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.5 \ No newline at end of file diff --git a/formats/polus-fcs-to-csv-converter-plugin/build-docker.sh b/formats/polus-fcs-to-csv-converter-plugin/build-docker.sh deleted file mode 100644 index 9a33106b5..000000000 --- a/formats/polus-fcs-to-csv-converter-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$( 0) { - for (i in 1:length(excludes)) { - if(!excludes[i] %in% colnames(dataset)) { - logwarn('column to exclude from %s is not found',file_name) - } - } - datasub <-dataset[ , !(names(dataset) %in% excludes)] - } - else if(length(excludes) == 0) { - datasub <-dataset - } - # Remove columns with all values as zero - datasub <- datasub[colSums(datasub) > 0] - - #Check whether predict column is present in dataframe - if(!(predictcolumn %in% colnames(datasub))) { - logwarn('predict column name is not found in %s',file_name) - next - } - - #Get column names without predict variable - drop_dep <- datasub[ , !(names(datasub) %in% predictcolumn)] - resp_var <- colnames(drop_dep) - - #Number of cores - num_of_cores = detectCores() - loginfo('Cores = %s', num_of_cores) - - #Chunk Size - chunk <- floor((nrow(datasub)/ncol(datasub))*num_of_cores) - - #Function to determine chunks - make.data<-function(formula,data,chunksize,...){ - n<-nrow(data) - cursor<-0 - datafun<-function(reset=FALSE){ - if (reset){ - cursor<<-0 - return(NULL) - } - if (cursor>=n) - return(NULL) - start<-cursor+1 - cursor<<-cursor+min(chunksize, n-cursor) - data[start:cursor,] - } - } - - #Convert to ffdf object - datasub_ff = as.ffdf(datasub) - - #Chunk data - chunk_data <-make.data(formula(paste(predictcolumn,paste(resp_var,collapse= "+"),sep="~")), datasub_ff, chunksize=chunk) - - if((modeltype == 'Gaussian') || (modeltype == 
'Poisson') || (modeltype == 'Binomial') || (modeltype == 'Quasibinomial') || (modeltype == 'Quasipoisson') || (modeltype == 'Quasi')) { - modeltype <- tolower(modeltype) - } - - if (modeltype == 'NegativeBinomial') { - fit <- glm.nb(as.formula(paste(predictcolumn,1,sep="~")), data = datasub) - mu <- exp(coef(fit)) - val_pred<-eval(parse(text=paste('datasub',predictcolumn, sep = "$"))) - theta_val = theta.ml(val_pred, mu,nrow(datasub), limit = 22, eps = .Machine$double.eps^0.25, trace = FALSE) - } - - model_list <- c('gaussian','Gamma', 'binomial', 'poisson', 'quasi', 'quasibinomial', 'quasipoisson' ) - - model_data <- function(pred_var, data_model) { - if((modeltype %in% model_list)) { - reg_model <- bigglm(formula(paste(predictcolumn,paste(pred_var,collapse= "+"),sep="~")), data = data_model, family = eval(parse(text=paste(modeltype,"()", sep = ""))), chunksize = chunk) - } - else if(modeltype == 'NegativeBinomial') { - reg_model <- bigglm(formula(paste(predictcolumn,paste(pred_var,collapse= "+"),sep="~")), data = data_model, family = negative.binomial(theta= theta_val), chunksize=chunk) - } - else if(modeltype == 'Multinomial') { - reg_model <- multinom(formula(paste(paste("as.factor(",predictcolumn,")"),paste(pred_var,collapse= "+"),sep="~")), data = data_model, maxit=10, MaxNWts = 10000) - } - return(reg_model) - } - - #Model data based on the options selected - #Get only main effects of the variables - if (glmmethod == 'PrimaryFactors') { - if (modeltype != 'Multinomial') { - test_glm<- model_data(resp_var,chunk_data) - } - else if (modeltype == 'Multinomial') { - test_glm<- model_data(resp_var,datasub_ff) - } - } - #Get interaction values - else if (glmmethod == 'Interaction') { - datasub_pred <- datasub[ , !(names(datasub) %in% predictcolumn)] - #Get correlation between variables - tmp <- cor(datasub_pred) - tmp[upper.tri(tmp)] <- 0 - diag(tmp) <- 0 - - #Remove variables with no interaction - data_no_int <- which(tmp >= 0.1 | tmp < -0.1, arr.ind = TRUE) - data_frame<-data.frame(row = rownames(data_no_int), col = colnames(tmp)[data_no_int[, "col"]], - value = tmp[tmp >= 0.1 | tmp < -0.1]) - colnames(data_frame)<- c("variable1","variable2","coef") - - #Interaction variables - data_frame$variableint <- paste(data_frame$variable1, data_frame$variable2, sep="*") - data_list <- as.character(data_frame$variableint) - if (modeltype != 'Multinomial') { - test_glm<- model_data(data_list,chunk_data) - } - else if (modeltype == 'Multinomial') { - test_glm<- model_data(data_list, datasub_ff) - } - } - #Get second order polynomial values - else if (glmmethod == 'SecondOrder') { - var_resp <- paste('poly(',resp_var,',2)') - if (modeltype != 'Multinomial') { - test_glm<- model_data(var_resp,chunk_data) - } - else if (modeltype == 'Multinomial') { - test_glm<- model_data(var_resp,datasub_ff) - } - } - - #Set output directory - setwd(csvfile) - file_save <- paste0(file_name,".csv") - - #Convert summary of the analysis to a dataframe - tidy_summary <- tidy(test_glm) - - #Reorder the columns - tidy_final <- tidy_summary[c("term", "p.value", "estimate","std.error")] - colnames(tidy_final) <- c("Factors","P-Value","Estimate","Std.Error") - - #Write the dataframe to csv file - write.csv(tidy_final, file_save) - } -} \ No newline at end of file diff --git a/utils/polus-csv-collection-merger/Dockerfile b/utils/polus-csv-collection-merger/Dockerfile deleted file mode 100644 index 9137b8539..000000000 --- a/utils/polus-csv-collection-merger/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM alpine -COPY VERSION / 
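When `glmmethod` is `Interaction`, the R script above keeps only predictor pairs whose absolute correlation is at least 0.1 and passes them to `bigglm` as `var1*var2` terms. A rough Python sketch of just that selection step, with hypothetical column names:

```python
# Sketch of the interaction-term selection used by the deleted R script:
# keep a pair of predictors only if |corr(var1, var2)| >= 0.1.
import numpy as np
import pandas as pd


def interaction_terms(df: pd.DataFrame, predict_column: str, threshold: float = 0.1) -> list:
    predictors = df.drop(columns=[predict_column])
    corr = predictors.corr().to_numpy()
    corr = np.tril(corr, k=-1)  # keep one triangle and drop the diagonal, as the R code does
    rows, cols = np.nonzero(np.abs(corr) >= threshold)
    names = predictors.columns
    return [f"{names[r]}*{names[c]}" for r, c in zip(rows, cols)]


# e.g. interaction_terms(features, "Intensity") might return ["Area*Perimeter", ...]
# for a hypothetical feature table.
```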
-COPY script.sh script.sh -ENTRYPOINT ["sh", "script.sh"] \ No newline at end of file diff --git a/utils/polus-csv-collection-merger/README.md b/utils/polus-csv-collection-merger/README.md deleted file mode 100644 index ea885e526..000000000 --- a/utils/polus-csv-collection-merger/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Polus CSV Collection Merger Plugin - -This plugin helps to merge multiple CSV Collections in WIPP into one collection for later analysis. - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). - -**This plugin is in development and is subject for change** - -## Options - -This plugin takes four input parameters and one output parameter: - -| Name | Description | I/O | WIPP Type | -|----------------------|------------------------------------------------|--------|---------------| -| `input-collection-a` | Input CSV collection A | Input | csvCollection | -| `input-collection-b` | Input CSV collection B | Input | csvCollection | -| `append-a` | Option to append collection ID to files from A | Input | boolean | -| `append-b` | Option to append collection ID to files from B | Input | boolean | -| `output` | Output CSV collection | Output | csvCollection | - -## Build the plugin - -```bash -docker build . -t labshare/polus-csv-collection-merger:0.1.1 -``` - - -## Run the plugin - -### Manually - -To test, create 3 folders: `` and `` should contain csv collections you would like to merge. `` is the target folder which will contain the merged files. - -Run the docker container -```bash -docker run -v :/a \ - -v :/b \ - -v :/c \ - labshare/polus-csv-collection-merger:0.1.1 \ - --input-collection-a /a \ - --input-collection-b /b \ - --append-a 'true' \ - --append-b 'true' \ - --output /c -``` \ No newline at end of file diff --git a/utils/polus-csv-collection-merger/VERSION b/utils/polus-csv-collection-merger/VERSION deleted file mode 100644 index 6da28dde7..000000000 --- a/utils/polus-csv-collection-merger/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.1.1 \ No newline at end of file diff --git a/utils/polus-csv-collection-merger/csvcollectionsmerger.cwl b/utils/polus-csv-collection-merger/csvcollectionsmerger.cwl deleted file mode 100644 index fb0684d11..000000000 --- a/utils/polus-csv-collection-merger/csvcollectionsmerger.cwl +++ /dev/null @@ -1,28 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.2 -inputs: - append-a: - inputBinding: - prefix: --append-a - type: boolean? - append-b: - inputBinding: - prefix: --append-b - type: boolean? - input-collection-a: - inputBinding: - prefix: --input-collection-a - type: Directory - input-collection-b: - inputBinding: - prefix: --input-collection-b - type: Directory - output: - inputBinding: - prefix: --output - type: Directory -outputs: - output: !!python/name:builtins.NotImplementedError '' -requirements: - DockerRequirement: - dockerPull: polusai/csv-collection-merger:0.1.2 diff --git a/utils/polus-csv-collection-merger/ict.yaml b/utils/polus-csv-collection-merger/ict.yaml deleted file mode 100644 index 308334ce3..000000000 --- a/utils/polus-csv-collection-merger/ict.yaml +++ /dev/null @@ -1,61 +0,0 @@ -author: -- Konstantin taletskiy -contact: konstantin.taletskiy@labshare.org -container: polusai/csv-collection-merger:0.1.2 -description: Merge two csv collections. You have an option to prepend collection name - to avoid name conflicts. -entrypoint: '[python3, main.py]' -inputs: -- description: Input csv collection A. 
- format: - - csvCollection - name: input-collection-a - required: true - type: path -- description: Append collection name to collection A. - format: - - boolean - name: append-a - required: false - type: boolean -- description: Input csv collection B. - format: - - csvCollection - name: input-collection-b - required: true - type: path -- description: Append collection name to collection B. - format: - - boolean - name: append-b - required: false - type: boolean -name: polusai/CSVcollectionsmerger -outputs: -- description: Output csv collection for the plugin - format: - - csvCollection - name: output - required: true - type: path -repository: https://github.com/polusai/image-tools -specVersion: 1.0.0 -title: CSV collections merger -ui: -- description: Pick a collection... - key: inputs.input-collection-a - title: 'CSV Collection A: ' - type: path -- description: Pick an option... - key: inputs.append-a - title: 'Append collection name to filenames in A: ' - type: checkbox -- description: Pick a collection... - key: inputs.input-collection-b - title: 'CSV Collection B: ' - type: path -- description: Pick an option... - key: inputs.append-b - title: 'Append collection name to filenames in B: ' - type: checkbox -version: 0.1.2 diff --git a/utils/polus-csv-collection-merger/plugin.json b/utils/polus-csv-collection-merger/plugin.json deleted file mode 100644 index d777c0c06..000000000 --- a/utils/polus-csv-collection-merger/plugin.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "name": "CSV collections merger", - "version": "0.1.2", - "title": "CSV collections merger", - "description": "Merge two csv collections. You have an option to prepend collection name to avoid name conflicts.", - "author": "Konstantin taletskiy (konstantin.taletskiy@labshare.org)", - "containerId": "polusai/csv-collection-merger:0.1.2", - "inputs": [ - { - "name": "input-collection-a", - "type": "csvCollection", - "description": "Input csv collection A." - }, - { - "name": "append-a", - "type": "boolean", - "required": "false", - "description": "Append collection name to collection A." - }, - { - "name": "input-collection-b", - "type": "csvCollection", - "description": "Input csv collection B." - }, - { - "name": "append-b", - "type": "boolean", - "required": "false", - "description": "Append collection name to collection B." - } - ], - "outputs": [ - { - "name": "output", - "type": "csvCollection", - "description": "Output csv collection for the plugin" - } - ], - "ui": [ - { - "key": "inputs.input-collection-a", - "title": "CSV Collection A: ", - "description": "Pick a collection..." - }, - { - "key": "inputs.append-a", - "title": "Append collection name to filenames in A: ", - "description": "Pick an option..." - }, - { - "key": "inputs.input-collection-b", - "title": "CSV Collection B: ", - "description": "Pick a collection..." - }, - { - "key": "inputs.append-b", - "title": "Append collection name to filenames in B: ", - "description": "Pick an option..." 
- } - ] -} \ No newline at end of file diff --git a/utils/polus-csv-collection-merger/script.sh b/utils/polus-csv-collection-merger/script.sh deleted file mode 100644 index 646306ddd..000000000 --- a/utils/polus-csv-collection-merger/script.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -while [[ $# -gt 0 ]] -do -key="$1" - -case $key in - --input-collection-a) - INPUT_A="$2" - shift # past argument - shift # past value - ;; - --input-collection-b) - INPUT_B="$2" - shift # past argument - shift # past value - ;; - --append-a) - APPEND_A="$2" - shift # past argument - shift # past value - ;; - --append-b) - APPEND_B="$2" - shift # past argument - shift # past value - ;; - --output) - OUTPUT="$2" - shift # past argument - shift # past value - ;; -esac -done - -echo "INPUT COLLECTION A = ${INPUT_A}" -echo "INPUT COLLECTION B = ${INPUT_B}" -echo "APPEND A = ${APPEND_A}" -echo "APPEND B = ${APPEND_B}" -echo "OUTPUT = ${OUTPUT}" - -COLLECTION_A="$(basename $INPUT_A)" -COLLECTION_B="$(basename $INPUT_B)" -echo " " - -echo "Copying files from collection A ($COLLECTION_A):" -for f in $INPUT_A/*; do echo "$(basename $f)"; done -if [ "$APPEND_A" = "true" ]; then - for f in $INPUT_A/*; do cp "$f" "$OUTPUT"/"$COLLECTION_A"_"$(basename $f)"; done -else - for f in $INPUT_A/*; do cp "$f" "$OUTPUT"/"$(basename $f)"; done -fi -echo " " - -echo "Copying files from collection B ($COLLECTION_B):" -for f in $INPUT_B/*; do echo "$(basename $f)"; done -if [ "$APPEND_B" = "true" ]; then - for f in $INPUT_B/*; do cp "$f" "$OUTPUT"/"$COLLECTION_B"_"$(basename $f)"; done -else - for f in $INPUT_B/*; do cp "$f" "$OUTPUT"/"$(basename $f)"; done -fi \ No newline at end of file diff --git a/visualization/polus-graph-pyramid-builder-plugin/Dockerfile b/visualization/polus-graph-pyramid-builder-plugin/Dockerfile deleted file mode 100644 index d303a4f86..000000000 --- a/visualization/polus-graph-pyramid-builder-plugin/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -# Get image containing bfio -FROM polusai/bfio:2.1.9 - -COPY VERSION / - -ARG EXEC_DIR="/opt/executables" -ARG DATA_DIR="/data" - -#Create folders -RUN mkdir -p ${EXEC_DIR} \ - && mkdir -p ${DATA_DIR}/inputs \ - && mkdir ${DATA_DIR}/outputs - -#Copy executable -COPY src ${EXEC_DIR}/ - -RUN pip3 install -r ${EXEC_DIR}/requirements.txt --no-cache-dir - -RUN python3 ${EXEC_DIR}/dl_fi.py - -WORKDIR ${EXEC_DIR} - -# Default command. Additional arguments are provided through the command line -ENTRYPOINT ["python3", "/opt/executables/main.py"] diff --git a/visualization/polus-graph-pyramid-builder-plugin/README.md b/visualization/polus-graph-pyramid-builder-plugin/README.md deleted file mode 100644 index 4b9a1f17d..000000000 --- a/visualization/polus-graph-pyramid-builder-plugin/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Polus CZI Extraction Plugin - -This WIPP plugin will import a csv collection and build a DeepZoom pyramid of graphs, where each graph contains a heatmap of each column plotted against another column. All n-columns are plotted against each other, excluding tranposed graphs and graphs where each axis has the same column. This leads to a total of (n^2-n)/2 graphs. - -Two types of graphs will be produced: -1) Linear sclaed graphs -2) Log scaled graphs - - The output will contain dzi and csv files for both linear and log scaled outputs. - There were will be two different directories that contain the pyramid images for the linear and log scaled outputs - -For more information on WIPP, visit the [official WIPP page](https://isg.nist.gov/deepzoomweb/software/wipp). 
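The deleted `script.sh` above copies every file from both input collections into the output directory, optionally prefixing each filename with its source collection ID to avoid name clashes. A minimal Python sketch of the same behaviour, with hypothetical paths:

```python
# Sketch of the copy-with-optional-prefix behaviour of the deleted script.sh.
# Paths are hypothetical; the plugin received them as --input-collection-a/-b and --output.
import pathlib
import shutil


def merge_collection(collection_dir: pathlib.Path, out_dir: pathlib.Path, append_id: bool) -> None:
    collection_id = collection_dir.name
    for src in collection_dir.iterdir():
        name = f"{collection_id}_{src.name}" if append_id else src.name
        shutil.copy(src, out_dir / name)


# merge_collection(pathlib.Path("/a"), pathlib.Path("/c"), append_id=True)
# merge_collection(pathlib.Path("/b"), pathlib.Path("/c"), append_id=True)
```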
- -## Building - -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. - -## Install WIPP Plugin - -If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit. - -## Options - -This plugin takes one input argument and one output argument: - -| Name | Description | I/O | Type | -| -------- | ---------------------- | ------ | ---------------- | -| `inpDir` | Input CSV collection | Input | CSV Collection | -| `outDir` | Output pyramid | Output | Pyramid | - -## Run the plugin - -### Run the Docker Container - -```bash -docker run -v /path/to/data:/data graph-pyramid-builder \ - --inpDir /data/input \ - --outDir /data/output -``` diff --git a/visualization/polus-graph-pyramid-builder-plugin/VERSION b/visualization/polus-graph-pyramid-builder-plugin/VERSION deleted file mode 100644 index e05cb3329..000000000 --- a/visualization/polus-graph-pyramid-builder-plugin/VERSION +++ /dev/null @@ -1 +0,0 @@ -1.3.8 diff --git a/visualization/polus-graph-pyramid-builder-plugin/build-docker.sh b/visualization/polus-graph-pyramid-builder-plugin/build-docker.sh deleted file mode 100755 index e96c75517..000000000 --- a/visualization/polus-graph-pyramid-builder-plugin/build-docker.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -version=$(' - -# Initialize the logger -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - datefmt='%d-%b-%y %H:%M:%S') -logger = logging.getLogger("main") -logger.setLevel(logging.INFO) - -def is_number(value): - """ This function checks to see if the value can be converted to a number """ - try: - float(value) - return True - except: - return False - -def load_csv(fpath): - """ Load a csv and select data - - Data is loaded from a csv, and data columns containing numeric values are - returned in a pandas Dataframe. The second row of the csv may contain - column classifiers, so the second row is first loaded and checked to - determine if the classifiers are present. - Inputs: - fpath - Path to csv file - Outputs: - data - A pandas Dataframe - cnames - Names of columns - """ - - # Check if the first row is column coding, and if it is then find valid columns - data = pandas.read_csv(fpath,nrows=1) - is_coded = True - cnames = [] - for ind,fname in zip(range(len(data.columns)),data.columns): - if data[fname][0] != 'F' and data[fname][0] != 'C': - is_coded = False - if is_number(data[fname][0]): - cnames.append([fname,ind]) - else: - logging.info('Column {} does not appear to contain numeric values. 
Not building graphs for this column.'.format(fname)) - elif data[fname][0] == 'F': - cnames.append([fname,ind]) - else: - logging.info('Skipping column {} for reason: one hot encodings'.format(fname)) - - # Load the data - if is_coded: - data = pandas.read_csv(fpath,skiprows=[1],usecols=[c[0] for c in cnames]) - - else: - data = pandas.read_csv(fpath,usecols=[c[0] for c in cnames]) - - return data, cnames - -def bin_data(data, bin_stats): - """ This function bins the data - Inputs: - data - pandas dataframe of data - bin_stats - stats of the data - Outputs: - bins - binned data ranging from (0, bincount) - graph_index - Numeric value of column index from original csv - graph_dict - a dictionary containing the indexes of graphs - """ - - column_names = data.columns - nfeats = data.shape[1] - nrows = data.shape[0] - - # Handle NaN values - data_ind = pandas.notnull(data) - data[~data_ind] = 255 - - data = data.astype(np.uint16) # cast to save memory - data[data>=bincount] = bincount - 1 # in case of numerical precision issues - - - if nrows < 2**8: - dtype = np.uint8 - elif nrows < 2**16: - dtype = np.uint16 - elif nrows < 2**32: - dtype = np.uint32 - else: - dtype = np.uint64 - - totalgraphs = int((nfeats**2 - nfeats)/2) - bins = np.zeros((totalgraphs, bincount, bincount), dtype=dtype) - graph_index = [] - graph_dict = {} - - # Create a linear index for feature bins - i = 0 - for feat1 in range(nfeats): - name1 = column_names[feat1] - feat1_tf = data[name1] * bincount - - for feat2 in range(feat1 + 1, nfeats): - graph_dict[(feat1, feat2)] = i - name2 = column_names[feat2] - - feat2_tf = data[name2] - feat2_tf = feat2_tf[data_ind[name1] & data_ind[name2]] - - if feat2_tf.size<=1: - continue - - # sort linear matrix indices - SortedFeats = np.sort(feat1_tf[data_ind[name1] & data_ind[name2]] + feat2_tf) - - # Do math to get the indices - ind2 = np.nonzero(np.diff(SortedFeats))[0] # nonzeros are cumulative sum of all bin values - ind2 = np.append(ind2,SortedFeats.size-1) - rows = (SortedFeats[ind2]/bincount).astype(np.uint8) # calculate row from linear index - cols = np.mod(SortedFeats[ind2],bincount) # calculate column from linear index - counts = np.diff(ind2) # calculate the number of values in each bin - - bins[i,rows[0],cols[0]] = ind2[0] + 1 - bins[i,rows[1:],cols[1:]] = counts - graph_index.append([feat1,feat2]) - i = i + 1 - - return bins, graph_index, graph_dict - -def transform_data(data,column_names, typegraph): - """ Bin the data - - Data from a pandas Dataframe is binned in two dimensions. Binning is performed by - binning data in one column along one axis and another column is binned along the - other axis. All combinations of columns are binned without repeats or transposition. - There are only bincount number of bins in each dimension, and each bin is 1/bincount the size of the - difference between the maximum and minimum of each column. 
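For a single column pair, the net effect of `bin_data` above is a `bincount x bincount` matrix of counts built with a sorted linear-index trick. The same result can be sketched more simply (and more slowly) with `numpy.histogram2d`:

```python
# The effect of bin_data for one column pair is a 2-D histogram of counts.
# This sketch uses numpy.histogram2d instead of the linear-index trick above;
# `bincount` corresponds to the plugin's --bincount option.
import numpy as np


def pair_histogram(x: np.ndarray, y: np.ndarray, bincount: int) -> np.ndarray:
    counts, _, _ = np.histogram2d(
        x, y,
        bins=bincount,
        range=[[x.min(), x.max()], [y.min(), y.max()]],
    )
    return counts.astype(np.uint32)  # shape (bincount, bincount)
```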
- If the data needs to be logarithmically scaled, then the data is transformed by the algorithm presented - in this paper: https://iopscience.iop.org/article/10.1088/0957-0233/24/2/027001 - Inputs: - data - A pandas Dataframe, with nfeats number of columns - column_names - Names of Dataframe columns - typegraph - Defines whether logarithmic scale or linear scalef - Outputs: - bins - A numpy matrix that has shape (int((nfeats**2 - nfeats)/2),bincount,bincount) - bin_feats - A list containing the minimum and maximum values of each column - index - Numeric value of column index from original csv - diction - a dictionary containing the indexes of graphs - """ - - nfeats = len(column_names) - - # If logarithmic, need to transform the data - # https://iopscience.iop.org/article/10.1088/0957-0233/24/2/027001 - # Adjusts for behavior near zero - - if typegraph == "log": - C = 1/np.log(10)# Derivative of Natural Log e, d(ln(x))/dx = 1/x - data = data.astype(np.float64) - data = np.sign(data) * np.log10(1 + (abs(data/C))) - - bin_stats = {'min': data.min(), - 'max': data.max(), - 'binwidth': (data.max()-data.min()+10**-6)/bincount} - - - # Transform data into bin positions for fast binning - data = ((data - bin_stats['min'])/bin_stats['binwidth']).apply(np.floor) - - bins, index, diction = bin_data(data, bin_stats) - return bins, bin_stats, index, diction - -""" 2. Plot Generation """ -def format_ticks(out): - """ Generate tick labels - Polus Plots uses D3 to generate the plots. This function tries to mimic the - formatting of tick labels. In place of using scientific notation a scale - prefix is appended to the end of the number. See _prefix comments to see the - suffixes that are used. Numbers that are larger or smaller than 10**24 or - 10**-24 respectively are not handled and may throw an error. Values outside - of this range do not currently have an agreed upon prefix in the measurement - science community. 
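The log-scaled pyramids rely on the signed log10 transform applied in `transform_data` above (the IOP Measurement Science reference in the docstring), which stays roughly linear near zero and becomes logarithmic for large magnitudes. Isolated as a small helper:

```python
# The symmetric log transform used by transform_data for the "log" pyramids:
# sign(x) * log10(1 + |x| / C) with C = 1 / ln(10), which is ~linear near zero.
import numpy as np


def symmetric_log10(x: np.ndarray) -> np.ndarray:
    c = 1.0 / np.log(10.0)
    x = np.asarray(x, dtype=np.float64)
    return np.sign(x) * np.log10(1.0 + np.abs(x / c))


# symmetric_log10(np.array([-100.0, 0.0, 100.0])) -> approximately [-2.36, 0.0, 2.36]
```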
- - Inputs: - out - the values of the ticks used in graph - Outputs: - fticks - a list of strings containing formatted tick labels - """ - _prefix = { - -24: 'y', # yocto - -21: 'z', # zepto - -18: 'a', # atto - -15: 'f', # femto - -12: 'p', # pico - -9: 'n', # nano - -6: 'u', # micro - -3: 'm', # mili - 0: ' ', - 3: 'k', # kilo - 6: 'M', # mega - 9: 'G', # giga - 12: 'T', # tera - 15: 'P', # peta - 18: 'E', # exa - 21: 'Z', # zetta - 24: 'Y', # yotta - } - - fticks = [] - convertprefix = [] - for i in out: - formtick = "%#.3f" % i - decformtick = '%.2e' % Decimal(formtick) - convertexponent = float(decformtick[-3:]) - numbers = float(decformtick[:-4]) - if convertexponent > 0: - if convertexponent % 3 == 2: - movednum = round(numbers/10,2) - newprefix = _prefix[int(convertexponent + 1)] - formtick = str(movednum) + newprefix - elif convertexponent % 3 == 1: - movednum = round(numbers*10,1) - newprefix = _prefix[int(convertexponent - 1)] - formtick = str(movednum) + newprefix - else: - newprefix = _prefix[int(convertexponent)] - if i < 0: - formtick = str(decformtick[:5]) + newprefix - else: - formtick = str(decformtick[:4]) + newprefix - elif convertexponent < 0: - if convertexponent % -3 == -2: - movednum = round(numbers*10,1) - newprefix = _prefix[int(convertexponent - 1)] - formtick = str(movednum) + newprefix - elif convertexponent % -3 == -1: - movednum = round(numbers/10,2) - newprefix = _prefix[int(convertexponent + 1)] - formtick = str(movednum) + newprefix - else: - newprefix = _prefix[convertexponent] - if i < 0: - formtick = str(decformtick[:5]) + newprefix - else: - formtick = str(decformtick[:4]) + newprefix - else: - if i < 0: - formtick = str(decformtick[:5]) + _prefix[int(convertexponent)] - else: - formtick = str(decformtick[:4]) + _prefix[int(convertexponent)] - convertprefix.append(int(convertexponent)) - fticks.append(formtick) - - return fticks - -# Create a custom colormap to mimick Polus Plots -def get_cmap(): - - cmap_values = [[1.0,1.0,1.0,1.0]] - cmap_values.extend([[r/255,g/255,b/255,1] for r,g,b in zip(np.arange(0,255,2), - np.arange(153,255+1/128,102/126), - np.arange(34+1/128,0,-34/126))]) - cmap_values.extend([[r/255,g/255,b/255,1] for r,g,b in zip(np.arange(255,136-1/128,-119/127), - np.arange(255,0,-2), - np.arange(0,68+1/128,68/127))]) - cmap = ListedColormap(cmap_values) - - return cmap - -def gen_plot(col1, - col2, - indexdict, - column_names, - bin_stats, - fig, - ax, - data, - typegraph): - """ Generate a heatmap - Generate a heatmap of data for column 1 against column 2. 
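`format_ticks` above mimics D3-style tick labels by swapping scientific notation for the metric prefixes in `_prefix`. A compact sketch of the same idea using the engineering exponent (the nearest lower multiple of 3):

```python
# Simplified metric-prefix tick formatting in the spirit of format_ticks above:
# scale the value by 10**(3k) and append the matching prefix from the table.
import math

_PREFIX = {
    -24: "y", -21: "z", -18: "a", -15: "f", -12: "p", -9: "n", -6: "u", -3: "m",
    0: " ", 3: "k", 6: "M", 9: "G", 12: "T", 15: "P", 18: "E", 21: "Z", 24: "Y",
}


def format_tick(value: float) -> str:
    if value == 0:
        return "0.00 "
    exponent = int(math.floor(math.log10(abs(value)) / 3.0)) * 3
    exponent = max(-24, min(24, exponent))  # outside this range there is no agreed prefix
    return f"{value / 10 ** exponent:.3g}{_PREFIX[exponent]}"


# format_tick(0.00042) -> '420u'; format_tick(1_500_000) -> '1.5M'
```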
- Inputs: - col1 - the column plotted on the y-axis - col2 - column plotted on the x-axis - indexdict - a dictionary containing the indexes of graphs - column_names - list of column names - bin_stats - a list containing the min,max values of each column - fig - pregenerated figure - ax - pregenerated axis - data - p regenerated heatmap bbox artist - typegraph - specifies whether the data is log scaled or linearly scaled - Outputs: - hmap - A numpy array containing pixels of the heatmap - """ - def keepdecreasing(labeltexts0, decreasefont, bbxtext): - """ This function decreases the size of the labels if its too big """ - labeltexts0.set_fontsize(decreasefont) - bbxtext = labeltexts0.get_window_extent(renderer = fig.canvas.renderer) - decreasefont = decreasefont - 1 - return bbxtext, decreasefont - - def calculateticks(ticks, bin_width, fmin, typegraph): - """ This functio n calculates the tick values for the graphs """ - - if typegraph == "linear": - tick_vals = [t for t in ticks*bin_width+fmin] - if typegraph == "log": - C = 1/np.log(10) - tick_vals = [np.sign(t)*C*(-1+(10**abs(t))) for t in ticks*bin_width+fmin] - return tick_vals - - if col2>col1: - d = np.squeeze(bins[indexdict[col1, col2],:,:]) - r = col1 - c = col2 - elif col2 CHUNK_SIZE) or (bbxtext.y0 < 0 or bbxtext.y1 > (CHUNK_SIZE*.075)): - bbxtext, decreasefont = keepdecreasing(axlabel.texts[0], decreasefont, bbxtext) - - # This is to decrease the size of the title labels if the name is too large (Y AXIS LABEL) - if len(aylabel.texts) == 0: - aylabel.text(0.5, 0.5, "\n".join(wrap(cname_r, 60)), va = 'center', ha = 'center', fontsize = sizefont, rotation = 90, wrap = True) - else: - aylabeltext0 = aylabel.texts[0] - aylabeltext0.set_text("\n".join(wrap(cname_r, 60))) - aylabeltext0.set_fontsize(sizefont) - - bbytext = (aylabel.texts[0]).get_window_extent(renderer = fig.canvas.renderer) - decreasefont = sizefont - 1 - while (bbytext.y0 < 0 or bbytext.y1 > CHUNK_SIZE) or (bbytext.x0 < 0 or bbytext.x1 > (CHUNK_SIZE*.075)): - bbytext, decreasefont = keepdecreasing(aylabel.texts[0], decreasefont, bbytext) - - while len(ax.lines) > 0: - ax.lines[-1].remove() - - # Calculating the value of each tick in the graph (fixed width) - fmin_c = bin_stats['min'][cname_c] - fmax_c = bin_stats['max'][cname_c] - binwidth_c = bin_stats['binwidth'][cname_c] - tick_vals_c= calculateticks(ax.get_xticks(), binwidth_c, fmin_c, typegraph) - if fmin_c < 0: # draw x=0 - ax.axvline(x=abs(fmin_c)/binwidth_c) - ax.set_xticklabels(format_ticks(tick_vals_c), rotation=45, fontsize = 5, ha='right') - - # Calculating the value of each tick in the graph (fixed width) - fmin_r = bin_stats['min'][cname_r] - fmax_r = bin_stats['max'][cname_r] - binwidth_r = bin_stats['binwidth'][cname_r] - tick_vals_r = calculateticks(ax.get_yticks(), binwidth_r, fmin_r, typegraph) - if fmin_r < 0: # draw y=0 - ax.axhline(y=abs(fmin_r)/binwidth_r) - ax.set_yticklabels(format_ticks(tick_vals_r), fontsize=5, ha='right') - - fig.canvas.draw() - hmap = np.array(fig.canvas.renderer.buffer_rgba()) - - return hmap - -def get_default_fig(cmap): - """ Generate a default figure, axis, and heatmap artist - Generate a figure and draw an empty graph with useful settings for repeated - drawing of new figures. By passing the existing figure, axis, and heatmap - artist to the plot generator, many things do not need to be drawn from - scratch. This decreases the plot drawing time by a factor of 2-3 times. 
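`gen_plot` above redraws a pre-built figure with the binned counts and reads the rendered canvas back as pixels. The core of that round trip, reduced to its essentials and assuming the offscreen Agg backend used by the deleted plugin:

```python
# Reduced sketch of how gen_plot turns a bin matrix into an RGBA tile:
# draw the counts with pcolorfast, render the canvas, and read back the pixels.
import matplotlib

matplotlib.use("Agg")  # assumption: offscreen rendering, as in the deleted plugin
import matplotlib.pyplot as plt
import numpy as np


def render_heatmap_tile(counts: np.ndarray, dpi: int = 256) -> np.ndarray:
    fig, ax = plt.subplots(dpi=dpi, figsize=(4, 4), tight_layout=True)
    ax.pcolorfast(counts, cmap="viridis")  # the deleted plugin used a custom Polus colormap
    fig.canvas.draw()
    tile = np.asarray(fig.canvas.buffer_rgba()).copy()
    plt.close(fig)
    return tile  # shape (4*dpi, 4*dpi, 4), dtype uint8
```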
- Inputs: - cmap - the heatmap colormap - Outputs: - fig - A reference to the figure object - ax - A reference to the axis object - data - A reference to the heatmap artist - """ - fig, ax = plt.subplots(dpi=int(CHUNK_SIZE/4),figsize=(4,4),tight_layout={'h_pad':1,'w_pad':1}) - datacolor = ax.pcolorfast(np.zeros((bincount, bincount),np.uint64),cmap=cmap) - ticks = [t for t in range(0, bincount+1, int(bincount/(10)))] - - ax.set_xlim(0,bincount) - ax.set_ylim(0,bincount) - ax.set_xticks(ticks) - ax.set_yticks(ticks) - ax.set_xlabel(" ") - ax.set_ylabel(" ") - - ax.set_xticklabels(ticks, rotation = 45) - ax.set_yticklabels(ticks) - - fig.canvas.draw() - - axlabel = fig.add_axes([.075, 0, 1, .075], frameon = False, alpha = .5, facecolor = 'b') - axlabel.set_xticks([]) - axlabel.set_yticks([]) - axlabel.set_clip_on(True) - aylabel = fig.add_axes([0, .075, .075, 1], frameon = False, alpha = .5, facecolor = 'b') - aylabel.set_xticks([]) - aylabel.set_yticks([]) - aylabel.set_clip_on(True) - - return fig, ax, datacolor - -""" 3. Pyramid generation functions """ - -def _avg2(image): - """ Average pixels with optical field of 2x2 and stride 2 """ - - # Convert 32-bit pixels to prevent overflow during averaging - image = image.astype(np.uint32) - imageshape0 = image.shape[0] - imageshape1 = image.shape[1] - # Get the height and width of each image to the nearest even number - y_max = imageshape0 - imageshape0 % 2 - x_max = imageshape1 - imageshape1 % 2 - - # Perform averaging - avg_img = np.zeros(np.ceil([image.shape[0]/2,image.shape[1]/2,image.shape[2]]).astype(np.uint32)) - for z in range(4): - avg_img[0:int(y_max/2),0:int(x_max/2),z]= (image[0:y_max-1:2,0:x_max-1:2,z] + \ - image[1:y_max:2,0:x_max-1:2,z] + \ - image[0:y_max-1:2,1:x_max:2,z] + \ - image[1:y_max:2,1:x_max:2,z]) / 4 - - # The next if statements handle edge cases if the height or width of the - # image has an odd number of pixels - if y_max != imageshape0: - for z in range(3): - avg_img[-1,:int(x_max/2),z] = (image[-1,0:x_max-1:2,z] + \ - image[-1,1:x_max:2,z]) / 2 - if x_max != imageshape1: - for z in range(4): - avg_img[:int(y_max/2),-1,z] = (image[0:y_max-1:2,-1,z] + \ - image[1:y_max:2,-1,z]) / 2 - if y_max != imageshape0 and x_max != imageshape1: - for z in range(4): - avg_img[-1,-1,z] = image[-1,-1,z] - return avg_img - -def metadata_to_graph_info(outPath,outFile, ngraphs): - - # Create an output path object for the info file - op = Path(outPath).joinpath("{}.dzi".format(outFile)) - - # create an output path for the images - of = Path(outPath).joinpath('{}_files'.format(outFile)) - of.mkdir(exist_ok=True) - - # Get metadata info from the bfio reader - rows = np.ceil(np.sqrt(ngraphs)) - cols = np.round(np.sqrt(ngraphs)) - sizes = [cols*CHUNK_SIZE,rows*CHUNK_SIZE] - - # Calculate the number of pyramid levels - num_scales = np.ceil(np.log2(rows*CHUNK_SIZE)).astype(np.uint8) - - # create a scales template, use the full resolution - scales = { - "size":sizes, - "key": num_scales - } - - # initialize the json dictionary - info = { - "scales": [scales], # Will build scales belows - "rows": rows, - "cols": cols - } - - # create the information for each scale - for i in range(1,num_scales+1): - previous_scale = info['scales'][-1] - current_scale = copy.deepcopy(previous_scale) - current_scale['key'] = str(num_scales - i) - current_scale['size'] = [int(np.ceil(previous_scale['size'][0]/2)),int(np.ceil(previous_scale['size'][1]/2))] - info['scales'].append(current_scale) - - # write the dzi file - with open(op,'w') as writer: - 
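`_avg2` above shrinks a tile by averaging each 2x2 block of pixels, with extra branches for odd edges. For even-sized RGBA tiles the same operation is a reshape and a mean; the odd-edge handling is omitted in this sketch:

```python
# Even-size-only sketch of the 2x2 average pooling done by _avg2 above.
# The deleted code also handles odd heights/widths; that case is skipped here.
import numpy as np


def avg2(tile: np.ndarray) -> np.ndarray:
    h, w, c = tile.shape
    assert h % 2 == 0 and w % 2 == 0, "sketch assumes even tile dimensions"
    blocks = tile.astype(np.uint32).reshape(h // 2, 2, w // 2, 2, c)
    return blocks.mean(axis=(1, 3)).astype(np.uint8)
```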
writer.write(DZI.format(int(info['cols']*CHUNK_SIZE),int(info['rows']*CHUNK_SIZE))) - - return info - - -def _get_higher_res(S,info,cnames, outpath,out_file,indexscale,indexdict,binstats, typegraph, X=None,Y=None): - """ - The following function builds the image pyramid at scale S by building up only - the necessary information at high resolution layers of the pyramid. So, if 0 is - the original resolution of the image, getting a tile at scale 2 will generate - only the necessary information at layers 0 and 1 to create the desired tile at - layer 2. This function is recursive and can be parallelized. - Inputs: - S - current scale - info - dictionary of scale information - outpath - directory for all outputs - out_file - directory for current dataset - indexscale - index of the graph - binstats - stats for the binned data - typegraph - specifies whether the data is linear or logarithmically scaled - Outputs: - DeepZoom format of images. - """ - - # Get the scale info - num_scales = len(info['scales']) - scale_info = info['scales'][num_scales-S-1] - - if scale_info==None: - raise ValueError("No scale information for resolution {}.".format(S)) - if X == None: - X = [0,scale_info['size'][0]] - if Y == None: - Y = [0,scale_info['size'][1]] - - # Modify upper bound to stay within resolution dimensions - if X[1] > scale_info['size'][0]: - X[1] = scale_info['size'][0] - if Y[1] > scale_info['size'][1]: - Y[1] = scale_info['size'][1] - - # Initialize the output - image = np.zeros((int(Y[1]-Y[0]),int(X[1]-X[0]),4),dtype=np.uint8) - - # If requesting from the lowest scale, then just generate the graph - if S==num_scales-1: - index = int((int(Y[0]/CHUNK_SIZE) + int(X[0]/CHUNK_SIZE) * info['rows'])) - if index>=len(indexscale): - image = np.ones((CHUNK_SIZE,CHUNK_SIZE,4),dtype=np.uint8) * (bincount + 55) - else: - image = gen_plot(col1=indexscale[index][0], - col2=indexscale[index][1], - indexdict=indexdict, - column_names=cnames, - bin_stats=binstats, - fig=fig, - ax=ax, - data=datacolor, - typegraph=typegraph) - - else: - # Set the subgrid dimensions - subgrid_dimX = list(np.arange(2*X[0], 2*X[1], CHUNK_SIZE).astype('int')) - subgrid_dimX.append(2*X[1]) - subgrid_dimY = list(np.arange(2*Y[0], 2*Y[1], CHUNK_SIZE).astype('int')) - subgrid_dimY.append(2*Y[1]) - - - for y in range(0,len(subgrid_dimY)-1): - subgrid_Y_ind0 = np.ceil((subgrid_dimY[y] - subgrid_dimY[0])/2).astype('int') - subgrid_Y_ind1 = np.ceil((subgrid_dimY[y+1] - subgrid_dimY[0])/2).astype('int') - for x in range(0,len(subgrid_dimX)-1): - subgrid_X_ind0 = np.ceil((subgrid_dimX[x] - subgrid_dimX[0])/2).astype('int') - subgrid_X_ind1 = np.ceil((subgrid_dimX[x+1] - subgrid_dimX[0])/2).astype('int') - if S==(num_scales - 6): #to use multiple processors to compute faster. 
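`metadata_to_graph_info` above lays the graphs out on a near-square grid of `CHUNK_SIZE` tiles and records one pyramid level per halving of the full-resolution size. The level bookkeeping on its own, as a sketch:

```python
# Sketch of the pyramid-level bookkeeping done by metadata_to_graph_info above:
# arrange `ngraphs` tiles on a near-square grid and halve the size per level.
import math


def pyramid_levels(ngraphs: int, chunk_size: int) -> list:
    rows = math.ceil(math.sqrt(ngraphs))
    cols = round(math.sqrt(ngraphs))
    size = [cols * chunk_size, rows * chunk_size]
    num_scales = math.ceil(math.log2(rows * chunk_size))
    levels = [{"key": num_scales, "size": size}]
    for key in range(num_scales - 1, -1, -1):
        prev = levels[-1]["size"]
        levels.append({"key": key, "size": [math.ceil(prev[0] / 2), math.ceil(prev[1] / 2)]})
    return levels  # full resolution first, smallest thumbnail last


# With example values: pyramid_levels(ngraphs=6, chunk_size=1024)[-1] -> {'key': 0, 'size': [1, 1]}
```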
- sub_image = _get_higher_res_par(S=S+1, - info=info, - cnames=cnames, - outpath=outpath, - out_file=out_file, - indexscale=indexscale, - indexdict=indexdict, - binstats=binstats, - typegraph=typegraph, - X=subgrid_dimX[x:x+2], - Y=subgrid_dimY[y:y+2]) - else: - sub_image = _get_higher_res(S=S+1, - info=info, - cnames=cnames, - outpath=outpath, - out_file=out_file, - indexscale=indexscale, - indexdict=indexdict, - binstats=binstats, - typegraph=typegraph, - X=subgrid_dimX[x:x+2], - Y=subgrid_dimY[y:y+2]) - - image[subgrid_Y_ind0:subgrid_Y_ind1, subgrid_X_ind0:subgrid_X_ind1,:] = _avg2(sub_image) - del sub_image - - # Write the chunk - outpath = Path(outpath).joinpath('{}_files'.format(out_file),str(S)) - outpath.mkdir(exist_ok=True) - imageio.imwrite(outpath.joinpath('{}_{}.png'.format(int(X[0]/CHUNK_SIZE),int(Y[0]/CHUNK_SIZE))),image,format='PNG-FI',compression=1) - logger.info('Finished building tile (scale,X,Y): ({},{},{})'.format(S,int(X[0]/CHUNK_SIZE),int(Y[0]/CHUNK_SIZE))) - return image - -# This function performs the same operation as _get_highe_res, except it uses multiprocessing to grab higher -# resolution layers at a specific layer. -def _get_higher_res_par(S,info, cnames, outpath,out_file,indexscale, indexdict, binstats, typegraph, X=None,Y=None): - # Get the scale info - num_scales = len(info['scales']) - scale_info = info['scales'][num_scales-S-1] - - if scale_info==None: - ValueError("No scale information for resolution {}.".format(S)) - - if X == None: - X = [0,scale_info['size'][0]] - if Y == None: - Y = [0,scale_info['size'][1]] - - # Modify upper bound to stay within resolution dimensions - if X[1] > scale_info['size'][0]: - X[1] = scale_info['size'][0] - if Y[1] > scale_info['size'][1]: - Y[1] = scale_info['size'][1] - - # Initialize the output - image = np.zeros((Y[1]-Y[0],X[1]-X[0],4),dtype=np.uint8) - # If requesting from the lowest scale, then just generate the graph - if S==int(info['scales'][0]['key']): - index = (int(Y[0]/CHUNK_SIZE) + int(X[0]/CHUNK_SIZE) * info['rows']) - if index>=len(indexscale): - image = np.ones((CHUNK_SIZE,CHUNK_SIZE,4),dtype=np.uint8) * (bincount + 55) - else: - image = gen_plot(col1=indexscale[index][0], - col2=indexscale[index][1], - indexdict=indexdict, - column_names=cnames, - bin_stats=binstats, - fig=fig, - ax=ax, - data=datacolor, - typegraph=typegraph) - - else: - # Set the subgrid dimensions - subgrid_dimX = list(np.arange(2*X[0], 2*X[1], CHUNK_SIZE).astype('int')) - subgrid_dimX.append(2*X[1]) - subgrid_dimY = list(np.arange(2*Y[0], 2*Y[1], CHUNK_SIZE).astype('int')) - subgrid_dimY.append(2*Y[1]) - - subgrid_images = [] - - with Pool(processes=np.min(4,initial=multiprocessing.cpu_count())) as pool: - for y in range(0,len(subgrid_dimY)-1): - subgrid_Y_ind0 = np.ceil((subgrid_dimY[y] - subgrid_dimY[0])/2).astype('int') - subgrid_Y_ind1 = np.ceil((subgrid_dimY[y+1] - subgrid_dimY[0])/2).astype('int') - for x in range(0,len(subgrid_dimX)-1): - subgrid_X_ind0 = np.ceil((subgrid_dimX[x] - subgrid_dimX[0])/2).astype('int') - subgrid_X_ind1 = np.ceil((subgrid_dimX[x+1] - subgrid_dimX[0])/2).astype('int') - subgrid_images.append(pool.apply_async(_get_higher_res,(S+1, - info, - cnames, - outpath, - out_file, - indexscale, - indexdict, - binstats, - typegraph, - subgrid_dimX[x:x+2], - subgrid_dimY[y:y+2]))) - image[subgrid_Y_ind0:subgrid_Y_ind1,subgrid_X_ind0:subgrid_X_ind1,:] = _avg2((subgrid_images[y*(len(subgrid_dimX)-1) + x]).get()) - - del subgrid_images - - # Write the chunk - outpath = 
Path(outpath).joinpath('{}_files'.format(out_file),str(S)) - outpath.mkdir(exist_ok=True) - imageio.imwrite(outpath.joinpath('{}_{}.png'.format(int(X[0]/CHUNK_SIZE),int(Y[0]/CHUNK_SIZE))),image,format='PNG-FI',compression=1) - logger.info('Finished building tile (scale,X,Y): ({},{},{})'.format(S,int(X[0]/CHUNK_SIZE),int(Y[0]/CHUNK_SIZE))) - return image - -def write_csv(cnames,index,f_info,out_path,out_file): - """ This function writes the csv file necessary for the Deep Zoom format """ - - header = 'dataset_id, x_axis_id, y_axis_id, x_axis_name, y_axis_name, title, length, width, global_row, global_col\n' - line = '{:d}, {:d}, {:d}, {:s}, {:s}, default title, {:d}, {:d}, {:d}, {:d}\n' - l_ind = 0 - with open(str(Path(out_path).joinpath(out_file+'.csv').absolute()),'w') as writer: - writer.write(header) - for ind in index: - ind1 = ind[1] - ind0 = ind[0] - writer.write(line.format(1, - cnames[ind1][1], - cnames[ind0][1], - cnames[ind1][0], - cnames[ind0][0], - CHUNK_SIZE, - CHUNK_SIZE, - int(np.mod(l_ind,f_info['rows'])), - int(l_ind/f_info['rows']))) - l_ind += 1 - -if __name__=="__main__": - - - """ Initialize argument parser """ - logger.info("Parsing arguments...") - parser = argparse.ArgumentParser(prog='main', description='Build an image pyramid from data in a csv file.') - - """ Define the arguments """ - parser.add_argument('--inpDir', dest='inpDir', type=str, - help='Path to input images.', required=True) - - parser.add_argument('--outDir', dest='outDir', type=str, - help='Path to output images.', required=True) - - parser.add_argument('--bincount', dest='bin_count', type=int, - help='Number of bins', required=True) - - parser.add_argument('--scale', dest='scale', type=str, - help='Linear, Log, or Both', required=False) - - """ Get the input arguments """ - args = parser.parse_args() - - input_path = args.inpDir - output_path = Path(args.outDir) - bincount = args.bin_count - scales = [args.scale.lower()] - all_scales = ['linear','log'] - if scales[0] not in all_scales: - scales = all_scales - - logger.info('inpDir = {}'.format(input_path)) - logger.info('outDir = {}'.format(output_path)) - - # Set up the logger for each scale - loggers = {} - for scale in scales: - loggers[scale] = logging.getLogger("main.{}".format(scale.upper())) - loggers[scale].setLevel(logging.INFO) - - # Get the path to each csv file in the collection - input_files = [str(f.absolute()) for f in Path(input_path).iterdir() if ''.join(f.suffixes)=='.csv'] - - # Generate the default figure components - logger.info('Generating colormap and default figure...') - cmap = get_cmap() - fig, ax, datacolor = get_default_fig(cmap) - logger.info('Done!') - - for f in input_files: - - logger.info('Loading csv: {}'.format(f)) - data, cnames = load_csv(f) - column_names = [c[0] for c in cnames] - - for scale in scales: - - # Set the file path folder - folder_name = Path(f).name.replace('.csv','_{}'.format(scale)) - - # Process for current scale - loggers[scale].info('Processing: {}'.format(folder_name)) - - # Bin the data - loggers[scale].info('Binning data for {} {} features...'.format(len(column_names),scale.upper())) - bins, bin_stats, data_index, data_dict = transform_data(data,column_names, scale) - - # Generate the dzi file - loggers[scale].info('Generating pyramid {} metadata...'.format(scale.upper())) - ngraphs = len(data_index) - info_data = metadata_to_graph_info(output_path,folder_name, ngraphs) - loggers[scale].info('Done!') - - loggers[scale].info('Writing {} layout file...!'.format(scale.upper())) - 
write_csv(cnames,data_index,info_data,output_path,folder_name) - loggers[scale].info('Done!') - - # Create the pyramid - loggers[scale].info('Building {} pyramids...'.format(scale.upper())) - image_data = _get_higher_res(0, info_data,column_names, output_path,folder_name,data_index, data_dict, bin_stats, scale) - loggers[scale].info('Done!') diff --git a/visualization/polus-graph-pyramid-builder-plugin/src/requirements.txt b/visualization/polus-graph-pyramid-builder-plugin/src/requirements.txt deleted file mode 100644 index da4cf76dc..000000000 --- a/visualization/polus-graph-pyramid-builder-plugin/src/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pandas>=0.25.1 -matplotlib>=3.1.1 -numpy>=1.21.0 -imageio==2.5.0