From ae484d2aa27101f4540311b05d855c9174bbe47e Mon Sep 17 00:00:00 2001
From: Sricharan Reddy Varra <sricharanvarra@gmail.com>
Date: Mon, 7 Oct 2024 11:55:45 -0700
Subject: [PATCH] Remove calculate_enrichment_stats and its unit test

---
 src/ark/analysis/spatial_analysis_utils.py    | 65 -------------------
 tests/analysis/spatial_analysis_utils_test.py | 44 -------------
 2 files changed, 109 deletions(-)

diff --git a/src/ark/analysis/spatial_analysis_utils.py b/src/ark/analysis/spatial_analysis_utils.py
index 0b158ee43..83f7d2d71 100644
--- a/src/ark/analysis/spatial_analysis_utils.py
+++ b/src/ark/analysis/spatial_analysis_utils.py
@@ -274,71 +274,6 @@ def compute_close_cell_num(dist_mat, dist_lim, analysis_type,
     return close_num, mark1_num, mark1poslabels
 
 
-def calculate_enrichment_stats(close_num, close_num_rand):
-    """Calculates z score and p values from spatial enrichment analysis.
-
-    Args:
-        close_num (numpy.ndarray):
-            marker x marker matrix with counts for cells positive for corresponding markers
-        close_num_rand (numpy.ndarray):
-            random positive marker counts for every permutation in the bootstrap
-
-    Returns:
-        xarray.DataArray:
-            xarray contining the following statistics for marker to marker enrichment
-
-            - z: z scores for corresponding markers
-            - muhat: predicted mean values of close_num_rand random distribution
-            - sigmahat: predicted standard deviations of close_num_rand random distribution
-            - p: p values for corresponding markers, for both positive and negative enrichment
-            - h: matrix indicating whether corresponding marker interactions are significant
-            - adj_p: fdh_br adjusted p values
-    """
-    # Get the number of markers and number of permutations
-    marker_num = close_num.shape[0]
-    bootstrap_num = close_num_rand.shape[2]
-
-    # Create z, muhat, sigmahat, and p
-    z = np.zeros((marker_num, marker_num))
-    muhat = np.zeros((marker_num, marker_num))
-    sigmahat = np.zeros((marker_num, marker_num))
-    p_pos = np.zeros((marker_num, marker_num))
-    p_neg = np.zeros((marker_num, marker_num))
-
-    for j in range(0, marker_num):
-        for k in range(0, marker_num):
-            # Get close_num_rand value for every marker combination and reshape for norm fit
-            tmp = np.reshape(close_num_rand[j, k, :], (bootstrap_num, 1))
-            # Get muhat and sigmahat values for distribution from 100 permutations
-            (muhat[j, k], sigmahat[j, k]) = scipy.stats.norm.fit(tmp)
-            # Calculate z score based on distribution
-            z[j, k] = (close_num[j, k] - muhat[j, k]) / sigmahat[j, k]
-            # Calculate both positive and negative enrichment p values
-            p_pos[j, k] = (1 + (np.sum(tmp > close_num[j, k]))) / (bootstrap_num + 1)
-            p_neg[j, k] = (1 + (np.sum(tmp < close_num[j, k]))) / (bootstrap_num + 1)
-
-    # Get fdh_br adjusted p values
-    p_summary = np.zeros_like(p_pos[:, :])
-    for j in range(0, marker_num):
-        for k in range(0, marker_num):
-            # Use negative enrichment p values if the z score is negative, and vice versa
-            if z[j, k] > 0:
-                p_summary[j, k] = p_pos[j, k]
-            else:
-                p_summary[j, k] = p_neg[j, k]
-    (h, adj_p, aS, aB) = multipletests(
-        p_summary, alpha=.05
-    )
-
-    # Create an Xarray with the dimensions (stats variables, number of markers, number of markers)
-    stats_data = np.stack((z, muhat, sigmahat, p_pos, p_neg, h, adj_p), axis=0)
-    coords = [["z", "muhat", "sigmahat", "p_pos", "p_neg", "h", "p_adj"],
-              range(stats_data[0].data.shape[0]), range(stats_data[0].data.shape[1])]
-    dims = ["stats", "rows", "cols"]
-    stats_xr = xr.DataArray(stats_data, coords=coords, dims=dims)
-    return stats_xr
-
-
 def compute_neighbor_counts(current_fov_neighborhood_data, dist_matrix, distlim,
                             self_neighbor=False, cell_label_col=settings.CELL_LABEL,
                             cluster_name_col=settings.CELL_TYPE):
diff --git a/tests/analysis/spatial_analysis_utils_test.py b/tests/analysis/spatial_analysis_utils_test.py
index fe986bb4b..725ad7a82 100644
--- a/tests/analysis/spatial_analysis_utils_test.py
+++ b/tests/analysis/spatial_analysis_utils_test.py
@@ -199,50 +199,6 @@ def test_compute_close_cell_num():
     assert example_closenum[2, 2] == 0
 
 
-def test_calculate_enrichment_stats():
-    # Positive enrichment
-
-    # Generate random closenum matrix
-    stats_cnp = np.zeros((20, 20))
-    stats_cnp[:, :] = 80
-
-    # Generate random closenumrand matrix, ensuring significant positive enrichment
-    stats_cnrp = np.random.randint(1, 40, (20, 20, 100))
-
-    stats_xr_pos = spatial_analysis_utils.calculate_enrichment_stats(stats_cnp, stats_cnrp)
-
-    assert stats_xr_pos.loc["z", 0, 0] > 0
-    assert stats_xr_pos.loc["p_pos", 0, 0] < .05
-
-    # Negative enrichment
-
-    # Generate random closenum matrix
-    stats_cnn = np.zeros((20, 20))
-
-    # Generate random closenumrand matrix, ensuring significant negative enrichment
-    stats_cnrn = np.random.randint(40, 80, (20, 20, 100))
-
-    stats_xr_neg = spatial_analysis_utils.calculate_enrichment_stats(stats_cnn, stats_cnrn)
-
-    assert stats_xr_neg.loc["z", 0, 0] < 0
-    assert stats_xr_neg.loc["p_neg", 0, 0] < .05
-
-    # No enrichment
-
-    # Generate random closenum matrix
-    stats_cn = np.zeros((20, 20))
-    stats_cn[:, :] = 80
-
-    # Generate random closenumrand matrix, ensuring no enrichment
-    stats_cnr = np.random.randint(78, 82, (20, 20, 100))
-
-    stats_xr = spatial_analysis_utils.calculate_enrichment_stats(stats_cn, stats_cnr)
-
-    assert abs(stats_xr.loc["z", 0, 0]) < 1
-    assert stats_xr.loc["p_neg", 0, 0] > .05
-    assert stats_xr.loc["p_pos", 0, 0] > .05
-
-
 def test_compute_neighbor_counts():
     fov_col = settings.FOV_ID
     cluster_id_col = settings.CELL_TYPE_NUM