Switch to top 100 enriched treatments in JUMP (new sup table) (#60)

* switch to top 100 instead of top 10
* rerun processing and update .gz files
* rerun notebook and minor mods
WayScience · Mar 8, 2024 · 17c0174 · 17c0174
1 parent cb4883b
commit 17c0174
Show file tree

Hide file tree

Showing 6 changed files with 16,206 additions and 1,808 deletions.
diff --git a/...e_model/jump_phenotype_profiles/jump_compare_cell_types_and_time_across_phenotypes.tsv.gz b/...e_model/jump_phenotype_profiles/jump_compare_cell_types_and_time_across_phenotypes.tsv.gz
diff --git a/3.evaluate_model/jump_phenotype_profiles/jump_most_significant_phenotype_enrichment.tsv b/3.evaluate_model/jump_phenotype_profiles/jump_most_significant_phenotype_enrichment.tsv
diff --git a/3.evaluate_model/jump_phenotype_profiles/jump_phenotype_profiles.tsv.gz b/3.evaluate_model/jump_phenotype_profiles/jump_phenotype_profiles.tsv.gz
diff --git a/3.evaluate_model/jump_phenotype_profiles/jump_phenotype_profiles_shuffled.tsv.gz b/3.evaluate_model/jump_phenotype_profiles/jump_phenotype_profiles_shuffled.tsv.gz
diff --git a/3.evaluate_model/process_jump_phenotype_profiles.ipynb b/3.evaluate_model/process_jump_phenotype_profiles.ipynb
@@ -32,9 +32,7 @@
    "source": [
     "import pathlib\n",
     "from typing import List\n",
-    "import pandas as pd\n",
-    "\n",
-    "import umap"
+    "import pandas as pd"
    ]
   },
   {
@@ -54,7 +52,7 @@
     "jump_sc_pred_file = f\"{url}/{commit}/{file}\"\n",
     "\n",
     "# Set constants\n",
-    "n_top_results_to_explore = 10"
+    "n_top_results_to_explore = 100"
    ]
   },
   {
@@ -436,7 +434,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(3600, 11)\n"
+      "(18000, 11)\n"
      ]
     },
     {
@@ -580,6 +578,7 @@
     "# Focus on the top results for downstream interpretation\n",
     "jump_focused_top_results_df = (\n",
     "    jump_pred_df\n",
+    "    .query(\"Metadata_model_type == 'final'\")\n",
     "    .groupby([\"Metadata_model_type\", \"treatment_type\", \"Cell_type\", \"Time\", \"phenotype\"])\n",
     "    .apply(lambda x: x.nsmallest(n_top_results_to_explore, \"p_value\"))\n",
     "    .reset_index(drop=True)\n",

diff --git a/3.evaluate_model/scripts/nbconverted/process_jump_phenotype_profiles.py b/3.evaluate_model/scripts/nbconverted/process_jump_phenotype_profiles.py
@@ -25,8 +25,6 @@
 from typing import List
 import pandas as pd
 
-import umap
-
 
 # In[2]:
 
@@ -41,7 +39,7 @@
 jump_sc_pred_file = f"{url}/{commit}/{file}"
 
 # Set constants
-n_top_results_to_explore = 10
+n_top_results_to_explore = 100
 
 
 # In[3]:
@@ -125,6 +123,7 @@
 # Focus on the top results for downstream interpretation
 jump_focused_top_results_df = (
     jump_pred_df
+    .query("Metadata_model_type == 'final'")
     .groupby(["Metadata_model_type", "treatment_type", "Cell_type", "Time", "phenotype"])
     .apply(lambda x: x.nsmallest(n_top_results_to_explore, "p_value"))
     .reset_index(drop=True)