Skip to content

Commit

Permalink
Remove flake. Add ruff format check. Format nb
Browse files Browse the repository at this point in the history
  • Loading branch information
johnarevalo committed Oct 22, 2024
1 parent 4206e57 commit 7d47818
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 48 deletions.
10 changes: 1 addition & 9 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,8 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip build
python -m pip install flake8
python -m build
python -m pip install --upgrade pip
pip install -e .[test]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
3 changes: 3 additions & 0 deletions .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v1
- uses: astral-sh/ruff-action@v1
with:
args: "format --check"
33 changes: 17 additions & 16 deletions examples/finding_pairs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,17 @@
"source": [
"random.seed(0)\n",
"n_samples = 20\n",
"dframe = pd.DataFrame({\n",
" 'plate': [random.choice(['p1', 'p2', 'p3']) for _ in range(n_samples)],\n",
" 'well': [random.choice(['w1', 'w2', 'w3', 'w4', 'w5']) for _ in range(n_samples)],\n",
" 'label': [random.choice(['t1', 't2', 't3', 't4']) for _ in range(n_samples)]\n",
"})\n",
"dframe = pd.DataFrame(\n",
" {\n",
" \"plate\": [random.choice([\"p1\", \"p2\", \"p3\"]) for _ in range(n_samples)],\n",
" \"well\": [\n",
" random.choice([\"w1\", \"w2\", \"w3\", \"w4\", \"w5\"]) for _ in range(n_samples)\n",
" ],\n",
" \"label\": [random.choice([\"t1\", \"t2\", \"t3\", \"t4\"]) for _ in range(n_samples)],\n",
" }\n",
")\n",
"dframe = dframe.drop_duplicates()\n",
"dframe = dframe.sort_values(by=['plate', 'well', 'label'])\n",
"dframe = dframe.sort_values(by=[\"plate\", \"well\", \"label\"])\n",
"dframe = dframe.reset_index(drop=True)"
]
},
Expand Down Expand Up @@ -85,9 +89,8 @@
}
],
"source": [
"\n",
"matcher = Matcher(dframe, ['plate', 'well', 'label'], seed=0)\n",
"pairs_dict = matcher.get_all_pairs(sameby=['label'], diffby=['plate', 'well'])\n",
"matcher = Matcher(dframe, [\"plate\", \"well\", \"label\"], seed=0)\n",
"pairs_dict = matcher.get_all_pairs(sameby=[\"label\"], diffby=[\"plate\", \"well\"])\n",
"pairs_dict"
]
},
Expand Down Expand Up @@ -225,7 +228,7 @@
}
],
"source": [
"dframe_multi = dframe.groupby(['plate', 'well'])['label'].unique().reset_index()\n",
"dframe_multi = dframe.groupby([\"plate\", \"well\"])[\"label\"].unique().reset_index()\n",
"dframe_multi"
]
},
Expand All @@ -235,12 +238,10 @@
"metadata": {},
"outputs": [],
"source": [
"matcher_multi = MatcherMultilabel(dframe_multi,\n",
" columns=['plate', 'well', 'label'],\n",
" multilabel_col='label',\n",
" seed=0)\n",
"pairs_multi = matcher_multi.get_all_pairs(sameby=['label'],\n",
" diffby=['plate', 'well'])"
"matcher_multi = MatcherMultilabel(\n",
" dframe_multi, columns=[\"plate\", \"well\", \"label\"], multilabel_col=\"label\", seed=0\n",
")\n",
"pairs_multi = matcher_multi.get_all_pairs(sameby=[\"label\"], diffby=[\"plate\", \"well\"])"
]
},
{
Expand Down
83 changes: 60 additions & 23 deletions examples/mAP_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from copairs import map\n"
"from copairs import map"
]
},
{
Expand Down Expand Up @@ -559,7 +559,7 @@
"url = f\"https://media.githubusercontent.com/media/broadinstitute/lincs-cell-painting/{commit}/profiles/2016_04_01_a549_48hr_batch1/{plate}/{plate}_normalized_feature_select.csv.gz\"\n",
"\n",
"df = pd.read_csv(url)\n",
"df = df.loc[:, df.nunique() > 1] # remove constant columns\n",
"df = df.loc[:, df.nunique() > 1] # remove constant columns\n",
"df"
]
},
Expand Down Expand Up @@ -1104,7 +1104,9 @@
"# make index equal to -1 for all DMSO treatment replicates\n",
"df_activity.loc[df[\"Metadata_broad_sample\"] == \"DMSO\", \"Metadata_treatment_index\"] = -1\n",
"# now all treatment replicates differ in the index column, except for DMSO replicates\n",
"df_activity.insert(0, \"Metadata_treatment_index\", df_activity.pop(\"Metadata_treatment_index\"))\n",
"df_activity.insert(\n",
" 0, \"Metadata_treatment_index\", df_activity.pop(\"Metadata_treatment_index\")\n",
")\n",
"df_activity"
]
},
Expand Down Expand Up @@ -1134,7 +1136,7 @@
"pos_diffby = []\n",
"\n",
"neg_sameby = []\n",
"# negative pairs are replicates of different treatments \n",
"# negative pairs are replicates of different treatments\n",
"neg_diffby = [\"Metadata_broad_sample\", \"Metadata_treatment_index\"]"
]
},
Expand Down Expand Up @@ -1549,8 +1551,10 @@
"metadata = df_activity.filter(regex=\"^Metadata\")\n",
"profiles = df_activity.filter(regex=\"^(?!Metadata)\").values\n",
"\n",
"replicate_aps = map.average_precision(metadata, profiles, pos_sameby, pos_diffby, neg_sameby, neg_diffby)\n",
"replicate_aps = replicate_aps.query(\"Metadata_broad_sample != 'DMSO'\") # remove DMSO\n",
"replicate_aps = map.average_precision(\n",
" metadata, profiles, pos_sameby, pos_diffby, neg_sameby, neg_diffby\n",
")\n",
"replicate_aps = replicate_aps.query(\"Metadata_broad_sample != 'DMSO'\") # remove DMSO\n",
"replicate_aps"
]
},
Expand Down Expand Up @@ -1763,7 +1767,9 @@
}
],
"source": [
"replicate_maps = map.mean_average_precision(replicate_aps, pos_sameby, null_size=10000, threshold=0.05, seed=0)\n",
"replicate_maps = map.mean_average_precision(\n",
" replicate_aps, pos_sameby, null_size=10000, threshold=0.05, seed=0\n",
")\n",
"replicate_maps[\"-log10(p-value)\"] = -replicate_maps[\"corrected_p_value\"].apply(np.log10)\n",
"replicate_maps.head(10)"
]
Expand Down Expand Up @@ -1794,12 +1800,21 @@
"source": [
"active_ratio = replicate_maps.below_corrected_p.mean()\n",
"\n",
"plt.scatter(data=replicate_maps, x=\"mean_average_precision\", y=\"-log10(p-value)\", c=\"below_corrected_p\", cmap=\"tab10\", s=10)\n",
"# 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', \n",
"plt.scatter(\n",
" data=replicate_maps,\n",
" x=\"mean_average_precision\",\n",
" y=\"-log10(p-value)\",\n",
" c=\"below_corrected_p\",\n",
" cmap=\"tab10\",\n",
" s=10,\n",
")\n",
"# 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r',\n",
"plt.xlabel(\"mAP\")\n",
"plt.ylabel(\"-log10(p-value)\")\n",
"plt.axhline(-np.log10(0.05), color=\"black\", linestyle=\"--\")\n",
"plt.text(0.5, 1.5, f\"Phenotypically active = {100*active_ratio:.2f}%\", va=\"center\", ha=\"left\")\n",
"plt.text(\n",
" 0.5, 1.5, f\"Phenotypically active = {100*active_ratio:.2f}%\", va=\"center\", ha=\"left\"\n",
")\n",
"plt.show()"
]
},
Expand Down Expand Up @@ -2432,8 +2447,10 @@
"source": [
"# aggregate replicates by taking the median of each feature\n",
"feature_cols = [c for c in df_consistent.columns if not c.startswith(\"Metadata\")]\n",
"df_consistent = df_consistent.groupby([\"Metadata_broad_sample\", \"Metadata_target\"], as_index=False)[feature_cols].median()\n",
"df_consistent['Metadata_target'] = df_consistent['Metadata_target'].str.split('|')\n",
"df_consistent = df_consistent.groupby(\n",
" [\"Metadata_broad_sample\", \"Metadata_target\"], as_index=False\n",
")[feature_cols].median()\n",
"df_consistent[\"Metadata_target\"] = df_consistent[\"Metadata_target\"].str.split(\"|\")\n",
"df_consistent.head()"
]
},
Expand Down Expand Up @@ -2657,13 +2674,14 @@
"profiles = df_consistent.filter(regex=\"^(?!Metadata)\").values\n",
"\n",
"target_aps = map.multilabel.average_precision(\n",
" metadata,\n",
" profiles,\n",
" pos_sameby=pos_sameby,\n",
" pos_diffby=pos_diffby,\n",
" neg_sameby=neg_sameby,\n",
" neg_diffby=neg_diffby,\n",
" multilabel_col='Metadata_target')\n",
" metadata,\n",
" profiles,\n",
" pos_sameby=pos_sameby,\n",
" pos_diffby=pos_diffby,\n",
" neg_sameby=neg_sameby,\n",
" neg_diffby=neg_diffby,\n",
" multilabel_col=\"Metadata_target\",\n",
")\n",
"target_aps"
]
},
Expand Down Expand Up @@ -2874,7 +2892,9 @@
}
],
"source": [
"target_maps = map.mean_average_precision(target_aps, pos_sameby, null_size=10000, threshold=0.05, seed=0)\n",
"target_maps = map.mean_average_precision(\n",
" target_aps, pos_sameby, null_size=10000, threshold=0.05, seed=0\n",
")\n",
"target_maps[\"-log10(p-value)\"] = -target_maps[\"corrected_p_value\"].apply(np.log10)\n",
"target_maps.head(10)"
]
Expand Down Expand Up @@ -2905,11 +2925,24 @@
"source": [
"consistent_ratio = target_maps.below_corrected_p.mean()\n",
"\n",
"plt.scatter(data=target_maps, x=\"mean_average_precision\", y=\"-log10(p-value)\", c=\"below_corrected_p\", cmap=\"tab10\", s=10)\n",
"plt.scatter(\n",
" data=target_maps,\n",
" x=\"mean_average_precision\",\n",
" y=\"-log10(p-value)\",\n",
" c=\"below_corrected_p\",\n",
" cmap=\"tab10\",\n",
" s=10,\n",
")\n",
"plt.xlabel(\"mAP\")\n",
"plt.ylabel(\"-log10(p-value)\")\n",
"plt.axhline(-np.log10(0.05), color=\"black\", linestyle=\"--\")\n",
"plt.text(0.5, 1.5, f\"Phenotypically consistent = {100*consistent_ratio:.2f}%\", va=\"center\", ha=\"left\")\n",
"plt.text(\n",
" 0.5,\n",
" 1.5,\n",
" f\"Phenotypically consistent = {100*consistent_ratio:.2f}%\",\n",
" va=\"center\",\n",
" ha=\"left\",\n",
")\n",
"\n",
"plt.show()"
]
Expand Down Expand Up @@ -2939,7 +2972,11 @@
],
"source": [
"consistent_targets = target_maps.query(\"below_corrected_p\")[\"Metadata_target\"]\n",
"consistent_compounds = df_consistent[df_consistent[\"Metadata_target\"].apply(lambda x: any(t in x for t in consistent_targets))][\"Metadata_broad_sample\"]\n",
"consistent_compounds = df_consistent[\n",
" df_consistent[\"Metadata_target\"].apply(\n",
" lambda x: any(t in x for t in consistent_targets)\n",
" )\n",
"][\"Metadata_broad_sample\"]\n",
"\n",
"print(f\"Phenotypically consistent targets: {consistent_targets.str.cat(sep=', ')}\")\n",
"print(f\"Phenotypically consistent compounds: {consistent_compounds.str.cat(sep=', ')}\")"
Expand Down

0 comments on commit 7d47818

Please sign in to comment.