Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optional normalization for generic_cell_clustering.ipynb #1027

Merged
merged 9 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/ark/phenotyping/cell_som_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
cell_som_input_data, som_weights_name='cell_som_weights.feather',
xdim=10, ydim=10, lr_start=0.05, lr_end=0.01, num_passes=1, seed=42,
overwrite=False):
overwrite=False, normalize=True):
"""Run the SOM training on the expression columns specified in `cell_som_cluster_cols`.
Saves the SOM weights to `base_dir/som_weights_name`.
Expand Down Expand Up @@ -40,6 +40,8 @@ def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
The random seed to use for training the SOM
overwrite (bool):
If set, force retrains the SOM and overwrites the weights
normalize (bool):
Whether to perform 99.9th percentile normalization. Defaults to True.
Returns:
cluster_helpers.CellSOMCluster:
Expand All @@ -62,7 +64,7 @@ def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
cell_pysom = cluster_helpers.CellSOMCluster(
cell_som_input_data, som_weights_path, fovs, cell_som_cluster_cols,
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end,
seed=seed
seed=seed, normalize=normalize
)

# train the SOM weights
Expand Down
8 changes: 6 additions & 2 deletions src/ark/phenotyping/cluster_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ class CellSOMCluster(PixieSOMCluster):
def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
fovs: List[str], columns: List[str], num_passes: int = 1,
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01,
seed=42):
seed=42, normalize=True):
"""Creates a cell SOM cluster object derived from the abstract PixieSOMCluster
Args:
Expand All @@ -306,6 +306,9 @@ def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
The learning rate to decay to.
seed (int):
The random seed to use.
normalize (bool):
Whether to perform 99.9th percentile normalization. Defaults to True.
"""
super().__init__(
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end, seed
Expand All @@ -323,7 +326,8 @@ def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
].reset_index(drop=True)

# since cell_data is the only dataset, we can just normalize it immediately
self.normalize_data()
if normalize:
self.normalize_data()

def normalize_data(self):
"""Normalizes `cell_data` by the 99.9% value of each pixel cluster count column
Expand Down
59 changes: 27 additions & 32 deletions templates/generic_cell_clustering.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
camisowers marked this conversation as resolved.
Show resolved Hide resolved
camisowers marked this conversation as resolved.
Show resolved Hide resolved
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
Expand Down Expand Up @@ -39,7 +38,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
Expand Down Expand Up @@ -67,7 +65,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -92,7 +89,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
Expand All @@ -102,7 +98,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -132,7 +127,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -168,7 +162,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -193,7 +186,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -229,27 +221,46 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"**99.9% Normalization**\n",
"\n",
"Whether to normalize each of the `cell_som_cluster_cols` by their 99.9% value prior to training the SOM. Set to `False` to skip this normalization step."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"normalize_param"
]
},
"outputs": [],
"source": [
"normalize = True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2: Cell clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.1: train cell SOM"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Train the cell SOM on the expression values provided per cell (the data stored in `cell_som_input_name`). Training is done using the `FlowSOM` algorithm. Note that each of the `cell_som_cluster_cols` are normalized by their 99.9% value prior to training.\n",
"Train the cell SOM on the expression values provided per cell (the data stored in `cell_som_input_name`). Training is done using the `FlowSOM` algorithm.\n",
"\n",
"For a full set of parameters you can customize for `train_cell_som`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.train_cell_som>cell training docs</a>."
]
Expand All @@ -272,20 +283,19 @@
" cell_som_cluster_cols,\n",
" cell_som_input_data,\n",
" som_weights_name=cell_som_weights_name,\n",
" num_passes=1\n",
" num_passes=1,\n",
" normalize=normalize\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2: assign cell SOM clusters"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -321,15 +331,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3: run cell consensus clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -344,7 +352,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -387,23 +394,20 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3: visualize results"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1: use the interactive reclustering results to relabel cell meta clusters"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -455,7 +459,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -491,7 +494,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -516,7 +518,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -574,7 +575,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -602,7 +602,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -639,15 +638,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.3: save consensus cluster labels to cell table"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -670,15 +667,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4.6: save the full results of Pixie cell clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -702,7 +697,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down Expand Up @@ -750,7 +744,8 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"vscode": {
"interpreter": {
Expand Down
33 changes: 31 additions & 2 deletions tests/phenotyping/cell_som_clustering_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ def test_train_cell_som():
cluster_som_weights_names=cell_weights.columns.values
)

# assert the shape
# assert the shape and normalization
assert cell_weights.shape == (100, 15)
assert np.all(cell_weights < 1)

# remove cell weights and weighted channel average file for next test
os.remove(cell_pysom.weights_path)
Expand Down Expand Up @@ -124,8 +125,36 @@ def test_train_cell_som():
cluster_som_weights_names=cell_weights.columns.values
)

# assert the shape
# assert the shape and normalization
assert cell_weights.shape == (100, 2)
assert np.all(cell_weights < 1)

# remove cell weights and weighted channel average file for next test
os.remove(cell_pysom.weights_path)

# TEST 3: check data was not normalized
_, cluster_counts_norm = cell_cluster_utils.create_c2pc_data(
fovs, pixel_data_path, cell_table_path, 'pixel_som_cluster'
)

# train the cell SOM
cell_pysom = cell_som_clustering.train_cell_som(
fovs=fovs,
base_dir=temp_dir,
cell_table_path=cell_table_path,
cell_som_cluster_cols=['pixel_som_cluster_%d' % i for i in np.arange(15)],
cell_som_input_data=cluster_counts_norm, normalize=False
)

# assert cell weights has been created
assert os.path.exists(cell_pysom.weights_path)

# read in the cell weights
cell_weights = feather.read_dataframe(cell_pysom.weights_path)

# assert the shape and lack of normalization
assert cell_weights.shape == (100, 15)
assert not np.all(cell_weights < 1)


# NOTE: overwrite functionality tested in cluster_helpers_test.py
Expand Down
3 changes: 3 additions & 0 deletions tests/utils/notebooks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,9 @@ def test_cluster_prefix(self):
def test_cell_cluster_files(self):
self.tb.execute_cell("cell_cluster_files")

def test_normalization(self):
self.tb.execute_cell("normalize_param")

def test_train_cell_som(self):
self.tb.execute_cell("train_cell_som")

Expand Down
Loading