Optional normalization for generic_cell_clustering.ipynb (#1027)
* add normalize arg, default True

* test to check for no normalizing

* update notebook

* move arg init to section 1

* notebook test

* notebook typo

* notebook wording
camisowers authored Aug 2, 2023
1 parent 7823f9d commit 94e6363
Showing 5 changed files with 71 additions and 38 deletions.
6 changes: 4 additions & 2 deletions src/ark/phenotyping/cell_som_clustering.py
@@ -8,7 +8,7 @@
def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
cell_som_input_data, som_weights_name='cell_som_weights.feather',
xdim=10, ydim=10, lr_start=0.05, lr_end=0.01, num_passes=1, seed=42,
overwrite=False):
overwrite=False, normalize=True):
"""Run the SOM training on the expression columns specified in `cell_som_cluster_cols`.
Saves the SOM weights to `base_dir/som_weights_name`.
@@ -40,6 +40,8 @@ def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
The random seed to use for training the SOM
overwrite (bool):
If set, force retrains the SOM and overwrites the weights
normalize (bool):
Whether to normalize the data by the 99.9% value of each column before training. Defaults to True.
Returns:
cluster_helpers.CellSOMCluster:
@@ -62,7 +64,7 @@ def train_cell_som(fovs, base_dir, cell_table_path, cell_som_cluster_cols,
cell_pysom = cluster_helpers.CellSOMCluster(
cell_som_input_data, som_weights_path, fovs, cell_som_cluster_cols,
num_passes=num_passes, xdim=xdim, ydim=ydim, lr_start=lr_start, lr_end=lr_end,
seed=seed
seed=seed, normalize=normalize
)

# train the SOM weights
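For context, a minimal usage sketch of the updated `train_cell_som` signature — the right-hand-side variables are placeholders, not values from this repository; only the keyword names come from the diff above:

from ark.phenotyping import cell_som_clustering

# Hypothetical inputs; train the cell SOM without the 99.9% normalization step.
cell_pysom = cell_som_clustering.train_cell_som(
    fovs=fovs,                                    # list of FOV names to subset on
    base_dir=base_dir,                            # analysis base directory
    cell_table_path=cell_table_path,              # path to the cell table
    cell_som_cluster_cols=cell_som_cluster_cols,  # expression columns to train on
    cell_som_input_data=cell_som_input_data,      # per-cell input DataFrame
    som_weights_name='cell_som_weights.feather',
    normalize=False                               # opt out of the default 99.9% normalization
)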
8 changes: 6 additions & 2 deletions src/ark/phenotyping/cluster_helpers.py
@@ -282,7 +282,7 @@ class CellSOMCluster(PixieSOMCluster):
def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
fovs: List[str], columns: List[str], num_passes: int = 1,
xdim: int = 10, ydim: int = 10, lr_start: float = 0.05, lr_end: float = 0.01,
seed=42):
seed=42, normalize=True):
"""Creates a cell SOM cluster object derived from the abstract PixieSOMCluster
Args:
@@ -306,6 +306,9 @@ def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
The learning rate to decay to.
seed (int):
The random seed to use.
normalize (bool):
Whether to normalize the data by the 99.9% value of each column before training. Defaults to True.
"""
super().__init__(
weights_path, columns, num_passes, xdim, ydim, lr_start, lr_end, seed
@@ -323,7 +326,8 @@ def __init__(self, cell_data: pd.DataFrame, weights_path: pathlib.Path,
].reset_index(drop=True)

# since cell_data is the only dataset, we can just normalize it immediately
self.normalize_data()
if normalize:
self.normalize_data()

def normalize_data(self):
"""Normalizes `cell_data` by the 99.9% value of each pixel cluster count column
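The body of `normalize_data` is not shown in this diff; as a rough sketch based on its docstring (an assumption, not the repository's exact implementation), 99.9% normalization of the cluster-count columns amounts to:

import numpy as np
import pandas as pd

def normalize_by_999(cell_data: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Divide each column by its 99.9% value so most entries land in [0, 1]."""
    norm_data = cell_data.copy()
    for col in columns:
        cap = np.quantile(norm_data[col].values, 0.999)
        if cap > 0:  # guard against all-zero columns
            norm_data[col] = norm_data[col] / cap
    return norm_data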
59 changes: 27 additions & 32 deletions templates/generic_cell_clustering.ipynb
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -39,7 +38,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -67,7 +65,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -92,7 +89,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"tags": []
@@ -102,7 +98,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -132,7 +127,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -168,7 +162,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -193,7 +186,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -229,27 +221,46 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"**99.9% Normalization**\n",
"\n",
"Whether to normalize each of the `cell_som_cluster_cols` by their 99.9% value prior to training FlowSOM. Set to `False` to skip this normalization step."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"normalize_param"
]
},
"outputs": [],
"source": [
"normalize = True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2: Cell clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.1: train cell SOM"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Train the cell SOM on the expression values provided per cell (the data stored in `cell_som_input_name`). Training is done using the `FlowSOM` algorithm. Note that each of the `cell_som_cluster_cols` are normalized by their 99.9% value prior to training.\n",
"Train the cell SOM on the expression values provided per cell (the data stored in `cell_som_input_name`). Training is done using the `FlowSOM` algorithm.\n",
"\n",
"For a full set of parameters you can customize for `train_cell_som`, please consult: <a href=https://ark-analysis.readthedocs.io/en/latest/_markdown/ark.phenotyping.html#ark.phenotyping.cell_cluster_utils.train_cell_som>cell training docs</a>."
]
@@ -272,20 +283,19 @@
" cell_som_cluster_cols,\n",
" cell_som_input_data,\n",
" som_weights_name=cell_som_weights_name,\n",
" num_passes=1\n",
" num_passes=1,\n",
" normalize=normalize\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2: assign cell SOM clusters"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -321,15 +331,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3: run cell consensus clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -344,7 +352,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -387,23 +394,20 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3: visualize results"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1: use the interactive reclustering results to relabel cell meta clusters"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -455,7 +459,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -491,7 +494,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -516,7 +518,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -574,7 +575,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -602,7 +602,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -639,15 +638,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.3: save consensus cluster labels to cell table"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -670,15 +667,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4.6: save the full results of Pixie cell clustering"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -702,7 +697,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -750,7 +744,8 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"vscode": {
"interpreter": {
33 changes: 31 additions & 2 deletions tests/phenotyping/cell_som_clustering_test.py
@@ -92,8 +92,9 @@ def test_train_cell_som():
cluster_som_weights_names=cell_weights.columns.values
)

# assert the shape
# assert the shape and normalization
assert cell_weights.shape == (100, 15)
assert np.all(cell_weights < 1)

# remove cell weights and weighted channel average file for next test
os.remove(cell_pysom.weights_path)
@@ -124,8 +125,36 @@ def test_train_cell_som():
cluster_som_weights_names=cell_weights.columns.values
)

# assert the shape
# assert the shape and normalization
assert cell_weights.shape == (100, 2)
assert np.all(cell_weights < 1)

# remove cell weights and weighted channel average file for next test
os.remove(cell_pysom.weights_path)

# TEST 3: check data was not normalized
_, cluster_counts_norm = cell_cluster_utils.create_c2pc_data(
fovs, pixel_data_path, cell_table_path, 'pixel_som_cluster'
)

# train the cell SOM
cell_pysom = cell_som_clustering.train_cell_som(
fovs=fovs,
base_dir=temp_dir,
cell_table_path=cell_table_path,
cell_som_cluster_cols=['pixel_som_cluster_%d' % i for i in np.arange(15)],
cell_som_input_data=cluster_counts_norm, normalize=False
)

# assert cell weights has been created
assert os.path.exists(cell_pysom.weights_path)

# read in the cell weights
cell_weights = feather.read_dataframe(cell_pysom.weights_path)

# assert the shape and lack of normalization
assert cell_weights.shape == (100, 15)
assert not np.all(cell_weights < 1)


# NOTE: overwrite functionality tested in cluster_helpers_test.py
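The new assertions lean on the fact that SOM weights trained on 99.9%-normalized columns stay below 1, while weights trained on raw cluster counts do not. A toy illustration of that check with synthetic data (not the actual test fixtures):

import numpy as np

# Synthetic stand-in for per-cell pixel cluster counts.
rng = np.random.default_rng(42)
raw_counts = rng.integers(0, 50, size=(100, 15)).astype(float)

# 99.9% normalization per column, mirroring what normalize_data describes.
normalized = raw_counts / np.quantile(raw_counts, 0.999, axis=0)

# Trained SOM weights are roughly averages of their training data, so:
assert not np.all(raw_counts.mean(axis=0) < 1)  # normalize=False case
assert np.all(normalized.mean(axis=0) < 1)      # default normalized case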
3 changes: 3 additions & 0 deletions tests/utils/notebooks_test.py
@@ -596,6 +596,9 @@ def test_cluster_prefix(self):
def test_cell_cluster_files(self):
self.tb.execute_cell("cell_cluster_files")

def test_normalization(self):
self.tb.execute_cell("normalize_param")

def test_train_cell_som(self):
self.tb.execute_cell("train_cell_som")

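These notebook tests execute individual cells by tag; a standalone sketch of the underlying testbook pattern, with the notebook path and assertion assumed rather than taken from the test class:

from testbook import testbook

@testbook('templates/generic_cell_clustering.ipynb', execute=False)
def test_normalize_param_cell(tb):
    # Run the cell tagged "normalize_param", which sets `normalize = True`.
    tb.execute_cell('normalize_param')
    # Check the toggle inside the notebook kernel's namespace.
    tb.inject('assert normalize is True')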
