Skip to content

Commit

Permalink
Merge pull request #1890 from 4dn-dcic/ajs_ip_exp_cat_calcprop_upd
Browse files Browse the repository at this point in the history
Modified the ExperimentMic experiment categorizer calc prop
  • Loading branch information
aschroed authored May 24, 2024
2 parents 2a17541 + 165bfa0 commit 23f8c30
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 20 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ fourfront
Change Log
----------


7.5.3
=====

`calcprop update <https://github.com/4dn-dcic/fourfront/pull/1890>`_

* Updated experiment_categorizer calcprop for ExperimentMic to deal with many targets in imaging paths

7.5.2
=====

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
# Note: Various modules refer to this system as "encoded", not "fourfront".
name = "encoded"
version = "7.5.2"
version = "7.5.3"
description = "4DN-DCIC Fourfront"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down
89 changes: 89 additions & 0 deletions src/encoded/tests/test_types_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,22 @@ def basic_info(lab, award):
}


@pytest.fixture
def list_of_region_biofeatures(testapp, basic_info, region_term):
    ''' Post ten bio_feature items that share one description and feature type
        and differ only in preferred_label ('genomic region 1' .. 'genomic region 10').
        Returns the list of posted items in creation order.'''
    # values from basic_info take precedence over the two defaults given here
    template = {
        'description': 'Test Region Biofeature',
        'feature_type': region_term['@id'],
        **basic_info,
    }
    posted = []
    for number in range(1, 11):
        payload = dict(template, preferred_label=f'genomic region {number}')
        response = testapp.post_json('/bio_feature', payload)
        posted.append(response.json['@graph'][0])
    return posted

@pytest.fixture
def list_of_3_reg_biofeatures(list_of_region_biofeatures):
    ''' The first three items of list_of_region_biofeatures.'''
    first_three = list_of_region_biofeatures[:3]
    return first_three


@pytest.fixture
def imaging_path_1(testapp, basic_info, genomic_region_bio_feature):
basic_info['target'] = [genomic_region_bio_feature['@id']]
Expand All @@ -697,12 +713,61 @@ def imaging_path_3(testapp, basic_info, basic_region_bio_feature):
return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]


@pytest.fixture
def imaging_path_4(testapp, basic_info, list_of_region_biofeatures):
    ''' Imaging path targeting every bio_feature in list_of_region_biofeatures,
        with 'DAPI' as the labeled probe.'''
    # NOTE: the injected basic_info dict is updated in place before posting
    basic_info.update(
        target=[feature.get('@id') for feature in list_of_region_biofeatures],
        labeled_probe='DAPI',
    )
    response = testapp.post_json('/imaging_path', basic_info)
    return response.json['@graph'][0]


@pytest.fixture
def imaging_path_5(testapp, basic_info, list_of_3_reg_biofeatures):
    ''' Imaging path targeting the bio_features in list_of_3_reg_biofeatures,
        with 'DAPI' as the labeled probe.'''
    # NOTE: the injected basic_info dict is updated in place before posting
    basic_info.update(
        target=[feature.get('@id') for feature in list_of_3_reg_biofeatures],
        labeled_probe='DAPI',
    )
    response = testapp.post_json('/imaging_path', basic_info)
    return response.json['@graph'][0]


@pytest.fixture
def imaging_path_6(testapp, basic_info, prot_bio_feature):
    ''' Imaging path whose single target is prot_bio_feature,
        with 'DAPI' as the labeled probe.'''
    # NOTE: the injected basic_info dict is updated in place before posting
    basic_info.update(
        target=[prot_bio_feature['@id']],
        labeled_probe='DAPI',
    )
    response = testapp.post_json('/imaging_path', basic_info)
    return response.json['@graph'][0]


@pytest.fixture
def microscopy_no_path(testapp, repliseq_info, exp_types):
    ''' ExperimentMic posted with the 'fish' experiment type and no imaging paths added here.'''
    fish_type_id = exp_types['fish']['@id']
    repliseq_info['experiment_type'] = fish_type_id
    response = testapp.post_json('/experiment_mic', repliseq_info)
    return response.json['@graph'][0]


@pytest.fixture
def microscopy_w_path_w_many_targets(testapp, repliseq_info, imaging_path_4, exp_types):
    ''' ExperimentMic ('fish' experiment type) with one imaging path,
        imaging_path_4, on channel ch01.'''
    repliseq_info['experiment_type'] = exp_types['fish']['@id']
    repliseq_info['imaging_paths'] = [
        {'path': imaging_path_4['@id'], 'channel': 'ch01'},
    ]
    response = testapp.post_json('/experiment_mic', repliseq_info)
    return response.json['@graph'][0]


@pytest.fixture
def microscopy_w_path_w_many_targets_and_split_path(testapp, repliseq_info, imaging_path_4,
                                                    imaging_path_2, imaging_path_6, exp_types):
    ''' ExperimentMic ('fish' experiment type) with three imaging paths:
        imaging_path_4 on ch01, imaging_path_2 on ch02 and imaging_path_6 on ch03.'''
    repliseq_info['experiment_type'] = exp_types['fish']['@id']
    channel_assignments = (
        ('ch01', imaging_path_4),
        ('ch02', imaging_path_2),
        ('ch03', imaging_path_6),
    )
    repliseq_info['imaging_paths'] = [
        {'path': img_path['@id'], 'channel': channel}
        for channel, img_path in channel_assignments
    ]
    response = testapp.post_json('/experiment_mic', repliseq_info)
    return response.json['@graph'][0]

@pytest.fixture
def microscopy_w_path_w_few_targets_and_split_path(testapp, repliseq_info, imaging_path_5,
                                                   imaging_path_6, exp_types):
    ''' ExperimentMic ('fish' experiment type) with two imaging paths:
        imaging_path_5 on ch01 and imaging_path_6 on ch02.'''
    repliseq_info['experiment_type'] = exp_types['fish']['@id']
    channel_assignments = (
        ('ch01', imaging_path_5),
        ('ch02', imaging_path_6),
    )
    repliseq_info['imaging_paths'] = [
        {'path': img_path['@id'], 'channel': channel}
        for channel, img_path in channel_assignments
    ]
    response = testapp.post_json('/experiment_mic', repliseq_info)
    return response.json['@graph'][0]


@pytest.fixture
def microscopy_w_path(testapp, repliseq_info, imaging_path_1, exp_types):
repliseq_info['experiment_type'] = exp_types['fish']['@id']
Expand Down Expand Up @@ -780,6 +845,30 @@ def test_experiment_categorizer_4_mic_w_multi_path(testapp, microscopy_w_multipa
assert v in value


def test_experiment_categorizer_4_mic_w_path_w_many_targets(testapp, microscopy_w_path_w_many_targets):
    ''' With the ten region targets of the single imaging path, the categorizer
        value is expected to be a count ('10 genomic regions'), not a list of titles.'''
    categorizer = microscopy_w_path_w_many_targets['experiment_categorizer']
    assert categorizer['field'] == 'Target'
    assert categorizer['value'] == '10 genomic regions'


def test_experiment_categorizer_4_mic_w_path_w_many_targets_and_split_path(testapp, microscopy_w_path_w_many_targets_and_split_path,
                                                                           prot_bio_feature):
    ''' Across the three imaging paths the categorizer value is expected to contain
        the summed count '11 genomic regions' plus the display title of prot_bio_feature.'''
    categorizer = microscopy_w_path_w_many_targets_and_split_path['experiment_categorizer']
    assert categorizer['field'] == 'Target'
    value = categorizer['value']
    assert '11 genomic regions' in value
    assert prot_bio_feature.get('display_title') in value


def test_experiment_categorizer_4_mic_w_paths_w_fewer_targets(testapp, microscopy_w_path_w_few_targets_and_split_path,
                                                              list_of_3_reg_biofeatures, prot_bio_feature):
    ''' With only three region targets plus prot_bio_feature, the categorizer value
        is expected to contain each target's display title.'''
    categorizer = microscopy_w_path_w_few_targets_and_split_path['experiment_categorizer']
    assert categorizer['field'] == 'Target'
    value = categorizer['value']
    # region features are checked first, then prot_bio_feature
    expected_features = [*list_of_3_reg_biofeatures, prot_bio_feature]
    for feature in expected_features:
        assert feature.get('display_title') in value



def test_experiment_categorizer_4_mic_w_split_path(testapp, microscopy_w_splitpath):
'''Sometimes a (group of) target(s) is split into different imaging paths,
e.g. due to multiplexing. Sum the split targets and return only one string.'''
Expand Down
48 changes: 29 additions & 19 deletions src/encoded/types/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1053,32 +1053,42 @@ def display_title(self, request, experiment_type, biosample):
return self.add_accession_to_title(self.experiment_summary(request, experiment_type, biosample))

@calculated_property(schema=EXP_CATEGORIZER_SCHEMA)
def experiment_categorizer(self, request, experiment_type, biosample, imaging_paths=None):
def experiment_categorizer(self, request, experiment_type, imaging_paths=None):
''' Use the target(s) in the imaging path'''
if imaging_paths:
unique_targets = []
path_targets = []
path_targets_by_type = {}
for pathobj in imaging_paths:
path = get_item_or_none(request, pathobj['path'], 'imaging_path')
for target in path.get('target', []):
biofeature = get_item_or_none(request, target, 'bio_feature')
if biofeature['@id'] not in unique_targets:
unique_targets.append(biofeature['@id'])
path_targets.append(biofeature['display_title'])
if path_targets:
if biofeature:
ftype = biofeature.get('feature_type')
atid = biofeature.get('@id')
path_targets_by_type.setdefault(ftype, {}).setdefault(atid, '')
path_targets_by_type[ftype][atid] = biofeature.get('display_title')
if path_targets_by_type:
value = []
sum_targets = {}
for target in path_targets:
# check if target starts with numbers, e.g. '50 TADs', '40 TADs'
# sum them if there are more: '90 TADs'
split_target = re.split(r'(^[0-9]+)', target, maxsplit=1)
if len(split_target) > 1:
t_num, t_name = split_target[1:3]
sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
elif target not in value:
value.append(target)
if sum_targets:
value = [str(n) + t for t, n in sum_targets.items()] + value
for feat_type, targets in path_targets_by_type.items():
if len(targets) > 5:
ftype_term = get_item_or_none(request, feat_type, 'ontology_term')
ftype_str = ftype_term.get('display_title')
if ftype_str == 'region':
ftype_str = 'genomic region'
ftype_str = f"{ftype_str.replace('_', ' ')}s"
value.append(f"{len(targets)} {ftype_str}")
else:
sum_targets = {}
for tname in targets.values():
# check if target starts with numbers, e.g. '50 TADs', '40 TADs'
# sum them if there are more: '90 TADs'
split_target = re.split(r'(^[0-9]+)', tname, maxsplit=1)
if len(split_target) > 1:
t_num, t_name = split_target[1:3]
sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
elif tname not in value:
value.append(tname)
if sum_targets:
value = [str(n) + t for t, n in sum_targets.items()] + value
value = ', '.join(value)
return {
'field': 'Target',
Expand Down

0 comments on commit 23f8c30

Please sign in to comment.