Merge pull request #1890 from 4dn-dcic/ajs_ip_exp_cat_calcprop_upd

Modified the ExperimentMic experiment categorizer calc prop
4dn-dcic · May 24, 2024 · 23f8c30 · 23f8c30
2 parents 2a17541 + 165bfa0
commit 23f8c30
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 20 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,14 @@ fourfront
 Change Log
 ----------
 
+
+7.5.3
+=====
+
+`calcprop update  <https://github.com/4dn-dcic/fourfront/pull/1890>`_
+
+* Updated experiment_categorizer calcprop for ExperimentMic to deal with many targets in imaging paths
+
 7.5.2
 =====
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 # Note: Various modules refer to this system as "encoded", not "fourfront".
 name = "encoded"
-version = "7.5.2"
+version = "7.5.3"
 description = "4DN-DCIC Fourfront"
 authors = ["4DN-DCIC Team <[email protected]>"]
 license = "MIT"

diff --git a/src/encoded/tests/test_types_experiment.py b/src/encoded/tests/test_types_experiment.py
@@ -676,6 +676,22 @@ def basic_info(lab, award):
     }
 
 
+@pytest.fixture
+def list_of_region_biofeatures(testapp, basic_info, region_term):
+    """Post ten bio_features whose feature_type is ``region_term``.
+
+    The items are labeled 'genomic region 1' through 'genomic region 10';
+    the first '@graph' entry of each POST response is collected and returned.
+    """
+    template = {'description': 'Test Region Biofeature',
+                'feature_type': region_term['@id'],
+                **basic_info}
+    posted = []
+    for number in range(1, 11):
+        payload = dict(template, preferred_label=f'genomic region {number}')
+        response = testapp.post_json('/bio_feature', payload)
+        posted.append(response.json['@graph'][0])
+    return posted
+
+@pytest.fixture
+def list_of_3_reg_biofeatures(list_of_region_biofeatures):
+    """Return the first three items of ``list_of_region_biofeatures``."""
+    first_three = list_of_region_biofeatures[:3]
+    return first_three
+
+
 @pytest.fixture
 def imaging_path_1(testapp, basic_info, genomic_region_bio_feature):
     basic_info['target'] = [genomic_region_bio_feature['@id']]
@@ -697,12 +713,61 @@ def imaging_path_3(testapp, basic_info, basic_region_bio_feature):
     return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]
 
 
+@pytest.fixture
+def imaging_path_4(testapp, basic_info, list_of_region_biofeatures):
+    """Post an imaging_path targeting every item of ``list_of_region_biofeatures``.
+
+    Note: 'target' and 'labeled_probe' are written into ``basic_info`` in place
+    before it is posted.
+    """
+    target_ids = [feature.get('@id') for feature in list_of_region_biofeatures]
+    basic_info.update(target=target_ids, labeled_probe='DAPI')
+    response = testapp.post_json('/imaging_path', basic_info)
+    return response.json['@graph'][0]
+
+
+@pytest.fixture
+def imaging_path_5(testapp, basic_info, list_of_3_reg_biofeatures):
+    """Post an imaging_path targeting the items of ``list_of_3_reg_biofeatures``.
+
+    Note: 'target' and 'labeled_probe' are written into ``basic_info`` in place
+    before it is posted.
+    """
+    target_ids = [feature.get('@id') for feature in list_of_3_reg_biofeatures]
+    basic_info.update(target=target_ids, labeled_probe='DAPI')
+    response = testapp.post_json('/imaging_path', basic_info)
+    return response.json['@graph'][0]
+
+
+@pytest.fixture
+def imaging_path_6(testapp, basic_info, prot_bio_feature):
+    """Post an imaging_path whose only target is ``prot_bio_feature``.
+
+    Note: 'target' and 'labeled_probe' are written into ``basic_info`` in place
+    before it is posted.
+    """
+    basic_info.update(target=[prot_bio_feature['@id']], labeled_probe='DAPI')
+    response = testapp.post_json('/imaging_path', basic_info)
+    return response.json['@graph'][0]
+
+
 @pytest.fixture
 def microscopy_no_path(testapp, repliseq_info, exp_types):
     # Point the repliseq_info payload at the 'fish' experiment type (mutated in place).
     repliseq_info['experiment_type'] = exp_types['fish']['@id']
     # This fixture sets no 'imaging_paths' key before posting the experiment_mic.
     return testapp.post_json('/experiment_mic', repliseq_info).json['@graph'][0]
 
 
+@pytest.fixture
+def microscopy_w_path_w_many_targets(testapp, repliseq_info, imaging_path_4, exp_types):
+    """Post a 'fish' experiment_mic with ``imaging_path_4`` as its only imaging path (ch01).
+
+    Note: 'experiment_type' and 'imaging_paths' are written into
+    ``repliseq_info`` in place before it is posted.
+    """
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    repliseq_info['imaging_paths'] = [
+        {'path': imaging_path_4['@id'], 'channel': 'ch01'},
+    ]
+    response = testapp.post_json('/experiment_mic', repliseq_info)
+    return response.json['@graph'][0]
+
+
+@pytest.fixture
+def microscopy_w_path_w_many_targets_and_split_path(testapp, repliseq_info, imaging_path_4,
+                                                    imaging_path_2, imaging_path_6, exp_types):
+    """Post a 'fish' experiment_mic with three imaging paths on channels ch01-ch03.
+
+    The paths are, in channel order, ``imaging_path_4``, ``imaging_path_2`` and
+    ``imaging_path_6``. 'experiment_type' and 'imaging_paths' are written into
+    ``repliseq_info`` in place before it is posted.
+    """
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    paths_in_order = (imaging_path_4, imaging_path_2, imaging_path_6)
+    channels = ('ch01', 'ch02', 'ch03')
+    repliseq_info['imaging_paths'] = [
+        {'path': path['@id'], 'channel': channel}
+        for path, channel in zip(paths_in_order, channels)
+    ]
+    response = testapp.post_json('/experiment_mic', repliseq_info)
+    return response.json['@graph'][0]
+
+@pytest.fixture
+def microscopy_w_path_w_few_targets_and_split_path(testapp, repliseq_info, imaging_path_5,
+                                                   imaging_path_6, exp_types):
+    """Post a 'fish' experiment_mic with two imaging paths on channels ch01 and ch02.
+
+    The paths are, in channel order, ``imaging_path_5`` and ``imaging_path_6``.
+    'experiment_type' and 'imaging_paths' are written into ``repliseq_info``
+    in place before it is posted.
+    """
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    paths_in_order = (imaging_path_5, imaging_path_6)
+    channels = ('ch01', 'ch02')
+    repliseq_info['imaging_paths'] = [
+        {'path': path['@id'], 'channel': channel}
+        for path, channel in zip(paths_in_order, channels)
+    ]
+    response = testapp.post_json('/experiment_mic', repliseq_info)
+    return response.json['@graph'][0]
+
+
 @pytest.fixture
 def microscopy_w_path(testapp, repliseq_info, imaging_path_1, exp_types):
     repliseq_info['experiment_type'] = exp_types['fish']['@id']
@@ -780,6 +845,30 @@ def test_experiment_categorizer_4_mic_w_multi_path(testapp, microscopy_w_multipa
         assert v in value
 
 
+def test_experiment_categorizer_4_mic_w_path_w_many_targets(testapp, microscopy_w_path_w_many_targets):
+    """The categorizer of the many-targets experiment reports a count, not a list of titles."""
+    categorizer = microscopy_w_path_w_many_targets['experiment_categorizer']
+    assert categorizer['field'] == 'Target'
+    assert categorizer['value'] == '10 genomic regions'
+
+
+def test_experiment_categorizer_4_mic_w_path_w_many_targets_and_split_path(testapp, microscopy_w_path_w_many_targets_and_split_path,
+                                                                           prot_bio_feature):
+    """The categorizer value holds a region count plus the protein target's display title."""
+    categorizer = microscopy_w_path_w_many_targets_and_split_path['experiment_categorizer']
+    assert categorizer['field'] == 'Target'
+    value = categorizer['value']
+    assert '11 genomic regions' in value
+    assert prot_bio_feature.get('display_title') in value
+
+
+def test_experiment_categorizer_4_mic_w_paths_w_fewer_targets(testapp, microscopy_w_path_w_few_targets_and_split_path,
+                                                              list_of_3_reg_biofeatures, prot_bio_feature):
+    """With only three region targets, each target's display title appears in the value.
+
+    The protein target's display title must appear in the value as well.
+    """
+    categorizer = microscopy_w_path_w_few_targets_and_split_path['experiment_categorizer']
+    assert categorizer['field'] == 'Target'
+    value = categorizer['value']
+    for bf in list_of_3_reg_biofeatures:
+        assert bf.get('display_title') in value
+    assert prot_bio_feature.get('display_title') in value
+
+
+
 def test_experiment_categorizer_4_mic_w_split_path(testapp, microscopy_w_splitpath):
     '''Sometimes a (group of) target(s) is split into different imaging paths,
     e.g. due to multiplexing. Sum the split targets and return only one string.'''

diff --git a/src/encoded/types/experiment.py b/src/encoded/types/experiment.py
@@ -1053,32 +1053,42 @@ def display_title(self, request, experiment_type, biosample):
         return self.add_accession_to_title(self.experiment_summary(request, experiment_type, biosample))
 
     @calculated_property(schema=EXP_CATEGORIZER_SCHEMA)
-    def experiment_categorizer(self, request, experiment_type, biosample, imaging_paths=None):
+    def experiment_categorizer(self, request, experiment_type, imaging_paths=None):
         ''' Use the target(s) in the imaging path'''
         if imaging_paths:
-            unique_targets = []
-            path_targets = []
+            path_targets_by_type = {}
             for pathobj in imaging_paths:
                 path = get_item_or_none(request, pathobj['path'], 'imaging_path')
                 for target in path.get('target', []):
                     biofeature = get_item_or_none(request, target, 'bio_feature')
-                    if biofeature['@id'] not in unique_targets:
-                        unique_targets.append(biofeature['@id'])
-                        path_targets.append(biofeature['display_title'])
-            if path_targets:
+                    if biofeature:
+                        ftype = biofeature.get('feature_type')
+                        atid = biofeature.get('@id')
+                        path_targets_by_type.setdefault(ftype, {}).setdefault(atid, '')
+                        path_targets_by_type[ftype][atid] = biofeature.get('display_title')
+            if path_targets_by_type:
                 value = []
-                sum_targets = {}
-                for target in path_targets:
-                    # check if target starts with numbers, e.g. '50 TADs', '40 TADs'
-                    # sum them if there are more: '90 TADs'
-                    split_target = re.split(r'(^[0-9]+)', target, maxsplit=1)
-                    if len(split_target) > 1:
-                        t_num, t_name = split_target[1:3]
-                        sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
-                    elif target not in value:
-                        value.append(target)
-                if sum_targets:
-                    value = [str(n) + t for t, n in sum_targets.items()] + value
+                for feat_type, targets in path_targets_by_type.items():
+                    if len(targets) > 5:
+                        ftype_term = get_item_or_none(request, feat_type, 'ontology_term')
+                        ftype_str = ftype_term.get('display_title')
+                        if ftype_str == 'region':
+                            ftype_str = 'genomic region'
+                        ftype_str = f"{ftype_str.replace('_', ' ')}s"
+                        value.append(f"{len(targets)} {ftype_str}")
+                    else:
+                        sum_targets = {}
+                        for tname in targets.values():
+                            # check if target starts with numbers, e.g. '50 TADs', '40 TADs'
+                            # sum them if there are more: '90 TADs'
+                            split_target = re.split(r'(^[0-9]+)', tname, maxsplit=1)
+                            if len(split_target) > 1:
+                                t_num, t_name = split_target[1:3]
+                                sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
+                            elif tname not in value:
+                                value.append(tname)
+                        if sum_targets:
+                            value = [str(n) + t for t, n in sum_targets.items()] + value
                 value = ', '.join(value)
                 return {
                     'field': 'Target',