diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index cb8b271789..cd3f94c33d 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,14 @@ fourfront
 Change Log
 ----------
 
+
+7.5.3
+=====
+
+`calcprop update `_
+
+* Updated experiment_categorizer calcprop for ExperimentMic to deal with many targets in imaging paths
+
 7.5.2
 =====
 
diff --git a/pyproject.toml b/pyproject.toml
index fa3df8b339..77643fb3b8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 # Note: Various modules refer to this system as "encoded", not "fourfront".
 name = "encoded"
-version = "7.5.2"
+version = "7.5.3"
 description = "4DN-DCIC Fourfront"
 authors = ["4DN-DCIC Team "]
 license = "MIT"
diff --git a/src/encoded/tests/test_types_experiment.py b/src/encoded/tests/test_types_experiment.py
index b7fccf4756..c4fb6a8563 100644
--- a/src/encoded/tests/test_types_experiment.py
+++ b/src/encoded/tests/test_types_experiment.py
@@ -676,6 +676,25 @@ def basic_info(lab, award):
     }
 
 
+@pytest.fixture
+def list_of_region_biofeatures(testapp, basic_info, region_term):
+    '''Post ten region bio_features labelled 'genomic region 1' to 'genomic region 10'.'''
+    item = {'description': 'Test Region Biofeature',
+            'feature_type': region_term['@id']}
+    item.update(basic_info)
+    feats = []
+    for i in range(10):
+        item['preferred_label'] = f'genomic region {i + 1}'
+        feats.append(testapp.post_json('/bio_feature', item).json['@graph'][0])
+    return feats
+
+
+@pytest.fixture
+def list_of_3_reg_biofeatures(list_of_region_biofeatures):
+    '''The first three of the ten posted region bio_features.'''
+    return list_of_region_biofeatures[:3]
+
+
 @pytest.fixture
 def imaging_path_1(testapp, basic_info, genomic_region_bio_feature):
     basic_info['target'] = [genomic_region_bio_feature['@id']]
@@ -697,12 +716,68 @@ def imaging_path_3(testapp, basic_info, basic_region_bio_feature):
     return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]
 
 
+@pytest.fixture
+def imaging_path_4(testapp, basic_info, list_of_region_biofeatures):
+    '''Imaging path whose targets are all ten region bio_features.'''
+    basic_info['target'] = [bf.get('@id') for bf in list_of_region_biofeatures]
+    basic_info['labeled_probe'] = 'DAPI'
+    return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]
+
+
+@pytest.fixture
+def imaging_path_5(testapp, basic_info, list_of_3_reg_biofeatures):
+    '''Imaging path whose targets are three region bio_features.'''
+    basic_info['target'] = [bf.get('@id') for bf in list_of_3_reg_biofeatures]
+    basic_info['labeled_probe'] = 'DAPI'
+    return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]
+
+
+@pytest.fixture
+def imaging_path_6(testapp, basic_info, prot_bio_feature):
+    '''Imaging path whose only target is prot_bio_feature.'''
+    basic_info['target'] = [prot_bio_feature['@id']]
+    basic_info['labeled_probe'] = 'DAPI'
+    return testapp.post_json('/imaging_path', basic_info).json['@graph'][0]
+
+
 @pytest.fixture
 def microscopy_no_path(testapp, repliseq_info, exp_types):
     repliseq_info['experiment_type'] = exp_types['fish']['@id']
     return testapp.post_json('/experiment_mic', repliseq_info).json['@graph'][0]
 
 
+@pytest.fixture
+def microscopy_w_path_w_many_targets(testapp, repliseq_info, imaging_path_4, exp_types):
+    '''ExperimentMic with imaging_path_4 as its single imaging path (channel ch01).'''
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    img_path = {'path': imaging_path_4['@id'], 'channel': 'ch01'}
+    repliseq_info['imaging_paths'] = [img_path]
+    return testapp.post_json('/experiment_mic', repliseq_info).json['@graph'][0]
+
+
+@pytest.fixture
+def microscopy_w_path_w_many_targets_and_split_path(testapp, repliseq_info, imaging_path_4,
+                                                    imaging_path_2, imaging_path_6, exp_types):
+    '''ExperimentMic with imaging_path_4, imaging_path_2 and imaging_path_6 on channels ch01-ch03.'''
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    img_path1 = {'path': imaging_path_4['@id'], 'channel': 'ch01'}
+    img_path2 = {'path': imaging_path_2['@id'], 'channel': 'ch02'}
+    img_path3 = {'path': imaging_path_6['@id'], 'channel': 'ch03'}
+    repliseq_info['imaging_paths'] = [img_path1, img_path2, img_path3]
+    return testapp.post_json('/experiment_mic', repliseq_info).json['@graph'][0]
+
+
+@pytest.fixture
+def microscopy_w_path_w_few_targets_and_split_path(testapp, repliseq_info, imaging_path_5,
+                                                   imaging_path_6, exp_types):
+    '''ExperimentMic with imaging_path_5 and imaging_path_6 on channels ch01 and ch02.'''
+    repliseq_info['experiment_type'] = exp_types['fish']['@id']
+    img_path1 = {'path': imaging_path_5['@id'], 'channel': 'ch01'}
+    img_path2 = {'path': imaging_path_6['@id'], 'channel': 'ch02'}
+    repliseq_info['imaging_paths'] = [img_path1, img_path2]
+    return testapp.post_json('/experiment_mic', repliseq_info).json['@graph'][0]
+
+
 @pytest.fixture
 def microscopy_w_path(testapp, repliseq_info, imaging_path_1, exp_types):
     repliseq_info['experiment_type'] = exp_types['fish']['@id']
@@ -780,6 +855,31 @@ def test_experiment_categorizer_4_mic_w_multi_path(testapp, microscopy_w_multipa
         assert v in value
 
 
+def test_experiment_categorizer_4_mic_w_path_w_many_targets(testapp, microscopy_w_path_w_many_targets):
+    '''More than 5 targets of one feature type are reported as a count.'''
+    assert microscopy_w_path_w_many_targets['experiment_categorizer']['field'] == 'Target'
+    assert microscopy_w_path_w_many_targets['experiment_categorizer']['value'] == '10 genomic regions'
+
+
+def test_experiment_categorizer_4_mic_w_path_w_many_targets_and_split_path(testapp, microscopy_w_path_w_many_targets_and_split_path,
+                                                                           prot_bio_feature):
+    '''Region targets from several paths are pooled into one count; prot_bio_feature is listed by title.'''
+    assert microscopy_w_path_w_many_targets_and_split_path['experiment_categorizer']['field'] == 'Target'
+    value = microscopy_w_path_w_many_targets_and_split_path['experiment_categorizer']['value']
+    assert '11 genomic regions' in value
+    assert prot_bio_feature.get('display_title') in value
+
+
+def test_experiment_categorizer_4_mic_w_paths_w_fewer_targets(testapp, microscopy_w_path_w_few_targets_and_split_path,
+                                                              list_of_3_reg_biofeatures, prot_bio_feature):
+    '''With 5 or fewer targets per feature type every target is listed by its display_title.'''
+    assert microscopy_w_path_w_few_targets_and_split_path['experiment_categorizer']['field'] == 'Target'
+    value = microscopy_w_path_w_few_targets_and_split_path['experiment_categorizer']['value']
+    for bf in list_of_3_reg_biofeatures:
+        assert bf.get('display_title') in value
+    assert prot_bio_feature.get('display_title') in value
+
+
 def test_experiment_categorizer_4_mic_w_split_path(testapp, microscopy_w_splitpath):
     '''Sometimes a (group of) target(s) is split into different imaging paths, e.g. due to multiplexing.
Sum the split targets and return only one string.'''
diff --git a/src/encoded/types/experiment.py b/src/encoded/types/experiment.py
index 52452da563..de8cc8d0a3 100644
--- a/src/encoded/types/experiment.py
+++ b/src/encoded/types/experiment.py
@@ -1053,32 +1053,53 @@ def display_title(self, request, experiment_type, biosample):
         return self.add_accession_to_title(self.experiment_summary(request, experiment_type, biosample))
 
     @calculated_property(schema=EXP_CATEGORIZER_SCHEMA)
-    def experiment_categorizer(self, request, experiment_type, biosample, imaging_paths=None):
-        ''' Use the target(s) in the imaging path'''
+    def experiment_categorizer(self, request, experiment_type, imaging_paths=None):
+        '''Summarize the imaging path targets as the categorizer value.
+
+        Targets from all imaging paths are de-duplicated by @id and grouped by
+        feature type. A feature type with more than 5 targets is reported as a
+        count, e.g. '10 genomic regions'; otherwise the target titles are listed,
+        summing titles that start with a number ('50 TADs' + '40 TADs' -> '90 TADs').
+        '''
         if imaging_paths:
-            unique_targets = []
-            path_targets = []
+            path_targets_by_type = {}
             for pathobj in imaging_paths:
                 path = get_item_or_none(request, pathobj['path'], 'imaging_path')
+                if not path:
+                    # the imaging path could not be retrieved - nothing to collect from it
+                    continue
                 for target in path.get('target', []):
                     biofeature = get_item_or_none(request, target, 'bio_feature')
-                    if biofeature['@id'] not in unique_targets:
-                        unique_targets.append(biofeature['@id'])
-                        path_targets.append(biofeature['display_title'])
-            if path_targets:
+                    if biofeature:
+                        ftype = biofeature.get('feature_type')
+                        atid = biofeature.get('@id')
+                        # keyed by @id so a target used in several paths is only kept once
+                        path_targets_by_type.setdefault(ftype, {})[atid] = biofeature.get('display_title')
+            if path_targets_by_type:
                 value = []
-                sum_targets = {}
-                for target in path_targets:
-                    # check if target starts with numbers, e.g. '50 TADs', '40 TADs'
-                    # sum them if there are more: '90 TADs'
-                    split_target = re.split(r'(^[0-9]+)', target, maxsplit=1)
-                    if len(split_target) > 1:
-                        t_num, t_name = split_target[1:3]
-                        sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
-                    elif target not in value:
-                        value.append(target)
-                if sum_targets:
-                    value = [str(n) + t for t, n in sum_targets.items()] + value
+                for feat_type, targets in path_targets_by_type.items():
+                    if len(targets) > 5:
+                        # too many targets to list individually: report '<count> <feature type>s'
+                        ftype_term = get_item_or_none(request, feat_type, 'ontology_term') or {}
+                        # fall back to a generic label when the term or its title is unavailable
+                        ftype_str = ftype_term.get('display_title') or 'target'
+                        if ftype_str == 'region':
+                            ftype_str = 'genomic region'
+                        ftype_str = f"{ftype_str.replace('_', ' ')}s"
+                        value.append(f"{len(targets)} {ftype_str}")
+                    else:
+                        sum_targets = {}
+                        for tname in targets.values():
+                            # check if target starts with numbers, e.g. '50 TADs', '40 TADs'
+                            # sum them if there are more: '90 TADs'
+                            split_target = re.split(r'(^[0-9]+)', tname, maxsplit=1)
+                            if len(split_target) > 1:
+                                t_num, t_name = split_target[1:3]
+                                sum_targets[t_name] = sum_targets.setdefault(t_name, 0) + int(t_num)
+                            elif tname not in value:
+                                value.append(tname)
+                        if sum_targets:
+                            value = [str(n) + t for t, n in sum_targets.items()] + value
                 value = ', '.join(value)
                 return {
                     'field': 'Target',