hubmapconsortium · austenem · Oct 29, 2024 · Oct 25, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/CHANGELOG-fix-epic-redirects.md b/CHANGELOG-fix-epic-redirects.md
@@ -0,0 +1 @@
+- Update support entity and processed/component dataset redirects to go only to the appropriate primary datasets.
diff --git a/context/app/routes_browse.py b/context/app/routes_browse.py
@@ -7,7 +7,8 @@
 
 from .utils import (
     get_default_flask_data, make_blueprint, get_client,
-    get_url_base_from_request, entity_types)
+    get_url_base_from_request, entity_types, find_earliest_dataset_ancestor,
+    should_redirect_entity)
 
 
 blueprint = make_blueprint(__name__)
@@ -48,39 +49,39 @@ def details(type, uuid):
     entity = client.get_entity(uuid)
     actual_type = entity['entity_type'].lower()
 
-    # Redirect to primary dataset if this is
-    # - a support entity (e.g. an image pyramid)
-    # - a processed or component dataset
-    is_support = actual_type == 'support'
-    is_processed = entity.get('processing') != 'raw' and actual_type == 'dataset'
-    is_component = entity.get('is_component', False) is True
-    if (is_support or is_processed or is_component):
-        supported_entity = client.get_entities(
+    if (should_redirect_entity(entity)):
+        earliest_uuid = find_earliest_dataset_ancestor(client, uuid)
+        earliest_dataset = client.get_entities(
             'datasets',
             query_override={
                 "bool": {
                     "must": {
-                        "terms": {
-                            "descendant_ids": [uuid]
+                        "term": {
+                            "uuid": earliest_uuid
                         }
                     }
                 }
             },
-            non_metadata_fields=['hubmap_id', 'uuid']
+            non_metadata_fields=['uuid', 'processing', 'entity_type', 'is_component']
         )
 
         pipeline_anchor = entity.get('pipeline', entity.get('hubmap_id')).replace(' ', '')
         anchor = quote(f'section-{pipeline_anchor}-{entity.get("status")}').lower()
 
-        if len(supported_entity) > 0:
-            return redirect(
-                url_for('routes_browse.details',
-                        type='dataset',
-                        uuid=supported_entity[0]['uuid'],
-                        _anchor=anchor,
-                        redirected=True,
-                        redirectedFromId=entity.get('hubmap_id'),
-                        redirectedFromPipeline=entity.get('pipeline')))
+        # Check whether the oldest found ancestor exists and is of an expected type. 404 is
+        # preferable to a page that shows only a support entity or processed/component dataset.
+        if not earliest_dataset or should_redirect_entity(earliest_dataset[0]):
+            abort(404)
+
+        # Redirect to the primary dataset
+        return redirect(
+            url_for('routes_browse.details',
+                    type='dataset',
+                    uuid=earliest_uuid,
+                    _anchor=anchor,
+                    redirected=True,
+                    redirectedFromId=entity.get('hubmap_id'),
+                    redirectedFromPipeline=entity.get('pipeline')))
 
     if type != actual_type:
         return redirect(url_for('routes_browse.details', type=actual_type, uuid=uuid))

diff --git a/context/app/test_utils.py b/context/app/test_utils.py
@@ -0,0 +1,136 @@
+import pytest
+from unittest.mock import Mock
+from .utils import find_earliest_dataset_ancestor
+
+
+@pytest.mark.parametrize(
+    "initial_uuid, client_responses, expected_result",
+    [
+        # Dataset with no ancestors
+        (
+            "uuid_initial",
+            [
+                [{
+                    "hubmap_id": "initial_id",
+                    "uuid": "uuid_initial",
+                    "immediate_ancestors": [],
+                    "entity_type": "Dataset"
+                }]
+            ],
+            "uuid_initial"
+        ),
+        # Dataset with a non-dataset parent
+        (
+            "uuid_initial",
+            [
+                [{
+                    "hubmap_id": "initial_id",
+                    "uuid": "uuid_initial",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor", "entity_type": "Sample"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor_id",
+                    "uuid": "uuid_ancestor",
+                    "immediate_ancestors": [],
+                    "entity_type": "Sample"
+                }]
+            ],
+            "uuid_initial"
+        ),
+        # Dataset with a dataset parent
+        (
+            "uuid_initial",
+            [
+                [{
+                    "hubmap_id": "initial_id",
+                    "uuid": "uuid_initial",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor_id",
+                    "uuid": "uuid_ancestor",
+                    "immediate_ancestors": [],
+                    "entity_type": "Dataset"
+                }]
+            ],
+            "uuid_ancestor"
+        ),
+        # Dataset with a dataset grandparent
+        (
+            "uuid_initial",
+            [
+                [{
+                    "hubmap_id": "initial_id",
+                    "uuid": "uuid_initial",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor1", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor1_id",
+                    "uuid": "uuid_ancestor1",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor2", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor2_id",
+                    "uuid": "uuid_ancestor2",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor3", "entity_type": "Donor"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor3_id",
+                    "uuid": "uuid_ancestor3",
+                    "immediate_ancestors": [],
+                    "entity_type": "Donor"
+                }]
+            ],
+            "uuid_ancestor2"
+        ),
+        # Dataset with a dataset great-grandparent
+        (
+            "uuid_initial",
+            [
+                [{
+                    "hubmap_id": "initial_id",
+                    "uuid": "uuid_initial",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor1", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor1_id",
+                    "uuid": "uuid_ancestor1",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor2", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor2_id",
+                    "uuid": "uuid_ancestor2",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor3", "entity_type": "Dataset"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor3_id",
+                    "uuid": "uuid_ancestor3",
+                    "immediate_ancestors": [{"uuid": "uuid_ancestor4", "entity_type": "Donor"}],
+                    "entity_type": "Dataset"
+                }],
+                [{
+                    "hubmap_id": "ancestor4_id",
+                    "uuid": "uuid_ancestor4",
+                    "immediate_ancestors": [],
+                    "entity_type": "Donor"
+                }]
+            ],
+            "uuid_ancestor3"
+        ),
+
+    ]
+)
+def test_find_earliest_dataset_ancestor(initial_uuid, client_responses, expected_result):
+    client = Mock()
+    client.get_entities.side_effect = client_responses
+
+    result = find_earliest_dataset_ancestor(client, initial_uuid)
+    assert result == expected_result
diff --git a/context/app/utils.py b/context/app/utils.py
@@ -62,3 +62,49 @@ def get_organs():
     dir_path = Path(dirname(__file__) + '/organ')
     organs = {p.stem: safe_load(p.read_text()) for p in dir_path.glob('*.yaml')}
     return organs
+
+
+# Redirect to primary dataset if this entity is
+# - non-existent
+# - a support entity (e.g. an image pyramid)
+# - a processed or component dataset
+def should_redirect_entity(entity):
+    if not entity:
+        return True
+
+    actual_type = entity.get('entity_type').lower()
+    is_support_type = actual_type == 'support'
+    is_component = entity.get('is_component', False) is True
+    is_not_raw_dataset = entity.get('processing') != 'raw' and actual_type == 'dataset'
+
+    if is_support_type or is_component or is_not_raw_dataset:
+        return True
+
+    return False
+
+
+def find_earliest_dataset_ancestor(client, uuid):
+    dataset = client.get_entities(
+        'datasets',
+        query_override={
+            "bool": {
+                "must": {
+                    "term": {
+                        "uuid": uuid
+                    }
+                }
+            }
+        },
+        non_metadata_fields=['hubmap_id', 'uuid', 'immediate_ancestors', 'entity_type']
+    )
+
+    # If no dataset is found or it has no ancestors, return the current dataset UUID
+    if not dataset or not dataset[0].get('immediate_ancestors'):
+        return uuid
+
+    # Traverse through immediate ancestors to find the earliest dataset ancestor
+    for ancestor in dataset[0]['immediate_ancestors']:
+        if ancestor.get('entity_type') == 'Dataset':
+            uuid = find_earliest_dataset_ancestor(client, ancestor.get('uuid'))
+
+    return uuid