-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Austenem/CAT-960 Fix EPIC redirects #3583
Changes from 6 commits
ac10bce
0e42e62
1c9c285
8cb9cc1
c8de748
2c6124c
79b2074
6371c99
83a1760
9334d2d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
- Update support entity and processed/component dataset redirects to go only to the appropriate primary datasets. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
import pytest | ||
from unittest.mock import Mock | ||
from .utils import find_earliest_dataset_ancestor | ||
|
||
|
||
def _entity_response(name, entity_type, ancestors=()):
    """Build one mocked ``get_entities`` result for the entity called ``name``.

    The result is a single-element list, matching how the function under test
    indexes ``[0]`` into the client's return value.

    Args:
        name: Short entity name; the record gets ``hubmap_id`` ``"<name>_id"``
            and ``uuid`` ``"uuid_<name>"``.
        entity_type: Value stored under ``"entity_type"``.
        ancestors: Iterable of ``(name, entity_type)`` pairs describing the
            record's ``immediate_ancestors``.
    """
    return [{
        "hubmap_id": f"{name}_id",
        "uuid": f"uuid_{name}",
        "immediate_ancestors": [
            {"uuid": f"uuid_{ancestor_name}", "entity_type": ancestor_type}
            for ancestor_name, ancestor_type in ancestors
        ],
        "entity_type": entity_type,
    }]


@pytest.mark.parametrize(
    "initial_uuid, client_responses, expected_result",
    [
        (
            "uuid_initial",
            [
                _entity_response("initial", "Dataset"),
            ],
            "uuid_initial",
        ),
        (
            "uuid_initial",
            [
                _entity_response("initial", "Dataset", [("ancestor", "Sample")]),
                _entity_response("ancestor", "Sample"),
            ],
            "uuid_initial",
        ),
        (
            "uuid_initial",
            [
                _entity_response("initial", "Dataset", [("ancestor", "Dataset")]),
                _entity_response("ancestor", "Dataset"),
            ],
            "uuid_ancestor",
        ),
        (
            "uuid_initial",
            [
                _entity_response("initial", "Dataset", [("ancestor1", "Dataset")]),
                _entity_response("ancestor1", "Dataset", [("ancestor2", "Dataset")]),
                _entity_response("ancestor2", "Dataset", [("ancestor3", "Donor")]),
                _entity_response("ancestor3", "Donor"),
            ],
            "uuid_ancestor2",
        ),
        (
            "uuid_initial",
            [
                _entity_response("initial", "Dataset", [("ancestor1", "Dataset")]),
                _entity_response("ancestor1", "Dataset", [("ancestor2", "Dataset")]),
                _entity_response("ancestor2", "Dataset", [("ancestor3", "Dataset")]),
                _entity_response("ancestor3", "Dataset", [("ancestor4", "Donor")]),
                _entity_response("ancestor4", "Donor"),
            ],
            "uuid_ancestor3",
        ),
    ],
    ids=[
        "dataset-with-no-ancestors",
        "dataset-with-non-dataset-parent",
        "dataset-with-dataset-parent",
        "dataset-with-dataset-grandparent",
        "dataset-with-dataset-great-grandparent",
    ],
)
def test_find_earliest_dataset_ancestor(initial_uuid, client_responses, expected_result):
    """Check the UUID returned for ancestor chains of increasing depth.

    Each entry of ``client_responses`` is handed out, in order, by successive
    ``client.get_entities`` calls; a case may list more responses than the
    function actually consumes.
    """
    client = Mock()
    client.get_entities.side_effect = client_responses

    result = find_earliest_dataset_ancestor(client, initial_uuid)
    assert result == expected_result
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,3 +62,49 @@ def get_organs(): | |
dir_path = Path(dirname(__file__) + '/organ') | ||
organs = {p.stem: safe_load(p.read_text()) for p in dir_path.glob('*.yaml')} | ||
return organs | ||
|
||
|
||
def should_redirect_entity(entity):
    """Return True when ``entity`` should be redirected to a primary dataset.

    Redirect to the primary dataset if this entity is
    - non-existent (any falsy value, including an empty mapping)
    - a support entity (e.g. an image pyramid)
    - a processed or component dataset

    Args:
        entity: Mapping of entity fields, or a falsy value when nothing was
            found. The keys read are ``entity_type``, ``is_component`` and
            ``processing``.

    Returns:
        bool: True if a redirect is needed, False otherwise.
    """
    if not entity:
        return True

    # ``entity_type`` may be absent or None; fall back to '' so that calling
    # .lower() cannot raise AttributeError on a partially populated entity.
    entity_type = (entity.get('entity_type') or '').lower()

    is_support_type = entity_type == 'support'
    # Only a literal True counts; truthy non-bool values are not components.
    is_component = entity.get('is_component', False) is True
    is_not_raw_dataset = entity_type == 'dataset' and entity.get('processing') != 'raw'

    return is_support_type or is_component or is_not_raw_dataset
|
||
|
||
def find_earliest_dataset_ancestor(client, uuid):
    """Follow Dataset-typed immediate ancestors upward and return a UUID.

    The entity with the given ``uuid`` is looked up through
    ``client.get_entities``. If nothing is found, or the first match has no
    ``immediate_ancestors``, ``uuid`` itself is returned. Otherwise the
    function recurses into every immediate ancestor whose ``entity_type`` is
    ``'Dataset'``; when several qualify, the result for the last one visited
    wins. If no immediate ancestor is a Dataset, ``uuid`` is returned.

    Args:
        client: Object exposing ``get_entities(index, query_override=...,
            non_metadata_fields=...)``; the result is indexed with ``[0]`` and
            that element is read with ``.get``.
        uuid: UUID of the dataset to start from.

    Returns:
        The UUID of the dataset reached by the walk described above.
    """
    # NOTE(review): review feedback on this change asked whether a single
    # query could return the target dataset directly instead of walking
    # ancestors one request at a time -- TODO confirm and revisit.
    matches = client.get_entities(
        'datasets',
        query_override={
            "bool": {
                "must": {
                    "term": {
                        "uuid": uuid
                    }
                }
            }
        },
        non_metadata_fields=['hubmap_id', 'uuid', 'immediate_ancestors', 'entity_type']
    )

    # If no dataset is found or it has no ancestors, return the current dataset UUID
    if not matches:
        return uuid
    ancestors = matches[0].get('immediate_ancestors')
    if not ancestors:
        return uuid

    # Traverse through immediate ancestors to find the earliest dataset ancestor
    earliest = uuid
    for ancestor in ancestors:
        if ancestor.get('entity_type') != 'Dataset':
            continue
        ancestor_uuid = ancestor.get('uuid')
        if not ancestor_uuid:
            # Without a UUID there is nothing to look up; recursing would
            # query for None and could hand None back as the result.
            continue
        earliest = find_earliest_dataset_ancestor(client, ancestor_uuid)

    return earliest
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need to call `should_redirect_entity` again?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was originally to check that the retrieved entity was not actually a processed/support dataset - with the updated query, this shouldn't be necessary.