Skip to content

Commit

Permalink
Fix: Invalid columns in compact manifest for AnVIL (#6110)
Browse files (browse the repository at this point in the history)
  • Loading branch information
dsotirho-ucsc committed Oct 8, 2024
1 parent 3e36345 commit 6a77227
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 12 deletions.
16 changes: 15 additions & 1 deletion src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'document_id',
'source_datarepo_row_ids'
]
# Note that there is a brittle coupling that must be maintained between
# the keys here and those used in the custom field name lookup in
# `self.manifest_config`.
return {
'entity_id': 'entryId',
'bundles': {
Expand Down Expand Up @@ -277,9 +280,20 @@ def manifest_config(self) -> ManifestConfig:
# the field mapping. Keys are field paths in an ES hit, and values are
# the desired manifest column name, or None to omit the column from the
# manifest.
#
# Note that there is a brittle coupling that must be maintained between
# the keys here and those used in `self._field_mapping`. Also, the
# values (that aren't `None`) should match the related field's path in
# a response hit from the `/index/files` endpoint.
#
custom_field_names = {
('bundles', 'uuid'): 'bundles.bundle_uuid',
('bundles', 'version'): 'bundles.bundle_version',
('contents', 'activities', 'activity_table'): None,
('contents', 'files', 'uuid'): None,
('contents', 'files', 'version'): None,
('sources', 'id'): 'sources.source_id',
('sources', 'spec'): 'sources.source_spec',
}

def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath):
Expand All @@ -302,7 +316,7 @@ def recurse(mapping: MetadataPlugin._FieldMapping, path: FieldPath):
# The file URL is synthesized from the `uuid` and `version` fields.
# Above, we already configured these two fields to be omitted from the
# manifest since they are not informative to the user.
result[('contents', 'files')]['file_url'] = 'files.file_url'
result[('contents', 'files')]['file_url'] = 'files.azul_file_url'
return result

def verbatim_pfb_schema(self,
Expand Down
3 changes: 3 additions & 0 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ def choose_entry(_term):
def _make_hit(self, es_hit: JSON) -> MutableJSON:
return {
'entryId': es_hit['entity_id'],
# Note that there is a brittle coupling that must be maintained
# between the `sources` and `bundles` field paths and the values in
# the custom field name lookup in `Plugin.manifest_config`.
'sources': list(map(self._make_source, es_hit['sources'])),
'bundles': list(map(self._make_bundle, es_hit['bundles'])),
**self._make_contents(es_hit['contents'])
Expand Down
16 changes: 5 additions & 11 deletions test/service/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1716,25 +1716,25 @@ def test_compact_manifest(self):
self.assertEqual(200, response.status_code)
expected = [
(
'bundle_uuid',
'bundles.bundle_uuid',
'6b0f6c0f-5d80-a242-accb-840921351cd5',
'826dea02-e274-affe-aabc-eb3db63ad068',
'826dea02-e274-affe-aabc-eb3db63ad068'
),
(
'bundle_version',
'bundles.bundle_version',
'2022-06-01T00:00:00.000000Z',
'2022-06-01T00:00:00.000000Z',
'2022-06-01T00:00:00.000000Z'
),
(
'source_id',
'sources.source_id',
'6c87f0e1-509d-46a4-b845-7584df39263b',
'6c87f0e1-509d-46a4-b845-7584df39263b',
'6c87f0e1-509d-46a4-b845-7584df39263b'
),
(
'source_spec',
'sources.source_spec',
'tdr:bigquery:gcp:test_anvil_project:anvil_snapshot:/2',
'tdr:bigquery:gcp:test_anvil_project:anvil_snapshot:/2',
'tdr:bigquery:gcp:test_anvil_project:anvil_snapshot:/2'
Expand Down Expand Up @@ -1973,12 +1973,6 @@ def test_compact_manifest(self):
'18b3be87-e26b-4376-0d8d-c1e370e90e07',
'a60c5138-3749-f7cb-8714-52d389ad5231'
),
(
'activities.activity_table',
'',
'sequencingactivity',
'sequencingactivity'
),
(
'activities.activity_type',
'',
Expand Down Expand Up @@ -2082,7 +2076,7 @@ def test_compact_manifest(self):
self._drs_uri('v1_6c87f0e1-509d-46a4-b845-7584df39263b_8b722e88-8103-49c1-b351-e64fa7c6ab37')
),
(
'files.file_url',
'files.azul_file_url',
self._file_url('6b0f6c0f-5d80-4242-accb-840921351cd5', self.version),
self._file_url('15b76f9c-6b46-433f-851d-34e89f1b9ba6', self.version),
self._file_url('3b17377b-16b1-431c-9967-e5d01fc5923f', self.version)
Expand Down

0 comments on commit 6a77227

Please sign in to comment.