Fix DRS #1895

Open
wants to merge 15 commits into master
10 changes: 9 additions & 1 deletion CHANGELOG.rst
@@ -6,6 +6,13 @@ fourfront
Change Log
----------

8.2.0
=====

* Merged in master (2024-10-09)
* Update DRS API to return JSON always
* Update DRS download URLs to return direct downloads to Open Data where applicable
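
The two DRS changes above can be exercised end to end against a running portal. A minimal sketch, assuming the requests library and a placeholder host; the UUID comes from the fastq test fixture in this PR, not production data:

    import requests

    HOST = "https://data.4dnucleome.org"  # placeholder portal host, not part of this PR
    FILE_UUID = "96115074-b6bd-4a1e-9564-14b708607e4c"  # uuid of the fastq test fixture

    # Fetch the DRS object; after this change the view returns JSON
    # even when the client advertises an HTML Accept header.
    drs_obj = requests.get(
        f"{HOST}/ga4gh/drs/v1/objects/{FILE_UUID}",
        headers={"Accept": "text/html"},
    ).json()

    # Resolve an access URL; where an open_data_url exists this now points
    # directly at the 4DN Open Data bucket instead of the portal's @@download view.
    access = requests.get(f"{HOST}/ga4gh/drs/v1/objects/{drs_obj['id']}/access/").json()
    print(access["url"])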


8.1.5
=====
@@ -65,7 +72,7 @@ Change Log
7.10.1
=====

* Bug fix to revert schema version of workflow.json back to 7
* Bug fix to revert schema version of workflow.json back to 7


7.10.0
@@ -116,6 +123,7 @@ Change Log
`Add ignored enum <https://github.com/4dn-dcic/fourfront/pull/1897>`_

* Add value to ignored_enum for file_type
>>>>>>> master


7.5.7
23 changes: 23 additions & 0 deletions Makefile
@@ -52,6 +52,28 @@ configure: # does any pre-requisite installs
pip install setuptools
poetry config virtualenvs.create false --local # do not create a virtualenv - the user should have already done this -wrr 20-Sept-2021

check-awscli: # succeeds (exit 0) only when the AWS CLI is missing, so install-awscli runs only where it is needed
@if ! aws --version > /dev/null 2>&1; then \
echo "AWS CLI is not installed."; \
exit 0; \
else \
echo "AWS CLI is already installed. Exiting."; \
exit 1; \
fi

install-awscli: check-awscli # installs awscli v2 for use with credentialing
@echo "Installing AWS CLI v2..."
curl "https://awscli.amazonaws.com/AWSCLIV2.pkg" -o "AWSCLIV2.pkg"
sudo installer -pkg AWSCLIV2.pkg -target /usr/local/bin/
aws --version
rm AWSCLIV2.pkg

clear-aws:
@echo "unset AWS_ACCESS_KEY_ID" > ~/.clear_aws_env && \
echo "unset AWS_SECRET_ACCESS_KEY" >> ~/.clear_aws_env && \
echo "unset AWS_SESSION_TOKEN" >> ~/.clear_aws_env && \
echo "Run 'source ~/.clear_aws_env' to finish clearing"

build-poetry:
make configure
poetry install
@@ -111,6 +133,7 @@ kibana-stop:

kill: # kills back-end processes associated with the application. Use with care.
pkill -f postgres &
pkill -f opensearch &
pkill -f elasticsearch &
pkill -f moto_server &

2,925 changes: 1,617 additions & 1,308 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
# Note: Various modules refer to this system as "encoded", not "fourfront".
name = "encoded"
version = "8.1.5"
version = "8.1.5.1b1" # TODO: To become 8.2.0
description = "4DN-DCIC Fourfront"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
@@ -38,8 +38,8 @@ classifiers = [
]

[tool.poetry.dependencies]

python = ">=3.9,<3.13"
awscli = ">=1.29.62"
boto3 = "^1.34.136"
botocore = "^1.34.136"
certifi = ">=2021.5.30"
@@ -49,7 +49,8 @@ colorama = "0.3.3"
# we get odd 'pyo3_runtime.PanicException: Python API call failed' error on import
# of cryptography.hazmat.bindings._rust in cryptography package. 2023-04-21.
cryptography = "39.0.2"
dcicsnovault = "^11.22.0"
# dcicsnovault = "11.21.1.0b5"
dcicsnovault = "11.22.0.1b1"
dcicutils = "^8.13.3"
elasticsearch = "7.13.4"
elasticsearch-dsl = "^7.0.0" # TODO: port code from cgap-portal to get rid of uses
59 changes: 39 additions & 20 deletions src/encoded/tests/test_file_drs.py
@@ -1,4 +1,5 @@
import pytest
from unittest import mock


pytestmark = [pytest.mark.setone, pytest.mark.working]
@@ -17,7 +18,7 @@ def mcool_file_json(award, experiment, lab, file_formats):
'md5sum': '00000000000000000000000000000000',
'content_md5sum': '00000000000000000000000000000000',
'filename': 'my.cool.mcool',
'status': 'uploaded',
'status': 'released',
}
return item

@@ -32,45 +33,63 @@ def file(testapp, award, experiment, lab, file_formats):
'md5sum': '00000000000000000000000000000000',
'content_md5sum': '00000000000000000000000000000000',
'filename': 'my.fastq.gz',
'status': 'uploaded',
'status': 'released',
'accession': 'TSTFI2896250',
'uuid': '96115074-b6bd-4a1e-9564-14b708607e4c'
}
res = testapp.post_json('/file_fastq', item)
return res.json['@graph'][0]


def validate_drs_conversion(drs_obj, meta, uri=None):
""" Validates drs object structure against the metadata in the db """
assert drs_obj['id'] == meta['@id']
assert drs_obj['id'] == meta['accession']
assert drs_obj['created_time'] == meta['date_created']
assert drs_obj['drs_id'] == meta['accession']
assert drs_obj['self_uri'] == f'drs://localhost:80{meta["@id"]}@@drs' if not uri else uri
assert drs_obj['self_uri'] == f'drs://localhost:80/{meta["accession"]}' if not uri else uri
assert drs_obj['version'] == meta['md5sum']
assert drs_obj['name'] == meta['filename']
assert drs_obj['aliases'] == [meta['uuid']]
assert drs_obj['access_methods'][0]['access_id'] == 'https'


def test_processed_file_drs_view(testapp, mcool_file_json):
""" Tests that processed mcool gives a valid DRS response """
meta = testapp.post_json('/file_processed', mcool_file_json).json['@graph'][0]
drs_meta = testapp.get(meta['@id'] + '@@drs').json
validate_drs_conversion(drs_meta, meta)
drs_meta = testapp.get(f'{DRS_PREFIX}/{meta["uuid"]}').json
validate_drs_conversion(drs_meta, meta, uri=f'{DRS_PREFIX}/{meta["uuid"]}')
with mock.patch('encoded.types.file.File._head_s3', return_value=None):
meta = testapp.post_json('/file_processed', mcool_file_json).json['@graph'][0]
drs_meta = testapp.get(meta['@id'] + '@@drs').json
validate_drs_conversion(drs_meta, meta)
drs_meta = testapp.get(f'{DRS_PREFIX}/{meta["uuid"]}').json
validate_drs_conversion(drs_meta, meta, uri=f'{DRS_PREFIX}/{meta["uuid"]}')


def test_fastq_file_drs_view(testapp, file):
""" Tests that a fastq file has valid DRS response """
drs_meta = testapp.get(file['@id'] + '@@drs').json
validate_drs_conversion(drs_meta, file)
drs_meta = testapp.get(f'{DRS_PREFIX}/{file["uuid"]}').json
validate_drs_conversion(drs_meta, file, uri=f'{DRS_PREFIX}/{file["uuid"]}')
with mock.patch('encoded.types.file.File._head_s3', return_value=None):
drs_meta = testapp.get(file['@id'] + '@@drs').json
validate_drs_conversion(drs_meta, file)
drs_meta = testapp.get(f'{DRS_PREFIX}/{file["uuid"]}').json
validate_drs_conversion(drs_meta, file, uri=f'{DRS_PREFIX}/{file["uuid"]}')


def test_fastq_file_drs_access(testapp, file):
""" Tests that access URLs are retrieved successfully """
drs_meta = testapp.get(file['@id'] + '@@drs').json
drs_object_uri = drs_meta['drs_id']
drs_object_download = testapp.get(f'/ga4gh/drs/v1/objects/{drs_object_uri}/access/').json
assert drs_object_download == {
'url': f'https://localhost:80/{drs_object_uri}/@@download'
}
with mock.patch('encoded.types.file.File._head_s3', return_value=None):
drs_meta = testapp.get(file['@id'] + '@@drs').json
drs_object_uri = drs_meta['id']
drs_object_download = testapp.get(f'/ga4gh/drs/v1/objects/{drs_object_uri}/access/').json
assert drs_object_download == {
'url': f'https://4dn-open-data-public.s3.amazonaws.com/fourfront-webprod/wfoutput/'
f'96115074-b6bd-4a1e-9564-14b708607e4c/TSTFI2896250.fastq.gz'
}


def test_drs_always_returns_json(htmltestapp, file):
""" DRS is a JSON only API so should never not return html """
with mock.patch('encoded.types.file.File._head_s3', return_value=None):
drs_meta = htmltestapp.get(file['@id'] + '@@drs')
assert drs_meta.content_type == 'application/json'


def test_drs_without_open_data_returns_404(testapp, file):
""" Tests that without open_data_url mocked, """
testapp.get(file['@id'] + '@@drs', status=404)
22 changes: 6 additions & 16 deletions src/encoded/types/base.py
@@ -26,18 +26,18 @@ def _award_viewing_group(award_uuid, root):

# Item acls
from ..acl import (
ALLOW_CURRENT_AND_SUBMITTER_EDIT_ACL,
ALLOW_CURRENT_AND_SUBMITTER_EDIT_ACL,
ALLOW_CURRENT_ACL,
AWARD_MEMBER_ROLE,
ALLOW_ANY_USER_ADD_ACL,
ALLOW_EVERYONE_VIEW_ACL,
ALLOW_LAB_MEMBER_VIEW_ACL,
ALLOW_LAB_SUBMITTER_EDIT_ACL,
ALLOW_EVERYONE_VIEW_ACL,
ALLOW_LAB_MEMBER_VIEW_ACL,
ALLOW_LAB_SUBMITTER_EDIT_ACL,
ALLOW_LAB_VIEW_ADMIN_EDIT_ACL,
ALLOW_OWNER_EDIT_ACL,
ALLOW_SUBMITTER_ADD_ACL,
ALLOW_VIEWING_GROUP_LAB_SUBMITTER_EDIT_ACL,
ALLOW_VIEWING_GROUP_VIEW_ACL,
ALLOW_VIEWING_GROUP_LAB_SUBMITTER_EDIT_ACL,
ALLOW_VIEWING_GROUP_VIEW_ACL,
DELETED_ACL,
LAB_MEMBER_ROLE,
LAB_SUBMITTER_ROLE,
@@ -244,16 +244,6 @@ def _is_joint_analysis(props):
roles[submitter] = OWNER_ROLE
return roles

def unique_keys(self, properties):
"""smth."""
keys = super(Item, self).unique_keys(properties)
if 'accession' not in self.schema['properties']:
return keys
keys.setdefault('accession', []).extend(properties.get('alternate_accessions', []))
if properties.get('status') != 'replaced' and 'accession' in properties:
keys['accession'].append(properties['accession'])
return keys

def _update(self, properties, sheets=None):
props = {}
try:
28 changes: 13 additions & 15 deletions src/encoded/types/file.py
@@ -20,6 +20,7 @@
from pyramid.threadlocal import get_current_request
from pyramid.traversal import resource_path
from pyramid.view import view_config
from pyramid.response import Response
from snovault import (
AfterModified,
BeforeModified,
@@ -569,7 +570,7 @@ def _get_file_experiment_info(self, request, currinfo):
def track_and_facet_info(self, request, biosource_name=None):
props = self.upgrade_properties()
# order matters here, at least for the last 2 fields
fields = ['experiment_type', 'assay_info', 'experimental_lab', 'dataset', 'condition',
fields = ['experiment_type', 'assay_info', 'experimental_lab', 'dataset', 'condition',
'biosource_name', 'replicate_info', 'experiment_bucket', 'lab_name', 'track_title']
# look for existing _props
track_info = {field: props.get('override_' + field) for field in fields}
@@ -1709,26 +1710,23 @@ def drs(context, request):
""" DRS object implementation for file. """
rendered_object = request.embed(str(context.uuid), '@@object', as_user=True)
accession = rendered_object['accession']
open_data_url = rendered_object.get('open_data_url', None)
# TODO: implement access_id mechanism
if not open_data_url:
return Response('Access ID support planned for the future', status=404)
drs_object_base = {
'id': rendered_object['@id'],
'id': accession,
'created_time': rendered_object['date_created'],
'drs_id': accession,
'self_uri': f'drs://{request.host}{request.path}',
'self_uri': f'drs://{request.host}/{accession}',
'access_methods': [
{
# always prefer https
# always use open data
'access_url': {
'url': f'https://{request.host}/{accession}/@@download'
'url': open_data_url
},
'type': 'https'
},
{
# but provide http as well in case we are not on prod
'access_url': {
'url': f'http://{request.host}/{accession}/@@download'
},
'type': 'http'
},
'type': 'https',
'access_id': 'https'
}
]
}
return build_drs_object_from_props(drs_object_base, rendered_object)
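
For reference, the updated view yields a payload of roughly the following shape for a released file that has an open_data_url. The values are illustrative and mirror the fastq test fixture above, not production data; version, name and aliases are filled in by build_drs_object_from_props, which is outside this diff:

    # Illustrative shape only -- values come from the test fixture, not production.
    example_drs_response = {
        "id": "TSTFI2896250",                                 # accession
        "drs_id": "TSTFI2896250",
        "self_uri": "drs://localhost:80/TSTFI2896250",
        "created_time": "2024-01-01T00:00:00.000000+00:00",   # placeholder date_created
        "version": "00000000000000000000000000000000",        # md5sum
        "name": "my.fastq.gz",                                # filename
        "aliases": ["96115074-b6bd-4a1e-9564-14b708607e4c"],  # uuid
        "access_methods": [
            {
                "access_url": {
                    "url": "https://4dn-open-data-public.s3.amazonaws.com/fourfront-webprod/"
                           "wfoutput/96115074-b6bd-4a1e-9564-14b708607e4c/TSTFI2896250.fastq.gz"
                },
                "type": "https",
                "access_id": "https",
            }
        ],
    }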