From bae575dc34404f719ebaaa713067006dfa2b0c5f Mon Sep 17 00:00:00 2001 From: Dan LaManna Date: Tue, 20 Aug 2024 13:13:53 -0400 Subject: [PATCH] Respond with 409 when creating duplicate asset blobs --- dandiapi/api/tests/test_upload.py | 32 ++++++------------------------- dandiapi/api/views/upload.py | 27 ++++++++++++++------------ 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/dandiapi/api/tests/test_upload.py b/dandiapi/api/tests/test_upload.py index 11a85e9a6..8e6eaa462 100644 --- a/dandiapi/api/tests/test_upload.py +++ b/dandiapi/api/tests/test_upload.py @@ -524,16 +524,10 @@ def test_upload_validate_wrong_etag(api_client, user, upload): def test_upload_validate_existing_assetblob(api_client, user, upload, asset_blob_factory): api_client.force_authenticate(user=user) - asset_blob = asset_blob_factory(etag=upload.etag, size=upload.size) + asset_blob_factory(etag=upload.etag, size=upload.size) resp = api_client.post(f'/api/uploads/{upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(asset_blob.blob_id), - 'etag': asset_blob.etag, - 'sha256': asset_blob.sha256, - 'size': asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 assert not Upload.objects.all().exists() @@ -549,16 +543,10 @@ def test_upload_validate_embargo_existing_assetblob( embargoed_upload = embargoed_upload_factory(dandiset=dandiset) # The upload should recognize this preexisting AssetBlob and use it instead - asset_blob = asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) + asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) resp = api_client.post(f'/api/uploads/{embargoed_upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(asset_blob.blob_id), - 'etag': asset_blob.etag, - 'sha256': asset_blob.sha256, - 'size': asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 @@ -574,17 +562,9 @@ def test_upload_validate_embargo_existing_embargoed_assetblob( # The upload should recognize this preexisting embargoed AssetBlob and use it instead # This only works because the embargoed asset blob belongs to the same dandiset - embargoed_asset_blob = embargoed_asset_blob_factory( - etag=embargoed_upload.etag, size=embargoed_upload.size - ) + embargoed_asset_blob_factory(etag=embargoed_upload.etag, size=embargoed_upload.size) resp = api_client.post(f'/api/uploads/{embargoed_upload.upload_id}/validate/') - assert resp.status_code == 200 - assert resp.data == { - 'blob_id': str(embargoed_asset_blob.blob_id), - 'etag': embargoed_asset_blob.etag, - 'sha256': embargoed_asset_blob.sha256, - 'size': embargoed_asset_blob.size, - } + assert resp.status_code == 409 assert AssetBlob.objects.all().count() == 1 diff --git a/dandiapi/api/views/upload.py b/dandiapi/api/views/upload.py index 433195716..ab4c11a1c 100644 --- a/dandiapi/api/views/upload.py +++ b/dandiapi/api/views/upload.py @@ -240,23 +240,26 @@ def upload_validate_view(request: Request, upload_id: str) -> HttpResponseBase: f'ETag {upload.etag} does not match actual ETag {upload.actual_etag()}.' ) - # Use a transaction here so we can use select_for_update to lock the DB rows to avoid - # a race condition where two clients are uploading the same blob at the same time. - # This also ensures that the minting of the new AssetBlob and deletion of the Upload - # is an atomic operation. with transaction.atomic(): - try: - # Perhaps another upload completed before this one and has already created an AssetBlob. - asset_blob = AssetBlob.objects.select_for_update(no_key=True).get( - etag=upload.etag, size=upload.size - ) - except AssetBlob.DoesNotExist: - asset_blob = upload.to_asset_blob() - asset_blob.save() + # Avoid a race condition where two clients are uploading the same blob at the same time. + asset_blob, created = AssetBlob.objects.get_or_create( + etag=upload.etag, + size=upload.size, + defaults={ + 'embargoed': upload.embargoed, + 'blob_id': upload.upload_id, + 'blob': upload.blob, + }, + ) # Clean up the upload upload.delete() + if not created: + return Response( + 'An identical blob has already been uploaded.', status=status.HTTP_409_CONFLICT + ) + # Start calculating the sha256 in the background calculate_sha256.delay(asset_blob.blob_id)