diff --git a/.github/workflows/mega-linter.yml b/.github/workflows/mega-linter.yml new file mode 100644 index 0000000..70b0494 --- /dev/null +++ b/.github/workflows/mega-linter.yml @@ -0,0 +1,56 @@ +# MegaLinter GitHub Action configuration file +# More info at https://megalinter.github.io +name: MegaLinter + +on: + # Trigger mega-linter at every push. Action will also be visible from Pull Requests to main + push: # Comment this line to trigger action only on pull-requests (not recommended if you don't pay for GH Actions) +permissions: read-all + +env: # Comment env block if you do not want to apply fixes + # Apply linter fixes configuration + APPLY_FIXES: all # When active, APPLY_FIXES must also be defined as environment variable (in github/workflows/mega-linter.yml or other CI tool) + #APPLY_FIXES_EVENT: pull_request # Decide which event triggers application of fixes in a commit or a PR (pull_request, push, all) + #APPLY_FIXES_MODE: pull_request # If APPLY_FIXES is used, defines if the fixes are directly committed (commit) or posted in a PR (pull_request) + DISABLE_LINTERS: SPELL_CSPELL,COPYPASTE_JSCPD,PYTHON_BANDIT,PYTHON_MYPY,PYTHON_PYRIGHT,PYTHON_PYLINT,MARKDOWN_MARKDOWN_LINK_CHECK,REPOSITORY_TRIVY + +concurrency: + group: ${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + build: + name: MegaLinter + runs-on: ubuntu-latest + steps: + # Git Checkout + - name: Checkout Code + uses: actions/checkout@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 # If you use VALIDATE_ALL_CODEBASE = true, you can remove this line to improve performances + + # MegaLinter + - name: MegaLinter + id: ml + # You can override MegaLinter flavor used to have faster performances + # More info at https://megalinter.github.io/flavors/ + uses: oxsecurity/megalinter/flavors/python@v6.22.2 + env: + # All available variables are described in documentation + # https://megalinter.github.io/configuration/ + VALIDATE_ALL_CODEBASE: true + # VALIDATE_ALL_CODEBASE: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Validates all source when push on main, else just the git diff with main. 
Override with true if you always want to lint all sources + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # ADD YOUR CUSTOM ENV VARIABLES HERE OR DEFINE THEM IN A FILE .mega-linter.yml AT THE ROOT OF YOUR REPOSITORY + DISABLE: COPYPASTE,SPELL # Disable copy-paste and spell checks + + # Upload MegaLinter artifacts + - name: Archive production artifacts + if: ${{ success() }} || ${{ failure() }} + uses: actions/upload-artifact@v3 + with: + name: MegaLinter reports + path: | + megalinter-reports + mega-linter.log diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..05bd687 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,48 @@ +# This workflow will install Python dependencies and run tests +name: Python package + +on: + push: +permissions: read-all + +jobs: + build: + permissions: + contents: read + id-token: write + issues: write + pull-requests: write + strategy: + fail-fast: false + matrix: + python-version: ["3.9"] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + role-to-assume: arn:aws:iam::712023778557:role/github/GitHub-Testing-BenchlingPackager + aws-region: us-east-1 + - name: Test with pytest + run: | + make test TEST_OS=${{ matrix.os }} + env: + BENCHLING_TENANT: ${{ secrets.BENCHLING_TENANT }} + BENCHLING_CLIENT_ID: ${{ secrets.BENCHLING_CLIENT_ID }} + BENCHLING_CLIENT_SECRET_ARN: ${{ secrets.BENCHLING_CLIENT_SECRET_ARN }} + DST_BUCKET: ${{ secrets.DST_BUCKET }} + PKG_PREFIX: ${{ secrets.PKG_PREFIX }} + QUILT_CATALOG_DOMAIN: ${{ secrets.QUILT_CATALOG_DOMAIN }} + - name: Get Coverage Report + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 0.8 + if: github.event_name == 'pull_request' diff --git a/Install.md b/Install.md index 7fc4e9b..b27dcd3 100644 --- a/Install.md +++ b/Install.md @@ -124,7 +124,7 @@ since it was already created there. 1. Under `Parameters`: 1. Enter the name of the event bus created at step 1 as `BenchlingEventBusName`. 1. Enter the client ID from settings of app created at step 2 as `BenchlingClientId`. - 1. Enter the your Benchling tenant name (i.e. $BenchlingTenant in https://$BenchlingTenant.benchling.com) as `BenchlingTenant`. + 1. Enter your Benchling tenant name (i.e. $BenchlingTenant in `https://$BenchlingTenant.benchling.com`) as `BenchlingTenant`. 1. Enter the name of the S3 bucket to use for storing packages as `DestinationBucket`. 1. Optional: change the `PackageNamePrefix` used when creating new packages (default: `benchling/`). 1. Specify the hostname of your Quilt Catalog as `QuiltWebHost` @@ -144,12 +144,12 @@ click on its Physical ID. 
In order for the lambda to update Benchling with the package information, the notebook must have a schema containing exactly the following fields: -| Name | Required | Multi-select | Definition | -| --------------------- | --------- | ------------- | ------------- | -| Quilt+ URI | | | Text | -| Quilt Revise URL | | | Text | -| Quilt Catalog URL | | | Text | -| Sentinel | | | Integer | +| Name | Required | Multi-select | Definition | +|-------------------|----------|--------------|------------| +| Quilt+ URI | | | Text | +| Quilt Revise URL | | | Text | +| Quilt Catalog URL | | | Text | +| Sentinel | | | Integer | You can either create a brand-new schema, or add these fields to an existing schema. Each new notebook will need to have this schema applied to it. diff --git a/Makefile b/Makefile index c15bbe4..70990e7 100644 --- a/Makefile +++ b/Makefile @@ -2,18 +2,18 @@ sinclude .env TARGET = build/benchling_packager.yaml ACTIVATE = ./venv/bin/activate PKG_URL = "https://open.quiltdata.com/b/quilt-example/packages/examples/benchling-packager" -.PHONY: all clean install template upload +.PHONY: all clean install install-dev template test upload all: template upload clean: - rm -rf build - rm -rf venv - rm -f *requirements.txt + rm -rf build venv .pytest_cache + rm -f *requirements.txt .DS_Store + rm -f .coverage coverage.xml template: $(TARGET) -$(TARGET): build venv install make.py lambdas/lambda.py +$(TARGET): build venv install make.py lambdas/main.py . $(ACTIVATE) && python3 make.py > $(TARGET) upload: @@ -38,3 +38,23 @@ install: venv/bin/pip-sync requirements.txt requirements.txt: venv/bin/pip-compile requirements.in . $(ACTIVATE) && pip-compile requirements.in + +test: venv install-dev + . $(ACTIVATE) && python3 -m pytest --cov --cov-report xml:coverage.xml + +test-partials: venv install-dev + . $(ACTIVATE) && SKIP_PARTIALS=False python3 -m pytest + +coverage: venv install-dev + printenv BENCHLING_ENTRY_ID + . $(ACTIVATE) && python3 -m pytest --cov --cov-report html:coverage.html + open coverage.html/index.html + +watch: venv install-dev + . $(ACTIVATE) && ptw . --now + +install-dev: venv/bin/pip-sync dev-requirements.txt + . $(ACTIVATE) && pip-sync dev-requirements.txt + +dev-requirements.txt: venv/bin/pip-sync dev-requirements.in + . $(ACTIVATE) && pip-compile dev-requirements.in diff --git a/README.md b/README.md index 6065d16..6678f93 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,18 @@ This repository generates a CloudFormation template for processing (and link, if possible) a [Quilt](https://quiltdata.com/) package for every Benchling notebook. -## Template generation and upload +## Template generation Requires a recent version of Python 3. -```bash -make all +```shell +python3 -m venv venv +. ./venv/bin/activate +python3 -m pip install -r requirements.txt +python3 make.py > build/benchling_packager.yaml ``` -This will: +## Template upload - setup the Python environment - generate the template in the `build` directory @@ -24,3 +27,21 @@ To install and configure the template, see [Install.md](Install.md). Note: this is the file that's distributed as `README.md` in the package. + +## Testing for Developers + +If you want to modify the actual lambda function, you can run automated tests via: + +```shell +make test +``` + +In order to run these tests, you'll need to set the following environment variables +(usually in the `.env` file, which is auto-included by the Makefile): + +- `BENCHLING_TENANT`: the part before ".benchling.com" in your Benchling URL (e.g. 
"mycompany" for "mycompany.benchling.com") +- `BENCHLING_CLIENT_ID`: the client ID for the Benchling API` +- `BENCHLING_CLIENT_SECRET_ARN`: the ARN of the AWS Secrets Manager secret containing the client secret for the Benchling API +- `DST_BUCKET`: the name of the S3 bucket (no prefix) where the generated packages should be stored +- `PKG_PREFIX`: the prefix to use for the generated packages, with a trailing "/" (e.g. "benchling/" to store packages in the "benchling" directory) +- `QUILT_CATALOG_DOMAIN`: the domain name of your Quilt catalog (if any) where the generated packages can be viewed diff --git a/dev-requirements.in b/dev-requirements.in new file mode 100644 index 0000000..a342f7d --- /dev/null +++ b/dev-requirements.in @@ -0,0 +1,9 @@ +# https://suyojtamrakar.medium.com/managing-your-requirements-txt-with-pip-tools-in-python-8d07d9dfa464 +aws-lambda-powertools ~= 2.15 +benchling-sdk ~= 1.6 +botocore ~= 1.31 +jinja2 ~= 3.1 +quilt3 ~=5.3 +pytest +pytest-coverage +pytest-watcher diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..8509eaa --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,178 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile dev-requirements.in +# +anyio==3.7.1 + # via httpcore +attrs==22.2.0 + # via + # benchling-api-client + # benchling-sdk + # jsonschema + # referencing +aws-lambda-powertools==2.22.0 + # via -r dev-requirements.in +aws-requests-auth==0.4.3 + # via quilt3 +backoff==1.11.1 + # via + # benchling-api-client + # benchling-sdk +benchling-api-client==2.0.167 + # via benchling-sdk +benchling-sdk==1.7.0 + # via -r dev-requirements.in +boto3==1.28.27 + # via quilt3 +botocore==1.31.27 + # via + # -r dev-requirements.in + # boto3 + # s3transfer +certifi==2023.7.22 + # via + # benchling-sdk + # httpcore + # httpx + # requests +cffi==1.15.1 + # via cryptography +charset-normalizer==3.2.0 + # via requests +coverage[toml]==7.3.0 + # via pytest-cov +cryptography==41.0.3 + # via jwcrypto +dataclasses-json==0.5.14 + # via + # benchling-api-client + # benchling-sdk +deprecated==1.2.14 + # via jwcrypto +exceptiongroup==1.1.3 + # via + # anyio + # pytest +h11==0.14.0 + # via httpcore +httpcore==0.17.3 + # via httpx +httpx==0.24.1 + # via + # benchling-api-client + # benchling-sdk +idna==3.4 + # via + # anyio + # httpx + # requests +iniconfig==2.0.0 + # via pytest +jinja2==3.1.2 + # via -r dev-requirements.in +jmespath==1.0.1 + # via + # boto3 + # botocore +jsonlines==1.2.0 + # via quilt3 +jsonschema==4.19.0 + # via quilt3 +jsonschema-specifications==2023.7.1 + # via jsonschema +jwcrypto==1.5.0 + # via benchling-sdk +markupsafe==2.1.3 + # via jinja2 +marshmallow==3.20.1 + # via dataclasses-json +mypy-extensions==1.0.0 + # via typing-inspect +ordered-set==4.1.0 + # via benchling-sdk +packaging==23.1 + # via + # marshmallow + # pytest +platformdirs==3.10.0 + # via quilt3 +pluggy==1.2.0 + # via pytest +pycparser==2.21 + # via cffi +pytest==7.4.0 + # via + # -r dev-requirements.in + # pytest-cov +pytest-cov==4.1.0 + # via pytest-cover +pytest-cover==3.0.0 + # via pytest-coverage +pytest-coverage==0.0 + # via -r dev-requirements.in +pytest-watcher==0.3.4 + # via -r dev-requirements.in +python-dateutil==2.8.2 + # via + # benchling-api-client + # benchling-sdk + # botocore +pyyaml==6.0.1 + # via + # benchling-sdk + # quilt3 +quilt3==5.3.1 + # via -r dev-requirements.in +referencing==0.30.2 + # via + # jsonschema + # jsonschema-specifications +requests==2.31.0 + # via + # 
aws-requests-auth + # quilt3 + # requests-futures +requests-futures==1.0.0 + # via quilt3 +rpds-py==0.9.2 + # via + # jsonschema + # referencing +s3transfer==0.6.2 + # via boto3 +six==1.16.0 + # via + # jsonlines + # python-dateutil +sniffio==1.3.0 + # via + # anyio + # httpcore + # httpx +tenacity==8.2.3 + # via quilt3 +tomli==2.0.1 + # via + # coverage + # pytest + # pytest-watcher +tqdm==4.66.1 + # via quilt3 +typing-extensions==4.7.1 + # via + # aws-lambda-powertools + # benchling-api-client + # benchling-sdk + # typing-inspect +typing-inspect==0.9.0 + # via dataclasses-json +urllib3==1.26.16 + # via + # botocore + # requests +watchdog==3.0.0 + # via pytest-watcher +wrapt==1.15.0 + # via deprecated diff --git a/lambdas/__init__.py b/lambdas/__init__.py new file mode 100644 index 0000000..610510b --- /dev/null +++ b/lambdas/__init__.py @@ -0,0 +1 @@ +from .main import BenchlingClient, BenchlingEntry, main # noqa: F401 diff --git a/lambdas/lambda.py b/lambdas/lambda.py deleted file mode 100644 index 9834a3a..0000000 --- a/lambdas/lambda.py +++ /dev/null @@ -1,162 +0,0 @@ -import io -import json -import os -import pathlib -import tempfile -import urllib -import zipfile - -# Must be done before importing quilt3 -os.environ["QUILT_DISABLE_CACHE"] = "true" - -import botocore -import jinja2 -import quilt3 -from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities import parameters -from benchling_sdk import models as benchling_models -from benchling_sdk.auth.client_credentials_oauth2 import ClientCredentialsOAuth2 -from benchling_sdk.benchling import Benchling -from benchling_sdk.helpers import serialization_helpers - -logger = Logger() - -BENCHLING_TENANT = os.environ["BENCHLING_TENANT"] -BENCHLING_CLIENT_ID = os.environ["BENCHLING_CLIENT_ID"] -BENCHLING_CLIENT_SECRET_ARN = os.environ["BENCHLING_CLIENT_SECRET_ARN"] -DST_BUCKET = os.environ["DST_BUCKET"] -PKG_PREFIX = os.environ["PKG_PREFIX"] -QUILT_CATALOG_DOMAIN = os.environ["QUILT_CATALOG_DOMAIN"] - - -benchling = Benchling( - url=f"https://{BENCHLING_TENANT}.benchling.com", - auth_method=ClientCredentialsOAuth2( - client_id=BENCHLING_CLIENT_ID, - client_secret=parameters.get_secret(BENCHLING_CLIENT_SECRET_ARN), - ), -) - - -template = jinja2.Template( - """# [{{ entry.name }}]({{ entry.webURL }}) - -* id: {{ entry.id }} -* displayId: {{ entry.displayId }} -* folderId: {{ entry.folderId }} -* createdAt: {{ entry.createdAt }} -* modifiedAt: {{ entry.modifiedAt }} - -## Authors -{% for author in entry.authors %} -* {{ author.name }} - * id: {{ author.id }} - * handle: {{ author.handle }} -{%- endfor %} - -## Schema - -* id: {{ entry.schema.id }} -* name: {{ entry.schema.name }} - -## Fields -{% for name, value in entry.fields.items() %} -* {{ name }}: {{ value.displayValue }} -{%- endfor %} - -## Custom fields -{% for name, value in entry.customFields.items() %} -* {{ name }}: {{ value.value }} -{%- endfor %} -""" -) - - -QUILT_SUMMARIZE = json.dumps( - [ - [ - { - "path": "entry.md", - "width": "calc(40% - 16px)", - "expand": True, - }, - { - "path": "notes.pdf", - "width": "calc(60% - 16px)", - "expand": True, - }, - ] - ] -) - - -@logger.inject_lambda_context -def lambda_handler(event, context): - entry = event["detail"]["entry"] - task = benchling.tasks.wait_for_task( - benchling.exports.export( - benchling_models.ExportItemRequest(id=entry["id"]) - ).task_id - ) - if task.status != benchling_models.AsyncTaskStatus.SUCCEEDED: - raise Exception(f"Notes export failed: {task!r}") - - with 
urllib.request.urlopen(task.response["downloadURL"]) as src: - buf = io.BytesIO(src.read()) - - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir_path = pathlib.Path(tmpdir) - - (tmpdir_path / "entry.md").write_text(template.render({"entry": entry})) - - with zipfile.ZipFile(buf) as zip_file: - with zip_file.open(zip_file.namelist()[0]) as src: - with (tmpdir_path / "notes.pdf").open("wb") as dst: - while data := src.read(4096): - dst.write(data) - - (tmpdir_path / "quilt_summarize.json").write_text(QUILT_SUMMARIZE) - - pkg_name = PKG_PREFIX + entry["displayId"] - registry = f"s3://{DST_BUCKET}" - try: - pkg = quilt3.Package.browse(pkg_name, registry=registry) - except botocore.exceptions.ClientError as e: - # XXX: quilt3 should raise some specific exception when package doesn't exist. - if e.response["Error"]["Code"] not in ("NoSuchKey", "404"): - raise - pkg = quilt3.Package() - pkg.set_dir( - ".", - tmpdir_path, - # This shouldn't hit 1 MB limit on metadata, because max size of EventBridge is 256 KiB. - meta=entry, - ).push( - pkg_name, - registry=registry, - ) - - fields_values = {} - if "Quilt+ URI" in entry["fields"]: - fields_values["Quilt+ URI"] = f"quilt+s3://{DST_BUCKET}#package={pkg_name}" - if "Quilt Catalog URL" in entry["fields"]: - fields_values[ - "Quilt Catalog URL" - ] = f"https://{QUILT_CATALOG_DOMAIN}/b/{DST_BUCKET}/packages/{pkg_name}" - if "Quilt Revise URL" in entry["fields"]: - fields_values[ - "Quilt Revise URL" - ] = f"https://{QUILT_CATALOG_DOMAIN}/b/{DST_BUCKET}/packages/{pkg_name}?action=revisePackage" - - if fields_values: - benchling.entries.update_entry( - event["detail"]["entry"]["id"], - benchling_models.EntryUpdate( - fields=serialization_helpers.fields( - { - name: {"value": value} - for name, value in fields_values.items() - } - ) - ), - ) diff --git a/lambdas/main.py b/lambdas/main.py new file mode 100644 index 0000000..6affa27 --- /dev/null +++ b/lambdas/main.py @@ -0,0 +1,226 @@ +import io +import json +import os +import pathlib +import tempfile +import zipfile +from urllib import request as urllib_request + +import jinja2 +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities import parameters +from benchling_sdk import models as benchling_models +from benchling_sdk.auth.client_credentials_oauth2 import ClientCredentialsOAuth2 +from benchling_sdk.benchling import Benchling +from benchling_sdk.helpers import serialization_helpers +from botocore import exceptions as botocore_exceptions + +# Must be done before importing quilt3 +os.environ["QUILT_DISABLE_CACHE"] = "true" +import quilt3 # noqa: E402 + +logger = Logger() + + +class BenchlingClient: + BENCHLING_TENANT = os.environ["BENCHLING_TENANT"] + BENCHLING_CLIENT_ID = os.environ["BENCHLING_CLIENT_ID"] + BENCHLING_CLIENT_SECRET_ARN = os.environ["BENCHLING_CLIENT_SECRET_ARN"] + + @classmethod + def Default(cls): + return cls( + cls.BENCHLING_TENANT, + cls.BENCHLING_CLIENT_ID, + cls.BENCHLING_CLIENT_SECRET_ARN, + ) + + def __init__(self, tenant, id, arn): + if not isinstance(arn, str): + raise Exception("Failed to fetch CLIENT_SECRET_ARN") + secret = parameters.get_secret(arn) + if not isinstance(secret, str): + raise Exception(f"Failed to fetch secret: {arn!r}") + self.benchling = Benchling( + url=f"https://{tenant}.benchling.com", + auth_method=ClientCredentialsOAuth2( + client_id=id, + client_secret=secret, + ), + ) + + def get_task(self, entry_id): + self.task = self.benchling.tasks.wait_for_task( + self.benchling.exports.export( + 
benchling_models.ExportItemRequest(id=entry_id) # type: ignore + ).task_id + ) + if self.task.status != benchling_models.AsyncTaskStatus.SUCCEEDED: + raise Exception(f"Notes export failed: {self.task!r}") + return self.task + + def update_entry(self, entry_id, fields_values): + values = {k: {"value": v} for k, v in fields_values.items()} + fields = serialization_helpers.fields(values) + self.benchling.entries.update_entry( + entry_id, + benchling_models.EntryUpdate(fields=fields), # type: ignore + ) + + +class BenchlingEntry: + REVISE = "action=revisePackage" + + QUILT_SUMMARIZE = json.dumps( + [ + [ + { + "path": "entry.md", + "width": "calc(40% - 16px)", + "expand": True, + }, + { + "path": "notes.pdf", + "width": "calc(60% - 16px)", + "expand": True, + }, + ] + ] + ) + + FLD = { + "URI": "Quilt+ URI", + "CAT": "Quilt Catalog URL", + "REV": "Quilt Revise URL", + } + + ENTRY_FMT = """ +# [{{ entry.name }}]({{ entry.webURL }}) + +* id: {{ entry.id }} +* displayId: {{ entry.displayId }} +* folderId: {{ entry.folderId }} +* createdAt: {{ entry.createdAt }} +* modifiedAt: {{ entry.modifiedAt }} + +## Authors +{% for author in entry.authors %} +* {{ author.name }} + * id: {{ author.id }} + * handle: {{ author.handle }} +{%- endfor %} + +## Schema + +* id: {{ entry.schema.id }} +* name: {{ entry.schema.name }} + +## Fields +{% for name, value in entry.fields.items() %} +* {{ name }}: {{ value.displayValue }} +{%- endfor %} + +## Custom fields +{% for name, value in entry.customFields.items() %} +* {{ name }}: {{ value.value }} +{%- endfor %} +""" + + DST_BUCKET = os.environ["DST_BUCKET"] + PKG_PREFIX = os.environ["PKG_PREFIX"] + QUILT_CATALOG_DOMAIN = os.environ["QUILT_CATALOG_DOMAIN"] + QUILT_PREFIX = f"https://{QUILT_CATALOG_DOMAIN}/b/{DST_BUCKET}/packages" + + def __init__(self, entry): + self.client = BenchlingClient.Default() + self.entry = entry + self.entry_id = entry["id"] + self.fields = entry.get("fields", {}) + self.pkg_name = self.name() + self.registry = f"s3://{self.DST_BUCKET}" + + def name(self): + SEP = "/" + if SEP not in self.PKG_PREFIX: + self.PKG_PREFIX += SEP + return self.PKG_PREFIX + self.entry.get("displayId", self.entry_id) + + def format(self): + template = jinja2.Template(self.ENTRY_FMT) + return template.render({"entry": self.entry}) + + def dump(self): + return json.dumps(self.entry) + + def write_notes(self, tmpdir_path): + outfile = tmpdir_path / "notes.pdf" + task = self.client.get_task(self.entry_id) + + with urllib_request.urlopen(task.response["downloadURL"]) as src: + buf = io.BytesIO(src.read()) + + with zipfile.ZipFile(buf) as zip_file: + with zip_file.open(zip_file.namelist()[0]) as src: + with outfile.open("wb") as dst: + while data := src.read(4096): + dst.write(data) + return outfile + + def write_files(self, tmpdir_path): + self.write_notes(tmpdir_path) + (tmpdir_path / "entry.md").write_text(self.format()) + (tmpdir_path / "entry.json").write_text(self.dump()) + (tmpdir_path / "quilt_summarize.json").write_text(self.QUILT_SUMMARIZE) + + def push_package(self, tmpdir_path): + pkg = quilt3.Package() + try: + pkg = quilt3.Package.browse(self.pkg_name, registry=self.registry) + except botocore_exceptions.ClientError as e: + # XXX: quilt3 should raise some specific exception + # when package doesn't exist. + if e.response["Error"]["Code"] not in ("NoSuchKey", "404"): + raise + + pkg.set_dir(".", tmpdir_path, meta=self.entry) + # This shouldn't hit 1 MB limit on metadata, + # because max size of EventBridge is 256 KiB. 
+ return pkg.push(self.pkg_name, registry=self.registry) + + def field_values(self): + values = { + "URI": f"quilt+s3://{self.DST_BUCKET}#package={self.pkg_name}", + "CAT": f"{self.QUILT_PREFIX}/{self.pkg_name}", + "REV": f"{self.QUILT_PREFIX}/{self.pkg_name}?{self.REVISE}", + } + return {f: values.get(k) for k, f in self.FLD.items()} + + def update_benchling_notebook(self) -> bool: + values = self.field_values() + if values: + self.client.update_entry(self.entry_id, values) + logger.debug(f"Updated entry {self.entry_id} with package {self.pkg_name}") + return True + else: + logger.warning(f"Quilt schema fields not found for entry {self.entry_id!r}") + return False + + +def main(entry_dict): + entry = BenchlingEntry(entry_dict) + with tempfile.TemporaryDirectory() as tmpdir: + tmpdir_path = pathlib.Path(tmpdir) + + entry.write_files(tmpdir_path) + entry.push_package(tmpdir_path) + entry.update_benchling_notebook() + return entry + + +@logger.inject_lambda_context +def lambda_handler(event, context): + main(event["detail"]["entry"]) + + return { + "statusCode": 200, + } diff --git a/layer/deploy.sh b/layer/deploy.sh old mode 100755 new mode 100644 index e4fb942..8efc999 --- a/layer/deploy.sh +++ b/layer/deploy.sh @@ -3,8 +3,8 @@ set -euo pipefail error() { - echo $@ 2>&1 - exit 1 + echo "$@" 2>&1 + exit 1 } [ "$#" -eq 0 ] || error "Usage: $0" @@ -17,13 +17,13 @@ cd "$work_dir" echo "Installing packages..." python3 -m pip install \ - --platform manylinux2014_x86_64 \ - --target=./python/lib/python3.9/site-packages \ - --implementation cp \ - --python 3.9 \ - --no-deps \ - --no-compile \ - -r $exec_dir/requirements.txt + --platform manylinux2014_x86_64 \ + --target=./python/lib/python3.9/site-packages \ + --implementation cp \ + --python 3.9 \ + --no-deps \ + --no-compile \ + -r "$exec_dir/requirements.txt" echo "Compressing..." zip -9 -r "$zip_file" "." @@ -39,13 +39,11 @@ aws s3 cp --acl public-read "$zip_file" "s3://quilt-lambda-$primary_region/$s3_k cd .. rm -rf "$work_dir" -for region in $regions -do - if [ "$region" != "$primary_region" ] - then - echo "Copying to $region..." - aws s3 cp --acl public-read "s3://quilt-lambda-$primary_region/$s3_key" "s3://quilt-lambda-$region/$s3_key" --region "$region" --source-region "$primary_region" - fi +for region in $regions; do + if [ "$region" != "$primary_region" ]; then + echo "Copying to $region..." 
+ aws s3 cp --acl public-read "s3://quilt-lambda-$primary_region/$s3_key" "s3://quilt-lambda-$region/$s3_key" --region "$region" --source-region "$primary_region" + fi done echo "Deployed $s3_key" diff --git a/make.py b/make.py index 393e309..3bde703 100644 --- a/make.py +++ b/make.py @@ -53,7 +53,7 @@ def make_layer(cft: troposphere.Template): template=cft, Content=awslambda.Content( S3Bucket=troposphere.Sub("quilt-lambda-${AWS::Region}"), - S3Key="benchling-packager/benchling-packager-layer.4bcb4369305e6dca4ec2cec50d2891ad138adfc1f3833293d32a999bd1295770.zip", + S3Key="benchling-packager/benchling-packager-layer.4bcb4369305e6dca4ec2cec50d2891ad138adfc1f3833293d32a999bd1295770.zip", # noqa ), ) @@ -76,7 +76,8 @@ def make_template(*, metadata: dict) -> troposphere.Template: Type="String", AllowedPattern=r"^aws\.partner(/[\.\-_A-Za-z0-9]+){2,}$", Description=( - "Name of event bus where Benchling events are emitted, e.g aws.partner/benchling.com/tenant/app-name" + "Name of event bus where Benchling events are emitted, " + + "e.g aws.partner/benchling.com/tenant/app-name" ), ) benchling_tenant = troposphere.Parameter( @@ -84,7 +85,8 @@ def make_template(*, metadata: dict) -> troposphere.Template: template=cft, Type="String", AllowedPattern=r"^[^/]+$", - Description="Benchling tenant name, i.e. $BenchlingTenant in https://$BenchlingTenant.benchling.com", + Description="Benchling tenant name, i.e. $BenchlingTenant in " + + "https://$BenchlingTenant.benchling.com", ) benchling_client_id = troposphere.Parameter( "BenchlingClientId", @@ -114,7 +116,8 @@ def make_template(*, metadata: dict) -> troposphere.Template: Default="benchling/", AllowedPattern=r".+/.*$", Description=( - "Prefix for package names i.e. package names will be $PackageNamePrefix$ExperimentDisplayID," + "Prefix for package names i.e. 
package names will be" + " $PackageNamePrefix$ExperimentDisplayID," " must contain, but not start with '/'" ), ) @@ -180,7 +183,7 @@ def make_template(*, metadata: dict) -> troposphere.Template: QUILT_CATALOG_DOMAIN=quilt_domain.ref(), ), Handler="index.lambda_handler", - Code=awslambda.Code(ZipFile=(LAMBDAS_DIR / "lambda.py").read_text()), + Code=awslambda.Code(ZipFile=(LAMBDAS_DIR / "main.py").read_text()), ReservedConcurrentExecutions=1, # FIXME MemorySize=512, ) diff --git a/requirements.in b/requirements.in index a89b443..6772766 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,4 @@ +botocore ~= 1.31 troposphere ~=4.1 quilt3 ~=5.3 cfn-lint diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_entry.py b/tests/test_entry.py new file mode 100644 index 0000000..2997379 --- /dev/null +++ b/tests/test_entry.py @@ -0,0 +1,104 @@ +import os +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +import quilt3 +from lambdas import BenchlingEntry, main + +SKIP_PARTIALS = os.environ.get("SKIP_PARTIALS", "True") != "False" + +try: + BENCHLING_ENTRY_ID = os.environ["BENCHLING_ENTRY_ID"] +except KeyError: + pytest.skip(allow_module_level=True) + +ENTRY_DATA = { + "id": BENCHLING_ENTRY_ID, + "displayId": "test_entry", + "name": "test_entry", + "folderId": "test_folder", + "createdAt": "2021-01-01T00:00:00.000Z", + "modifiedAt": "2021-01-01T00:00:00.000Z", + "schema": { + "id": "test_schema", + "name": "test_schema", + }, + "fields": {}, + "customFields": {}, + "authors": [ + { + "id": "test_author", + "name": "test_author", + "handle": "test_author", + } + ], + "days": [], + "webURL": "https://example.com", +} + + +@pytest.fixture +def entry(): + return BenchlingEntry(ENTRY_DATA) + + +def test_entry(entry): + assert entry + assert entry.entry_id == BENCHLING_ENTRY_ID + assert entry.fields == {} + assert entry.pkg_name + assert BenchlingEntry.PKG_PREFIX in entry.pkg_name + assert "/" in entry.pkg_name + + +def test_format(entry): + fmt = entry.format() + assert fmt + assert "test_entry" in fmt + + +def test_dump(entry): + dmp = entry.dump() + assert dmp + assert "days" in dmp + + +@pytest.mark.skipif(SKIP_PARTIALS, reason="Only do end-to-end test") +def test_write(entry): + with TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + entry.write_files(tmpdir_path) + fn = {f.name: f for f in tmpdir_path.glob("*")} + assert "entry.json" in fn + assert "notes.pdf" in fn + notes = fn["notes.pdf"] + assert isinstance(notes, Path) + assert notes.exists() + + +@pytest.mark.skipif(SKIP_PARTIALS, reason="Only do end-to-end test") +def test_push(entry): + with TemporaryDirectory() as tmpdir: + tmpdir_path = Path(tmpdir) + (tmpdir_path / "README.md").write_text("test_push") + rc = entry.push_package(tmpdir_path) + assert rc + + pkg = quilt3.Package.browse(entry.pkg_name, registry=entry.registry) + assert pkg + readme = pkg["README.md"] + assert readme + assert readme() == "test_push" + + +@pytest.mark.skipif(SKIP_PARTIALS, reason="Only do end-to-end test") +def test_update(entry): + rc = entry.update_benchling_notebook() + assert rc + + +@pytest.mark.skipif(SKIP_PARTIALS is False, reason="Only do partial tests") +def test_handler(): + entry = main(ENTRY_DATA) + assert isinstance(entry, BenchlingEntry) diff --git a/tests/test_lambda.py b/tests/test_lambda.py new file mode 100644 index 0000000..09f1293 --- /dev/null +++ b/tests/test_lambda.py @@ -0,0 +1,26 @@ +from aws_lambda_powertools.utilities import 
parameters # type: ignore +from lambdas import BenchlingClient, BenchlingEntry, main + + +def test_import(): + assert BenchlingClient + assert BenchlingEntry + assert main + + +def test_env(): + assert BenchlingClient.BENCHLING_TENANT + assert BenchlingEntry.DST_BUCKET + + +def test_secret(): + arn = BenchlingClient.BENCHLING_CLIENT_SECRET_ARN + assert arn + assert "us-east-1" in arn + secret = parameters.get_secret(arn) + assert secret + + +def test_client(): + client = BenchlingClient.Default() + assert client diff --git a/upload.py b/upload.py index 3a655e2..a719535 100644 --- a/upload.py +++ b/upload.py @@ -5,11 +5,13 @@ TEMPLATE = "benchling_packager.yaml" TARGET = f"build/{TEMPLATE}" + def upload(): pkg = quilt3.Package() pkg.set("README.md", "Install.md") pkg.set(TEMPLATE, TARGET) pkg.push(PKG_NAME, registry=REGISTRY) + if __name__ == "__main__": upload()
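The refactored entry point can be exercised outside AWS in roughly the way the new tests do: build an entry dict shaped like the EventBridge payload and hand it to `lambdas.main`. The snippet below is a minimal sketch, not part of the diff; it assumes the environment variables listed in the README, a `BENCHLING_ENTRY_ID` pointing at a real notebook entry whose schema has the Quilt fields from Install.md (as in `tests/test_entry.py`), and live AWS/Benchling credentials, because `BenchlingEntry` constructs a real client and exports the entry's notes.

```python
# Local smoke run of the refactored lambda (sketch, not part of the diff above).
# Assumes BENCHLING_TENANT, BENCHLING_CLIENT_ID, BENCHLING_CLIENT_SECRET_ARN,
# DST_BUCKET, PKG_PREFIX, and QUILT_CATALOG_DOMAIN are set (see README), plus
# BENCHLING_ENTRY_ID and AWS credentials, since a live client is created.
import os

from lambdas import main

entry = {
    "id": os.environ["BENCHLING_ENTRY_ID"],
    "displayId": "test_entry",
    "name": "test_entry",
    "folderId": "test_folder",
    "createdAt": "2021-01-01T00:00:00.000Z",
    "modifiedAt": "2021-01-01T00:00:00.000Z",
    "schema": {"id": "test_schema", "name": "test_schema"},
    "fields": {},
    "customFields": {},
    "authors": [{"id": "a", "name": "a", "handle": "a"}],
    "days": [],
    "webURL": "https://example.com",
}

# Renders entry.md/entry.json, downloads notes.pdf, pushes the Quilt package,
# and writes the Quilt+ URI / catalog / revise URLs back to the Benchling entry.
processed = main(entry)
print(processed.pkg_name)
```

Running this pushes a package to `s3://$DST_BUCKET` under `$PKG_PREFIX`, the same path the deployed Lambda takes for a real EventBridge event.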