Skip to content

Commit

Permalink
Run all tests against S3 and S3 Express
Browse files Browse the repository at this point in the history
Enhance Github CI actions and variables to run all tests
against both S3 and S3 Express. Address test flakiness
by creating per test assets using BucketPrefixFixture,
instead of static assets.
  • Loading branch information
dnnanuti committed Dec 7, 2023
1 parent 071f495 commit 967eb41
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 271 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/python-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Dwarnings"
CI_REGION: ${{ vars.S3_REGION }}
CI_BUCKET: ${{ vars.S3_BUCKET }}
CI_PREFIX: ${{ vars.S3_PREFIX }}

jobs:
test:
Expand Down
38 changes: 30 additions & 8 deletions .github/workflows/python-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Dwarnings"
CI_REGION: ${{ vars.S3_REGION }}
CI_BUCKET: ${{ vars.S3_BUCKET }}
S3_REGION: ${{ vars.S3_REGION }}
S3_BUCKET: ${{ vars.S3_BUCKET }}
S3_EXPRESS_REGION: ${{ vars.S3_EXPRESS_REGION }}
S3_EXPRESS_BUCKET: ${{ vars.S3_EXPRESS_BUCKET }}
CI_PREFIX: ${{ vars.S3_PREFIX }}
CI_EXPRESS_BUCKET: ${{ vars.S3_EXPRESS_BUCKET }}
CI_EXPRESS_REGION: ${{ vars.S3_EXPRESS_REGION }}

jobs:
integration-test:
Expand Down Expand Up @@ -79,11 +79,33 @@ jobs:
python -m pip install -e "s3torchconnectorclient[test,e2e]"
python -m pip install -e "s3torchconnector[test,e2e]"
- name: s3torchconnector integration tests
run: pytest s3torchconnector/tst/e2e -n auto
- name: s3torchconnector S3 integration tests
run: |
CI_REGION=${{ S3_REGION }} \
CI_BUCKET=${{ S3_BUCKET }} \
CI_STORAGE_CLASS=\
pytest s3torchconnector/tst/e2e -n auto
- name: s3torchconnector S3 Express integration tests
run: |
CI_REGION=${{ S3_EXPRESS_REGION }} \
CI_BUCKET=${{ S3_EXPRESS_BUCKET }} \
CI_STORAGE_CLASS=EXPRESS_ONEZONE \
pytest s3torchconnector/tst/e2e -n auto
- name: s3torchconnectorclient integration tests
run: pytest s3torchconnectorclient/python/tst/integration -n auto
- name: s3torchconnectorclient S3 integration tests
run: |
CI_REGION=${{ S3_REGION }} \
CI_BUCKET=${{ S3_BUCKET }} \
CI_STORAGE_CLASS=\
pytest s3torchconnectorclient/python/tst/integration -n auto
- name: s3torchconnectorclient S3 Express integration tests
run: |
CI_REGION=${{ S3_EXPRESS_REGION }} \
CI_BUCKET=${{ S3_EXPRESS_BUCKET }} \
CI_STORAGE_CLASS=EXPRESS_ONEZONE \
pytest s3torchconnectorclient/python/tst/integration -n auto
- name: Save Cargo cache
uses: actions/cache/save@v3
Expand Down
9 changes: 7 additions & 2 deletions s3torchconnector/tst/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ class BucketPrefixFixture(object):
region: str
bucket: str
prefix: str
storage_class: str = None

def __init__(self, region: str, bucket: str, prefix: str):
def __init__(
self, region: str, bucket: str, prefix: str, storage_class: str = None
):
self.bucket = bucket
self.prefix = prefix
self.region = region
self.storage_class = storage_class
self.contents = {}
session = boto3.Session(region_name=region)
self.s3 = session.client("s3")
Expand All @@ -57,12 +61,13 @@ def get_test_bucket_prefix(name: str) -> BucketPrefixFixture:
bucket = getenv("CI_BUCKET")
prefix = getenv("CI_PREFIX")
region = getenv("CI_REGION")
storage_class = getenv("CI_STORAGE_CLASS")
assert prefix == "" or prefix.endswith("/")

nonce = random.randrange(2**64)
prefix = f"{prefix}{name}/{nonce}/"

return BucketPrefixFixture(region, bucket, prefix)
return BucketPrefixFixture(region, bucket, prefix, storage_class)


@pytest.fixture
Expand Down
11 changes: 8 additions & 3 deletions s3torchconnector/tst/e2e/test_e2e_s3datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ def test_s3mapdataset_images_10_from_prefix(image_directory):

# Intentional usage to emphasize the accessor dataset[index] works.
for index in range(len(dataset)):
key = f"{image_directory.prefix}img{index:03d}.jpg"
key = dataset[index].key
if image_directory.storage_class is None:
# S3 Express does not guarantee a sorted order for listed keys.
assert key == f"{image_directory.prefix}img{index:03d}.jpg"
assert dataset[index].read() == image_directory[key]


Expand Down Expand Up @@ -131,8 +134,10 @@ def _verify_image_iterable_dataset(
assert dataset is not None
for index, fileobj in enumerate(dataset):
assert fileobj is not None
full_key = f"{image_directory.prefix}img{index:03d}.jpg"
assert image_directory[full_key] == fileobj.read()
if image_directory.storage_class is None:
# S3 Express does not guarantee a sorted order for listed keys.
assert fileobj.key == f"{image_directory.prefix}img{index:03d}.jpg"
assert image_directory[fileobj.key] == fileobj.read()


def _pytorch_dataloader(
Expand Down
98 changes: 84 additions & 14 deletions s3torchconnectorclient/python/tst/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# // SPDX-License-Identifier: BSD

import io
import os
import random

import boto3
import numpy as np
from PIL import Image
import pytest


def getenv(var: str) -> str:
Expand All @@ -11,29 +18,92 @@ def getenv(var: str) -> str:
return v


# TODO: Update with test fixtures
class TestConfig(object):
"""Config object fixture for CI, wrapping region, S3 bucket, S3 express bucket and prefix configuration."""
class BucketPrefixFixture(object):
"""An S3 bucket/prefix and its contents for use in a single unit test. The prefix will be unique
to this instance, so other concurrent tests won't affect its state."""

region: str
bucket: str
express_bucket: str
express_region: str
prefix: str
storage_class: str = None

def __init__(
self, region: str, bucket: str, express_bucket: str, express_region: str
self, region: str, bucket: str, prefix: str, storage_class: str = None
):
self.region = region
self.bucket = bucket
self.express_bucket = express_bucket
self.express_region = express_region
self.prefix = prefix
self.region = region
self.storage_class = storage_class
self.contents = {}
session = boto3.Session(region_name=region)
self.s3 = session.client("s3")

@property
def s3_uri(self):
return f"s3://{self.bucket}/{self.prefix}"

def add(self, key: str, contents: bytes, **kwargs):
"""Upload an S3 object to this prefix of the bucket."""
full_key = f"{self.prefix}{key}"
self.s3.put_object(Bucket=self.bucket, Key=full_key, Body=contents, **kwargs)
self.contents[full_key] = contents

def __getitem__(self, index):
return self.contents[index]

def get_test_config() -> TestConfig:
def __iter__(self):
return iter(self.contents)


def get_test_bucket_prefix(name: str) -> BucketPrefixFixture:
"""Create a new bucket/prefix fixture for the given test name."""
region = getenv("CI_REGION")
bucket = getenv("CI_BUCKET")
express_bucket = getenv("CI_EXPRESS_BUCKET")
express_region = getenv("CI_EXPRESS_REGION")
prefix = getenv("CI_PREFIX")
region = getenv("CI_REGION")
storage_class = getenv("CI_STORAGE_CLASS")
assert prefix == "" or prefix.endswith("/")

nonce = random.randrange(2**64)
prefix = f"{prefix}{name}/{nonce}/"

return BucketPrefixFixture(region, bucket, prefix, storage_class)


@pytest.fixture
def image_directory(request) -> BucketPrefixFixture:
"""Create a bucket/prefix fixture that contains a directory of random JPG image files."""
NUM_IMAGES = 10
IMAGE_SIZE = 100
fixture = get_test_bucket_prefix(f"{request.node.name}/image_directory")
for i in range(NUM_IMAGES):
data = np.random.randint(0, 256, IMAGE_SIZE * IMAGE_SIZE * 3, np.uint8)
data = data.reshape(IMAGE_SIZE, IMAGE_SIZE, 3)
image = Image.fromarray(data, "RGB")
image_bytes = io.BytesIO()
image.save(image_bytes, "jpeg")
image_bytes.seek(0)
image_bytes = image_bytes.read()

key = f"img{i:03d}.jpg"
fixture.add(key, image_bytes)

return fixture


@pytest.fixture
def sample_directory(request) -> BucketPrefixFixture:
fixture = get_test_bucket_prefix(f"{request.node.name}/sample_files")
fixture.add("hello_world.txt", b"Hello, World!\n")
return fixture


@pytest.fixture
def put_object_tests_directory(request) -> BucketPrefixFixture:
fixture = get_test_bucket_prefix(f"{request.node.name}/put_integration_tests")
fixture.add("to_overwrite.txt", b"before")
return fixture


return TestConfig(region, bucket, express_bucket, express_region)
@pytest.fixture
def checkpoint_directory(request) -> BucketPrefixFixture:
return get_test_bucket_prefix(f"{request.node.name}/checkpoint_directory")
Loading

0 comments on commit 967eb41

Please sign in to comment.