Skip to content

Commit

Permalink
Run all tests against S3 and S3 Express (#90)
Browse files Browse the repository at this point in the history
Enhance Github CI actions and variables to run all tests
against both S3 and S3 Express. Address test flakiness
by creating per test assets using BucketPrefixFixture,
instead of static assets.
Remove hypothesis setup from test_mountpoint_client_pickles()
as it was flaky. We will add this back when we align on the
limits of the parameters.
  • Loading branch information
dnanuti authored Dec 8, 2023
1 parent c92ca2a commit 1f8fdd0
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 296 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/python-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Dwarnings"
CI_REGION: ${{ vars.S3_REGION }}
CI_BUCKET: ${{ vars.S3_BUCKET }}
CI_PREFIX: ${{ vars.S3_PREFIX }}

jobs:
test:
Expand Down
30 changes: 21 additions & 9 deletions .github/workflows/python-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Dwarnings"
CI_REGION: ${{ vars.S3_REGION }}
CI_BUCKET: ${{ vars.S3_BUCKET }}
CI_PREFIX: ${{ vars.S3_PREFIX }}
CI_EXPRESS_BUCKET: ${{ vars.S3_EXPRESS_BUCKET }}
CI_EXPRESS_REGION: ${{ vars.S3_EXPRESS_REGION }}

jobs:
integration-test:
Expand All @@ -29,10 +25,18 @@ jobs:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
test-run:
- name: "S3"
bucket: ${{ vars.S3_BUCKET }}
region: ${{ vars.S3_REGION }}
storage-class: ""
- name: "S3 Express"
bucket: ${{ vars.S3_EXPRESS_BUCKET }}
region: ${{ vars.S3_EXPRESS_REGION }}
storage-class: "EXPRESS_ONEZONE"
permissions:
id-token: write
contents: read

steps:
- name: Checkout code
uses: actions/checkout@v4
Expand Down Expand Up @@ -79,11 +83,19 @@ jobs:
python -m pip install -e "s3torchconnectorclient[test,e2e]"
python -m pip install -e "s3torchconnector[test,e2e]"
- name: s3torchconnector integration tests
run: pytest s3torchconnector/tst/e2e -n auto
- name: s3torchconnector ${{ matrix.test-run.name }} integration tests
run: |
CI_REGION=${{ matrix.test-run.region }} \
CI_BUCKET=${{ matrix.test-run.bucket }} \
CI_STORAGE_CLASS=${{ matrix.test-run.storage-class }} \
pytest s3torchconnector/tst/e2e -n auto
- name: s3torchconnectorclient integration tests
run: pytest s3torchconnectorclient/python/tst/integration -n auto
- name: s3torchconnectorclient ${{ matrix.test-run.name }} integration tests
run: |
CI_REGION=${{ matrix.test-run.region }} \
CI_BUCKET=${{ matrix.test-run.bucket }} \
CI_STORAGE_CLASS=${{ matrix.test-run.storage-class }} \
pytest s3torchconnectorclient/python/tst/integration -n auto
- name: Save Cargo cache
uses: actions/cache/save@v3
Expand Down
16 changes: 10 additions & 6 deletions s3torchconnector/tst/e2e/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# // SPDX-License-Identifier: BSD

from dataclasses import dataclass
import io
import os
import random
Expand All @@ -12,10 +11,10 @@
import pytest


def getenv(var: str) -> str:
def getenv(var: str, optional: bool = False) -> str:
v = os.getenv(var)
if v is None:
raise Exception(f"required environment variable {var} is not set")
if v is None and not optional:
raise Exception(f"Required environment variable {var} is not set")
return v


Expand All @@ -26,11 +25,15 @@ class BucketPrefixFixture(object):
region: str
bucket: str
prefix: str
storage_class: str = None

def __init__(self, region: str, bucket: str, prefix: str):
def __init__(
self, region: str, bucket: str, prefix: str, storage_class: str = None
):
self.bucket = bucket
self.prefix = prefix
self.region = region
self.storage_class = storage_class
self.contents = {}
session = boto3.Session(region_name=region)
self.s3 = session.client("s3")
Expand All @@ -57,12 +60,13 @@ def get_test_bucket_prefix(name: str) -> BucketPrefixFixture:
bucket = getenv("CI_BUCKET")
prefix = getenv("CI_PREFIX")
region = getenv("CI_REGION")
storage_class = getenv("CI_STORAGE_CLASS", optional=True)
assert prefix == "" or prefix.endswith("/")

nonce = random.randrange(2**64)
prefix = f"{prefix}{name}/{nonce}/"

return BucketPrefixFixture(region, bucket, prefix)
return BucketPrefixFixture(region, bucket, prefix, storage_class)


@pytest.fixture
Expand Down
15 changes: 12 additions & 3 deletions s3torchconnector/tst/e2e/test_e2e_s3datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ def test_s3mapdataset_images_10_from_prefix(image_directory):

# Intentional usage to emphasize the accessor dataset[index] works.
for index in range(len(dataset)):
key = f"{image_directory.prefix}img{index:03d}.jpg"
key = dataset[index].key
if _expect_sorted_list_results(image_directory.storage_class):
# S3 Express does not guarantee a sorted order for listed keys.
assert key == f"{image_directory.prefix}img{index:03d}.jpg"
assert dataset[index].read() == image_directory[key]


Expand Down Expand Up @@ -131,8 +134,10 @@ def _verify_image_iterable_dataset(
assert dataset is not None
for index, fileobj in enumerate(dataset):
assert fileobj is not None
full_key = f"{image_directory.prefix}img{index:03d}.jpg"
assert image_directory[full_key] == fileobj.read()
if _expect_sorted_list_results(image_directory.storage_class):
# S3 Express does not guarantee a sorted order for listed keys.
assert fileobj.key == f"{image_directory.prefix}img{index:03d}.jpg"
assert image_directory[fileobj.key] == fileobj.read()


def _pytorch_dataloader(
Expand Down Expand Up @@ -160,3 +165,7 @@ def _get_dataloader_len(dataloader: DataLoader):
return len(dataloader)
else:
return len(list(dataloader))


def _expect_sorted_list_results(storage_class: str) -> bool:
return storage_class != "EXPRESS_ONEZONE"
104 changes: 87 additions & 17 deletions s3torchconnectorclient/python/tst/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,109 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# // SPDX-License-Identifier: BSD

import io
import os
import random

import boto3
import numpy as np
from PIL import Image
import pytest

def getenv(var: str) -> str:

def getenv(var: str, optional: bool = False) -> str:
v = os.getenv(var)
if v is None:
raise Exception(f"required environment variable {var} is not set")
if v is None and not optional:
raise Exception(f"Required environment variable {var} is not set")
return v


# TODO: Update with test fixtures
class TestConfig(object):
"""Config object fixture for CI, wrapping region, S3 bucket, S3 express bucket and prefix configuration."""
class BucketPrefixFixture(object):
"""An S3 bucket/prefix and its contents for use in a single unit test. The prefix will be unique
to this instance, so other concurrent tests won't affect its state."""

region: str
bucket: str
express_bucket: str
express_region: str
prefix: str
storage_class: str = None

def __init__(
self, region: str, bucket: str, express_bucket: str, express_region: str
self, region: str, bucket: str, prefix: str, storage_class: str = None
):
self.region = region
self.bucket = bucket
self.express_bucket = express_bucket
self.express_region = express_region
self.prefix = prefix
self.region = region
self.storage_class = storage_class
self.contents = {}
session = boto3.Session(region_name=region)
self.s3 = session.client("s3")

@property
def s3_uri(self):
return f"s3://{self.bucket}/{self.prefix}"

def add(self, key: str, contents: bytes, **kwargs):
"""Upload an S3 object to this prefix of the bucket."""
full_key = f"{self.prefix}{key}"
self.s3.put_object(Bucket=self.bucket, Key=full_key, Body=contents, **kwargs)
self.contents[full_key] = contents

def __getitem__(self, index):
return self.contents[index]

def get_test_config() -> TestConfig:
def __iter__(self):
return iter(self.contents)


def get_test_bucket_prefix(name: str) -> BucketPrefixFixture:
"""Create a new bucket/prefix fixture for the given test name."""
region = getenv("CI_REGION")
bucket = getenv("CI_BUCKET")
express_bucket = getenv("CI_EXPRESS_BUCKET")
express_region = getenv("CI_EXPRESS_REGION")
prefix = getenv("CI_PREFIX")
region = getenv("CI_REGION")
storage_class = getenv("CI_STORAGE_CLASS", optional=True)
assert prefix == "" or prefix.endswith("/")

nonce = random.randrange(2**64)
prefix = f"{prefix}{name}/{nonce}/"

return BucketPrefixFixture(region, bucket, prefix, storage_class)


@pytest.fixture
def image_directory(request) -> BucketPrefixFixture:
"""Create a bucket/prefix fixture that contains a directory of random JPG image files."""
NUM_IMAGES = 10
IMAGE_SIZE = 100
fixture = get_test_bucket_prefix(f"{request.node.name}/image_directory")
for i in range(NUM_IMAGES):
data = np.random.randint(0, 256, IMAGE_SIZE * IMAGE_SIZE * 3, np.uint8)
data = data.reshape(IMAGE_SIZE, IMAGE_SIZE, 3)
image = Image.fromarray(data, "RGB")
image_bytes = io.BytesIO()
image.save(image_bytes, "jpeg")
image_bytes.seek(0)
image_bytes = image_bytes.read()

key = f"img{i:03d}.jpg"
fixture.add(key, image_bytes)

return fixture


@pytest.fixture
def sample_directory(request) -> BucketPrefixFixture:
fixture = get_test_bucket_prefix(f"{request.node.name}/sample_files")
fixture.add("hello_world.txt", b"Hello, World!\n")
return fixture


@pytest.fixture
def put_object_tests_directory(request) -> BucketPrefixFixture:
fixture = get_test_bucket_prefix(f"{request.node.name}/put_integration_tests")
fixture.add("to_overwrite.txt", b"before")
return fixture


return TestConfig(region, bucket, express_bucket, express_region)
@pytest.fixture
def checkpoint_directory(request) -> BucketPrefixFixture:
return get_test_bucket_prefix(f"{request.node.name}/checkpoint_directory")
Loading

0 comments on commit 1f8fdd0

Please sign in to comment.