Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore testrunner validation retries and cache #87

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 94 additions & 3 deletions python/src/etos_api/library/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# limitations under the License.
"""ETOS API suite validator module."""
import logging
import asyncio
import time
from typing import List, Union
from uuid import UUID

Expand All @@ -24,12 +26,82 @@
from pydantic import BaseModel # pylint:disable=no-name-in-module
from pydantic import ValidationError, conlist, constr, field_validator
from pydantic.fields import PrivateAttr
from opentelemetry import trace

from etos_api.library.docker import Docker

# pylint:disable=too-few-public-methods


class TestRunnerValidationCache:
    """In-memory cache of test runners that recently passed validation.

    All state lives on the class itself and every method is a classmethod,
    so the cache is shared by everything that uses this class. Each access
    to the underlying dictionary is made while holding ``lock``.
    """

    # Maps test runner container name -> timestamp of its latest passed
    # validation. Only passed validations are meant to be stored here.
    TESTRUNNER_VALIDATION_CACHE = {}
    # Number of seconds a passed validation stays trusted: one week.
    TESTRUNNER_VALIDATION_WINDOW = 7 * 24 * 3600

    # Held around every read and write of TESTRUNNER_VALIDATION_CACHE.
    lock = asyncio.Lock()

    @classmethod
    async def get_timestamp(cls, test_runner: str) -> Union[float, None]:
        """Look up when the given test runner last passed validation.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: stored validation timestamp, or none if there is no entry
        :rtype: float or NoneType
        """
        async with cls.lock:
            return cls.TESTRUNNER_VALIDATION_CACHE.get(test_runner)

    @classmethod
    async def set_timestamp(cls, test_runner: str, timestamp: float) -> None:
        """Record a passed validation for the given test runner.

        Any timestamp already stored for the test runner is overwritten.

        :param test_runner: test runner container name
        :type test_runner: str
        :param timestamp: time at which the validation passed
        :type timestamp: float
        :return: none
        :rtype: NoneType
        """
        async with cls.lock:
            cls.TESTRUNNER_VALIDATION_CACHE[test_runner] = timestamp

    @classmethod
    async def remove(cls, test_runner: str) -> None:
        """Drop the given test runner from the cache, if it is present.

        Removing a test runner that has no entry is a no-op.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: none
        :rtype: NoneType
        """
        async with cls.lock:
            cls.TESTRUNNER_VALIDATION_CACHE.pop(test_runner, None)

    @classmethod
    async def is_test_runner_valid(cls, test_runner: str) -> bool:
        """Tell whether the cache holds a still-fresh validation.

        An entry counts as fresh while its timestamp plus
        ``TESTRUNNER_VALIDATION_WINDOW`` lies after the current
        ``time.time()``. An entry found to be stale is removed from the
        cache before returning.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: true if a fresh entry exists, false otherwise
        :rtype: bool
        """
        validated_at = await cls.get_timestamp(test_runner)
        if validated_at is None:
            return False

        expires_at = validated_at + cls.TESTRUNNER_VALIDATION_WINDOW
        if time.time() < expires_at:
            return True

        # Stale entry: evict it so the next lookup is a plain cache miss.
        await cls.remove(test_runner)
        return False


class Environment(BaseModel):
"""ETOS suite definion 'ENVIRONMENT' constraint."""

Expand Down Expand Up @@ -179,6 +251,7 @@ async def validate(self, test_suite_url):
:type test_suite_url: str
:raises ValidationError: If the suite did not validate.
"""
span = trace.get_current_span()
downloaded_suite = await self._download_suite(test_suite_url)
assert (
len(downloaded_suite) > 0
Expand All @@ -194,6 +267,24 @@ async def validate(self, test_suite_url):
test_runners.add(constraint.value)
docker = Docker()
for test_runner in test_runners:
assert (
await docker.digest(test_runner) is not None
), f"Test runner {test_runner} not found"
if await TestRunnerValidationCache.is_test_runner_valid(test_runner):
self.logger.info("Using cached test runner validation result: %s", test_runner)
continue
for attempt in range(5):
if attempt > 0:
span.add_event(f"Test runner validation unsuccessful, retry #{attempt}")
self.logger.warning(
"Test runner %s validation unsuccessful, retry #%d",
test_runner,
attempt,
)
result = await docker.digest(test_runner)
if result:
# only passed validations shall be cached
await TestRunnerValidationCache.set_timestamp(test_runner, time.time())
break
# Total wait time with 5 attempts: 55 seconds
sleep_time = (attempt + 1) ** 2
await asyncio.sleep(sleep_time)

assert result is not None, f"Test runner {test_runner} not found"
Loading