Skip to content

Commit

Permalink
Remove testrunner validation retries and cache (#86)
Browse files Browse the repository at this point in the history
  • Loading branch information
andmat900 authored Oct 14, 2024
1 parent 4cf0fdb commit 5418166
Showing 1 changed file with 3 additions and 94 deletions.
97 changes: 3 additions & 94 deletions python/src/etos_api/library/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# limitations under the License.
"""ETOS API suite validator module."""
import logging
import asyncio
import time
from typing import List, Union
from uuid import UUID

Expand All @@ -26,82 +24,12 @@
from pydantic import BaseModel # pylint:disable=no-name-in-module
from pydantic import ValidationError, conlist, constr, field_validator
from pydantic.fields import PrivateAttr
from opentelemetry import trace

from etos_api.library.docker import Docker

# pylint:disable=too-few-public-methods


class TestRunnerValidationCache:
    """Lazy test runner validation via in-memory cache.

    Stores, per test runner container name, the timestamp of its latest
    passed validation. An entry is considered valid for
    ``TESTRUNNER_VALIDATION_WINDOW`` seconds after that timestamp. All state
    is kept on the class itself and every access is guarded by ``lock``.
    """

    # Cache for lazy testrunner validation. Keys: container names, values: timestamp.
    # Only passed validations are cached.
    TESTRUNNER_VALIDATION_CACHE = {}
    TESTRUNNER_VALIDATION_WINDOW = 1800  # seconds

    # asyncio.Lock is not reentrant: methods below must never call each other
    # while holding it.
    lock = asyncio.Lock()

    @classmethod
    async def get_timestamp(cls, test_runner: str) -> Union[float, None]:
        """Get latest passed validation timestamp for the given testrunner.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: validation timestamp or none if not found
        :rtype: float or NoneType
        """
        async with cls.lock:
            return cls.TESTRUNNER_VALIDATION_CACHE.get(test_runner)

    @classmethod
    async def set_timestamp(cls, test_runner: str, timestamp: float) -> None:
        """Set passed validation timestamp for the given testrunner.

        :param test_runner: test runner container name
        :type test_runner: str
        :param timestamp: time of the passed validation, on the same clock as
            ``time.time()`` (which is what the expiry check compares against)
        :type timestamp: float
        :return: none
        :rtype: NoneType
        """
        async with cls.lock:
            cls.TESTRUNNER_VALIDATION_CACHE[test_runner] = timestamp

    @classmethod
    async def remove(cls, test_runner: str) -> None:
        """Remove the given test runner from the validation cache.

        Removing a test runner that is not cached is a no-op.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: none
        :rtype: NoneType
        """
        async with cls.lock:
            cls.TESTRUNNER_VALIDATION_CACHE.pop(test_runner, None)

    @classmethod
    async def is_test_runner_valid(cls, test_runner: str) -> bool:
        """Determine if the given test runner is valid.

        A test runner is valid when it has a cached timestamp that is still
        inside the validation window. An expired entry is evicted.

        :param test_runner: test runner container name
        :type test_runner: str
        :return: validation result from cache
        :rtype: bool
        """
        # The lookup, the expiry check and the eviction happen in a single
        # critical section. Doing them under separate lock acquisitions would
        # let a concurrent set_timestamp() slip in between and have its fresh
        # entry deleted by the stale eviction.
        async with cls.lock:
            timestamp = cls.TESTRUNNER_VALIDATION_CACHE.get(test_runner)
            if timestamp is None:
                return False
            if (timestamp + cls.TESTRUNNER_VALIDATION_WINDOW) > time.time():
                return True
            del cls.TESTRUNNER_VALIDATION_CACHE[test_runner]
            return False


class Environment(BaseModel):
"""ETOS suite definition 'ENVIRONMENT' constraint."""

Expand Down Expand Up @@ -251,7 +179,6 @@ async def validate(self, test_suite_url):
:type test_suite_url: str
:raises ValidationError: If the suite did not validate.
"""
span = trace.get_current_span()
downloaded_suite = await self._download_suite(test_suite_url)
assert (
len(downloaded_suite) > 0
Expand All @@ -267,24 +194,6 @@ async def validate(self, test_suite_url):
test_runners.add(constraint.value)
docker = Docker()
for test_runner in test_runners:
if await TestRunnerValidationCache.is_test_runner_valid(test_runner):
self.logger.info("Using cached test runner validation result: %s", test_runner)
continue
for attempt in range(5):
if attempt > 0:
span.add_event(f"Test runner validation unsuccessful, retry #{attempt}")
self.logger.warning(
"Test runner %s validation unsuccessful, retry #%d",
test_runner,
attempt,
)
result = await docker.digest(test_runner)
if result:
# only passed validations shall be cached
await TestRunnerValidationCache.set_timestamp(test_runner, time.time())
break
# Total wait time with 5 attempts: 55 seconds
sleep_time = (attempt + 1) ** 2
await asyncio.sleep(sleep_time)

assert result is not None, f"Test runner {test_runner} not found"
assert (
await docker.digest(test_runner) is not None
), f"Test runner {test_runner} not found"

0 comments on commit 5418166

Please sign in to comment.