Skip to content

Commit

Permalink
Add OpenTelemetry instrumentation (#59)
Browse files Browse the repository at this point in the history
* Add OpenTelemetry instrumentation
  • Loading branch information
andmat900 authored May 7, 2024
1 parent 3d834f3 commit 9cd62cc
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 102 deletions.
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
FROM python:3.9.0-buster AS build
FROM python:3.9-buster AS build

COPY . /src
WORKDIR /src/projects/etos_suite_runner
RUN python3 setup.py bdist_wheel

FROM python:3.9.0-slim-buster
FROM python:3.9-slim-buster

COPY --from=build /src/projects/etos_suite_runner/dist/*.whl /tmp
# hadolint ignore=DL3013
RUN pip install --no-cache-dir /tmp/*.whl && groupadd -r etos && useradd -r -m -s /bin/false -g etos etos

USER etos

LABEL org.opencontainers.image.source=https://github.com/eiffel-community/etos-suite-runner
Expand Down
7 changes: 5 additions & 2 deletions projects/etos_suite_runner/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@
PyScaffold==3.2.3
packageurl-python~=0.11
cryptography>=42.0.4,<43.0.0
etos_lib==4.0.0
etos_environment_provider~=4.1
etos_lib==4.2.0
etos_environment_provider~=4.2
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21
7 changes: 5 additions & 2 deletions projects/etos_suite_runner/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ install_requires =
PyScaffold==3.2.3
packageurl-python~=0.11
cryptography>=42.0.4,<43.0.0
etos_lib==4.0.0
etos_environment_provider~=4.1
etos_lib==4.2.0
etos_environment_provider~=4.2
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21

python_requires = >=3.4

Expand Down
41 changes: 41 additions & 0 deletions projects/etos_suite_runner/src/etos_suite_runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""ETOS suite runner module."""
import logging
import os
from importlib.metadata import PackageNotFoundError, version

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_NAMESPACE, SERVICE_VERSION, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from etos_lib.logging.logger import setup_logging

try:
Expand All @@ -30,3 +37,37 @@
ENVIRONMENT = "development" if DEV else "production"
os.environ["ENVIRONMENT_PROVIDER_DISABLE_LOGGING"] = "true"
setup_logging("ETOS Suite Runner", VERSION, ENVIRONMENT)


LOGGER = logging.getLogger(__name__)

# Setting OTEL_COLLECTOR_HOST will override the default OTEL collector endpoint.
# This is needed because Suite Runner uses the cluster-level OpenTelemetry collector
# instead of a sidecar collector.
if os.getenv("OTEL_COLLECTOR_HOST"):
os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = os.getenv("OTEL_COLLECTOR_HOST")
else:
if "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT" in os.environ:
LOGGER.debug("Environment variable OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not used.")
LOGGER.debug("To specify an OpenTelemetry collector host use OTEL_COLLECTOR_HOST.")
del os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"]

if os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
LOGGER.info(
"Using OpenTelemetry collector: %s", os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT")
)
PROVIDER = TracerProvider(
resource=Resource.create(
{
SERVICE_NAME: "etos-suite-runner",
SERVICE_VERSION: VERSION,
SERVICE_NAMESPACE: ENVIRONMENT,
}
)
)
EXPORTER = OTLPSpanExporter()
PROCESSOR = BatchSpanProcessor(EXPORTER)
PROVIDER.add_span_processor(PROCESSOR)
trace.set_tracer_provider(PROVIDER)
else:
LOGGER.info("OpenTelemetry not enabled. OTEL_COLLECTOR_HOST not set.")
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from .esr import ESR


LOGGER = logging.getLogger(__name__)


Expand Down
85 changes: 52 additions & 33 deletions projects/etos_suite_runner/src/etos_suite_runner/esr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,18 @@
from etos_lib import ETOS
from etos_lib.logging.logger import FORMAT_CONFIG
from jsontas.jsontas import JsonTas
import opentelemetry

from .lib.esr_parameters import ESRParameters
from .lib.exceptions import EnvironmentProviderException
from .lib.runner import SuiteRunner
from .lib.otel_tracing import get_current_context, OpenTelemetryBase

# Remove spam from pika.
logging.getLogger("pika").setLevel(logging.WARNING)


class ESR: # pylint:disable=too-many-instance-attributes
class ESR(OpenTelemetryBase): # pylint:disable=too-many-instance-attributes
"""Suite runner for ETOS main program.
Run this as a daemon on your system in order to trigger test suites within
Expand All @@ -49,6 +51,7 @@ class ESR: # pylint:disable=too-many-instance-attributes
def __init__(self) -> None:
"""Initialize ESR by creating a rabbitmq publisher."""
self.logger = logging.getLogger("ESR")
self.otel_tracer = opentelemetry.trace.get_tracer(__name__)
self.etos = ETOS("ETOS Suite Runner", os.getenv("SOURCE_HOST"), "ETOS Suite Runner")
signal.signal(signal.SIGTERM, self.graceful_exit)
self.params = ESRParameters(self.etos)
Expand All @@ -67,40 +70,57 @@ def _request_environment(self, ids: list[str]) -> None:
:param ids: Generated suite runner IDs used to correlate environments and the suite
runners.
"""
try:
provider = EnvironmentProvider(self.params.tercc.meta.event_id, ids, copy=False)
result = provider.run()
except Exception:
self.params.set_status("FAILURE", "Failed to run environment provider")
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
raise
if result.get("error") is not None:
self.params.set_status("FAILURE", result.get("error"))
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
else:
self.params.set_status("SUCCESS", result.get("error"))
self.logger.info(
"Environment provider has finished creating an environment for test.",
extra={"user_log": True},
)
span_name = "request_environment"
suite_context = get_current_context()
with self.otel_tracer.start_as_current_span(
span_name,
context=suite_context,
kind=opentelemetry.trace.SpanKind.CLIENT,
):
try:
provider = EnvironmentProvider(self.params.tercc.meta.event_id, ids, copy=False)
result = provider.run()
except Exception as exc:
self.params.set_status("FAILURE", "Failed to run environment provider")
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
self._record_exception(exc)
raise
if result.get("error") is not None:
self.params.set_status("FAILURE", result.get("error"))
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
exc = Exception(str(result.get("error")))
self._record_exception(exc)
else:
self.params.set_status("SUCCESS", result.get("error"))
self.logger.info(
"Environment provider has finished creating an environment for test.",
extra={"user_log": True},
)

def _release_environment(self) -> None:
"""Release an environment from the environment provider."""
# TODO: We should remove jsontas as a requirement for this function.
# Passing variables as keyword argument to make it easier to transition to a function where
# jsontas is not required.
jsontas = JsonTas()
status, message = release_full_environment(
etos=self.etos, jsontas=jsontas, suite_id=self.params.tercc.meta.event_id
)
if not status:
self.logger.error(message)
span_name = "release_full_environment"
suite_context = get_current_context()
with self.otel_tracer.start_as_current_span(
span_name,
context=suite_context,
kind=opentelemetry.trace.SpanKind.CLIENT,
):
status, message = release_full_environment(
etos=self.etos, jsontas=jsontas, suite_id=self.params.tercc.meta.event_id
)
if not status:
self.logger.error(message)

def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
"""Start up a suite runner handling multiple suites that execute within test runners.
Expand All @@ -117,13 +137,11 @@ def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
"ESR Docker", {"CONTEXT": context}, image=os.getenv("SUITE_RUNNER")
)
runner = SuiteRunner(self.params, self.etos)

ids = []
for suite in self.params.test_suite:
suite["test_suite_started_id"] = str(uuid4())
ids.append(suite["test_suite_started_id"])
self.logger.info("Number of test suites to run: %d", len(ids), extra={"user_log": True})

try:
self.logger.info("Get test environment.")
threading.Thread(
Expand All @@ -135,10 +153,11 @@ def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
self.logger.info("Starting ESR.")
runner.start_suites_and_wait()
return ids
except EnvironmentProviderException:
except EnvironmentProviderException as exc:
self.logger.info("Release test environment.")
self._release_environment()
raise
self._record_exception(exc)
raise exc

@staticmethod
def verify_input() -> None:
Expand Down Expand Up @@ -176,7 +195,6 @@ def run(self) -> list[str]:
executionType="AUTOMATED",
triggers=[{"type": "EIFFEL_EVENT"}],
)

self.verify_input()
context = triggered.meta.event_id
except: # noqa
Expand Down Expand Up @@ -211,6 +229,7 @@ def run(self) -> list[str]:
"MAJOR",
{"CONTEXT": context},
)
self._record_exception(exception)
raise

def graceful_exit(self, *_) -> None:
Expand Down
44 changes: 31 additions & 13 deletions projects/etos_suite_runner/src/etos_suite_runner/lib/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,19 @@
"""Executor handler module."""
import logging
import os
from json import JSONDecodeError
from json import JSONDecodeError, dumps
from typing import Union

from cryptography.fernet import Fernet
from etos_lib import ETOS
from etos_lib.opentelemetry.semconv import Attributes as SemConvAttributes
from opentelemetry import trace
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import HTTPError

from .otel_tracing import OpenTelemetryBase


class TestStartException(Exception):
"""Exception when starting tests."""
Expand All @@ -35,7 +39,7 @@ def __init__(self, message: dict):
self.error = message.get("error", "Unknown error when starting tests")


class Executor: # pylint:disable=too-few-public-methods
class Executor(OpenTelemetryBase): # pylint:disable=too-few-public-methods
"""Executor for launching ETR."""

logger = logging.getLogger("ESR - Executor")
Expand All @@ -47,6 +51,7 @@ def __init__(self, etos: ETOS) -> None:
"""
self.etos = etos
self.etos.config.set("build_urls", [])
self.tracer = trace.get_tracer(__name__)

def __decrypt(self, password: Union[str, dict]) -> str:
"""Decrypt a password using an encryption key.
Expand Down Expand Up @@ -89,15 +94,28 @@ def run_tests(self, test_suite: dict) -> None:
if request.get("auth"):
request["auth"] = self.__auth(**request["auth"])
method = getattr(self.etos.http, request.pop("method").lower())
try:
response = method(**request)
response.raise_for_status()
except HTTPError as http_error:
span_name = "start_execution_space"
with self.tracer.start_as_current_span(span_name, kind=trace.SpanKind.CLIENT) as span:
span.set_attribute(
SemConvAttributes.EXECUTOR_ID, executor["id"] if "id" in executor else ""
)
span.set_attribute("http.request.body", dumps(request))
try:
raise TestStartException(http_error.response.json()) from http_error
except JSONDecodeError:
raise TestStartException({"error": http_error.response.text}) from http_error
except RequestsConnectionError as connection_error:
raise TestStartException({"error": str(connection_error)}) from connection_error
self.logger.info("%r", response)
self.logger.debug("%r", response.text)
response = method(**request)
response.raise_for_status()
except HTTPError as http_error:
try:
exc = TestStartException(http_error.response.json())
self._record_exception(exc)
raise exc from http_error
except JSONDecodeError:
exc = TestStartException({"error": http_error.response.text})
self._record_exception(exc)
raise exc from http_error
except RequestsConnectionError as connection_error:
exc = TestStartException({"error": str(connection_error)})
self._record_exception(exc)
raise exc from connection_error

self.logger.info("%r", response)
self.logger.debug("%r", response.text)
Loading

0 comments on commit 9cd62cc

Please sign in to comment.