Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenTelemetry instrumentation #59

Merged
merged 10 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
FROM python:3.9.0-buster AS build
FROM python:3.9-buster AS build

COPY . /src
WORKDIR /src/projects/etos_suite_runner
RUN python3 setup.py bdist_wheel

FROM python:3.9.0-slim-buster
FROM python:3.9-slim-buster

COPY --from=build /src/projects/etos_suite_runner/dist/*.whl /tmp
# hadolint ignore=DL3013
RUN pip install --no-cache-dir /tmp/*.whl && groupadd -r etos && useradd -r -m -s /bin/false -g etos etos

USER etos

LABEL org.opencontainers.image.source=https://github.com/eiffel-community/etos-suite-runner
Expand Down
7 changes: 5 additions & 2 deletions projects/etos_suite_runner/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@
PyScaffold==3.2.3
packageurl-python~=0.11
cryptography>=42.0.4,<43.0.0
etos_lib==4.0.0
etos_environment_provider~=4.1
etos_lib==4.2.0
etos_environment_provider~=4.2
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21
7 changes: 5 additions & 2 deletions projects/etos_suite_runner/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,11 @@ install_requires =
PyScaffold==3.2.3
packageurl-python~=0.11
cryptography>=42.0.4,<43.0.0
etos_lib==4.0.0
etos_environment_provider~=4.1
etos_lib==4.2.0
etos_environment_provider~=4.2
opentelemetry-api~=1.21
opentelemetry-exporter-otlp~=1.21
opentelemetry-sdk~=1.21

python_requires = >=3.4

Expand Down
41 changes: 41 additions & 0 deletions projects/etos_suite_runner/src/etos_suite_runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""ETOS suite runner module."""
import logging
import os
from importlib.metadata import PackageNotFoundError, version

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_NAMESPACE, SERVICE_VERSION, Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from etos_lib.logging.logger import setup_logging

try:
Expand All @@ -30,3 +37,37 @@
ENVIRONMENT = "development" if DEV else "production"
os.environ["ENVIRONMENT_PROVIDER_DISABLE_LOGGING"] = "true"
setup_logging("ETOS Suite Runner", VERSION, ENVIRONMENT)


LOGGER = logging.getLogger(__name__)

# Setting OTEL_COLLECTOR_HOST will override the default OTEL collector endpoint.
# This is needed because Suite Runner uses the cluster-level OpenTelemetry collector
# instead of a sidecar collector.
if os.getenv("OTEL_COLLECTOR_HOST"):
os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = os.getenv("OTEL_COLLECTOR_HOST")
else:
if "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT" in os.environ:
LOGGER.debug("Environment variable OTEL_EXPORTER_OTLP_TRACES_ENDPOINT not used.")
LOGGER.debug("To specify an OpenTelemetry collector host use OTEL_COLLECTOR_HOST.")
del os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"]

if os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"):
LOGGER.info(
"Using OpenTelemetry collector: %s", os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT")
)
PROVIDER = TracerProvider(
resource=Resource.create(
{
SERVICE_NAME: "etos-suite-runner",
SERVICE_VERSION: VERSION,
SERVICE_NAMESPACE: ENVIRONMENT,
}
)
)
EXPORTER = OTLPSpanExporter()
PROCESSOR = BatchSpanProcessor(EXPORTER)
PROVIDER.add_span_processor(PROCESSOR)
trace.set_tracer_provider(PROVIDER)
else:
LOGGER.info("OpenTelemetry not enabled. OTEL_COLLECTOR_HOST not set.")
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from .esr import ESR


LOGGER = logging.getLogger(__name__)


Expand Down
85 changes: 52 additions & 33 deletions projects/etos_suite_runner/src/etos_suite_runner/esr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,18 @@
from etos_lib import ETOS
from etos_lib.logging.logger import FORMAT_CONFIG
from jsontas.jsontas import JsonTas
import opentelemetry

from .lib.esr_parameters import ESRParameters
from .lib.exceptions import EnvironmentProviderException
from .lib.runner import SuiteRunner
from .lib.otel_tracing import get_current_context, OpenTelemetryBase

# Remove spam from pika.
logging.getLogger("pika").setLevel(logging.WARNING)


class ESR: # pylint:disable=too-many-instance-attributes
class ESR(OpenTelemetryBase): # pylint:disable=too-many-instance-attributes
"""Suite runner for ETOS main program.

Run this as a daemon on your system in order to trigger test suites within
Expand All @@ -49,6 +51,7 @@ class ESR: # pylint:disable=too-many-instance-attributes
def __init__(self) -> None:
"""Initialize ESR by creating a rabbitmq publisher."""
self.logger = logging.getLogger("ESR")
self.otel_tracer = opentelemetry.trace.get_tracer(__name__)
self.etos = ETOS("ETOS Suite Runner", os.getenv("SOURCE_HOST"), "ETOS Suite Runner")
signal.signal(signal.SIGTERM, self.graceful_exit)
self.params = ESRParameters(self.etos)
Expand All @@ -67,40 +70,57 @@ def _request_environment(self, ids: list[str]) -> None:
:param ids: Generated suite runner IDs used to correlate environments and the suite
runners.
"""
try:
provider = EnvironmentProvider(self.params.tercc.meta.event_id, ids, copy=False)
result = provider.run()
except Exception:
self.params.set_status("FAILURE", "Failed to run environment provider")
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
raise
if result.get("error") is not None:
self.params.set_status("FAILURE", result.get("error"))
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
else:
self.params.set_status("SUCCESS", result.get("error"))
self.logger.info(
"Environment provider has finished creating an environment for test.",
extra={"user_log": True},
)
span_name = "request_environment"
suite_context = get_current_context()
with self.otel_tracer.start_as_current_span(
span_name,
context=suite_context,
kind=opentelemetry.trace.SpanKind.CLIENT,
):
try:
provider = EnvironmentProvider(self.params.tercc.meta.event_id, ids, copy=False)
result = provider.run()
except Exception as exc:
self.params.set_status("FAILURE", "Failed to run environment provider")
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
self._record_exception(exc)
raise
if result.get("error") is not None:
self.params.set_status("FAILURE", result.get("error"))
self.logger.error(
"Environment provider has failed in creating an environment for test.",
extra={"user_log": True},
)
andmat900 marked this conversation as resolved.
Show resolved Hide resolved
exc = Exception(str(result.get("error")))
self._record_exception(exc)
else:
self.params.set_status("SUCCESS", result.get("error"))
self.logger.info(
"Environment provider has finished creating an environment for test.",
extra={"user_log": True},
)

def _release_environment(self) -> None:
"""Release an environment from the environment provider."""
# TODO: We should remove jsontas as a requirement for this function.
# Passing variables as keyword argument to make it easier to transition to a function where
# jsontas is not required.
jsontas = JsonTas()
status, message = release_full_environment(
etos=self.etos, jsontas=jsontas, suite_id=self.params.tercc.meta.event_id
)
if not status:
self.logger.error(message)
span_name = "release_full_environment"
suite_context = get_current_context()
with self.otel_tracer.start_as_current_span(
span_name,
context=suite_context,
kind=opentelemetry.trace.SpanKind.CLIENT,
):
status, message = release_full_environment(
etos=self.etos, jsontas=jsontas, suite_id=self.params.tercc.meta.event_id
)
if not status:
self.logger.error(message)

def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
"""Start up a suite runner handling multiple suites that execute within test runners.
Expand All @@ -117,13 +137,11 @@ def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
"ESR Docker", {"CONTEXT": context}, image=os.getenv("SUITE_RUNNER")
)
runner = SuiteRunner(self.params, self.etos)

ids = []
for suite in self.params.test_suite:
suite["test_suite_started_id"] = str(uuid4())
ids.append(suite["test_suite_started_id"])
self.logger.info("Number of test suites to run: %d", len(ids), extra={"user_log": True})

try:
self.logger.info("Get test environment.")
threading.Thread(
Expand All @@ -135,10 +153,11 @@ def run_suites(self, triggered: EiffelActivityTriggeredEvent) -> list[str]:
self.logger.info("Starting ESR.")
runner.start_suites_and_wait()
return ids
except EnvironmentProviderException:
except EnvironmentProviderException as exc:
self.logger.info("Release test environment.")
self._release_environment()
raise
self._record_exception(exc)
raise exc

@staticmethod
def verify_input() -> None:
Expand Down Expand Up @@ -176,7 +195,6 @@ def run(self) -> list[str]:
executionType="AUTOMATED",
triggers=[{"type": "EIFFEL_EVENT"}],
)

self.verify_input()
context = triggered.meta.event_id
except: # noqa
Expand Down Expand Up @@ -211,6 +229,7 @@ def run(self) -> list[str]:
"MAJOR",
{"CONTEXT": context},
)
self._record_exception(exception)
raise

def graceful_exit(self, *_) -> None:
Expand Down
44 changes: 31 additions & 13 deletions projects/etos_suite_runner/src/etos_suite_runner/lib/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,19 @@
"""Executor handler module."""
import logging
import os
from json import JSONDecodeError
from json import JSONDecodeError, dumps
from typing import Union

from cryptography.fernet import Fernet
from etos_lib import ETOS
from etos_lib.opentelemetry.semconv import Attributes as SemConvAttributes
from opentelemetry import trace
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import HTTPError

from .otel_tracing import OpenTelemetryBase


class TestStartException(Exception):
"""Exception when starting tests."""
Expand All @@ -35,7 +39,7 @@ def __init__(self, message: dict):
self.error = message.get("error", "Unknown error when starting tests")


class Executor: # pylint:disable=too-few-public-methods
class Executor(OpenTelemetryBase): # pylint:disable=too-few-public-methods
"""Executor for launching ETR."""

logger = logging.getLogger("ESR - Executor")
Expand All @@ -47,6 +51,7 @@ def __init__(self, etos: ETOS) -> None:
"""
self.etos = etos
self.etos.config.set("build_urls", [])
self.tracer = trace.get_tracer(__name__)

def __decrypt(self, password: Union[str, dict]) -> str:
"""Decrypt a password using an encryption key.
Expand Down Expand Up @@ -89,15 +94,28 @@ def run_tests(self, test_suite: dict) -> None:
if request.get("auth"):
request["auth"] = self.__auth(**request["auth"])
method = getattr(self.etos.http, request.pop("method").lower())
try:
response = method(**request)
response.raise_for_status()
except HTTPError as http_error:
span_name = "start_execution_space"
with self.tracer.start_as_current_span(span_name, kind=trace.SpanKind.CLIENT) as span:
span.set_attribute(
SemConvAttributes.EXECUTOR_ID, executor["id"] if "id" in executor else ""
)
span.set_attribute("http.request.body", dumps(request))
try:
raise TestStartException(http_error.response.json()) from http_error
except JSONDecodeError:
raise TestStartException({"error": http_error.response.text}) from http_error
except RequestsConnectionError as connection_error:
raise TestStartException({"error": str(connection_error)}) from connection_error
self.logger.info("%r", response)
self.logger.debug("%r", response.text)
response = method(**request)
response.raise_for_status()
except HTTPError as http_error:
try:
exc = TestStartException(http_error.response.json())
self._record_exception(exc)
raise exc from http_error
except JSONDecodeError:
exc = TestStartException({"error": http_error.response.text})
self._record_exception(exc)
raise exc from http_error
except RequestsConnectionError as connection_error:
exc = TestStartException({"error": str(connection_error)})
self._record_exception(exc)
raise exc from connection_error

self.logger.info("%r", response)
self.logger.debug("%r", response.text)
Loading
Loading