Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-5587] Fix private IP resolution to use juju spaces #122

Merged
merged 23 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion lib/charms/data_platform_libs/v0/data_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def _on_topic_requested(self, event: TopicRequestedEvent):

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 39
LIBPATCH = 40

PYDEPS = ["ops>=2.0.0"]

Expand Down Expand Up @@ -391,6 +391,10 @@ class IllegalOperationError(DataInterfacesError):
"""To be used when an operation is not allowed to be performed."""


class PrematureDataAccessError(DataInterfacesError):
"""To be raised when the Relation Data may be accessed (written) before protocol init complete."""


##############################################################################
# Global helpers / utilities
##############################################################################
Expand Down Expand Up @@ -1453,6 +1457,8 @@ def _on_relation_changed_event(self, event: RelationChangedEvent) -> None:
class ProviderData(Data):
"""Base provides-side of the data products relation."""

RESOURCE_FIELD = "database"

def __init__(
self,
model: Model,
Expand Down Expand Up @@ -1618,6 +1624,15 @@ def _fetch_my_specific_relation_data(
def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None:
"""Set values for fields not caring whether it's a secret or not."""
req_secret_fields = []

keys = set(data.keys())
if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and (
keys - {"endpoints", "read-only-endpoints", "replset"}
):
raise PrematureDataAccessError(
"Premature access to relation data, update is forbidden before the connection is initialized."
)

if relation.app:
req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS)

Expand Down Expand Up @@ -3290,6 +3305,8 @@ class KafkaRequiresEvents(CharmEvents):
class KafkaProviderData(ProviderData):
"""Provider-side of the Kafka relation."""

RESOURCE_FIELD = "topic"

def __init__(self, model: Model, relation_name: str) -> None:
super().__init__(model, relation_name)

Expand Down Expand Up @@ -3539,6 +3556,8 @@ class OpenSearchRequiresEvents(CharmEvents):
class OpenSearchProvidesData(ProviderData):
"""Provider-side of the OpenSearch relation."""

RESOURCE_FIELD = "index"

def __init__(self, model: Model, relation_name: str) -> None:
super().__init__(model, relation_name)

Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pytest = "^8.2.2"
juju = "~3.5.0"
tenacity = "^8.4.2"
coverage = {extras = ["toml"], version = ">7.0"}
pytest-asyncio = "^0.21.2"
pytest-operator = ">0.20"
pytest-operator-cache = {git = "https://github.com/canonical/data-platform-workflows", tag = "v16.7.0", subdirectory = "python/pytest_plugins/pytest_operator_cache"}
pytest-operator-groups = {git = "https://github.com/canonical/data-platform-workflows", tag = "v16.7.0", subdirectory = "python/pytest_plugins/pytest_operator_groups"}
Expand Down
1 change: 0 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ def _on_install(self, event: InstallEvent) -> None:

def reconcile(self, event: EventBase) -> None:
"""Generic handler for all 'something changed, update' events across all relations."""

# 1. Block until peer relation is set
if not self.state.peer_relation:
self.unit.status = WaitingStatus(MSG_WAITING_FOR_PEER)
Expand Down
20 changes: 19 additions & 1 deletion src/core/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

"""Collection of global cluster state."""
import logging
from ipaddress import IPv4Address, IPv6Address

from charms.data_platform_libs.v0.data_interfaces import (
DataPeerData,
Expand All @@ -23,6 +24,7 @@
PEER,
PEER_APP_SECRETS,
PEER_UNIT_SECRETS,
SERVER_PORT,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -63,7 +65,7 @@ def opensearch_relation(self) -> Relation | None:

@property
def tls_relation(self) -> Relation | None:
"""The cluster peer relation."""
"""The cluster tls relation."""
return self.model.get_relation(CERTS_REL_NAME)

# --- CORE COMPONENTS---
Expand Down Expand Up @@ -141,6 +143,16 @@ def opensearch_server(self) -> OpensearchServer | None:
local_app=self.cluster.app,
)

@property
def bind_address(self) -> IPv4Address | IPv6Address | str | None:
"""The network binding address from the peer relation."""
bind_address = None
if self.peer_relation:
if binding := self.model.get_binding(self.peer_relation):
bind_address = binding.network.bind_address
# If the relation does not exist, then we get None
return bind_address

# --- CLUSTER INIT ---

@property
Expand All @@ -162,3 +174,9 @@ def stable(self) -> bool:
return False

return True

@property
def url(self) -> str:
"""Service URL."""
scheme = "https" if self.unit_server.tls else "http"
return f"{scheme}://{self.bind_address}:{SERVER_PORT}"
29 changes: 7 additions & 22 deletions src/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,12 @@
"""Collection of state objects for relations, apps and units."""
import logging
import socket
import subprocess
from typing import Literal, MutableMapping

from charms.data_platform_libs.v0.data_interfaces import Data, DataDict
from ops.model import Application, Relation, Unit
from typing_extensions import override

from literals import SERVER_PORT

logger = logging.getLogger(__name__)

SUBSTRATES = Literal["vm", "k8s"]
Expand Down Expand Up @@ -178,24 +175,18 @@ def hostname(self) -> str:
@property
def fqdn(self) -> str:
"""The Fully Qualified Domain Name for the unit."""
return socket.getfqdn()
# return socket.getfqdn(self.private_ip)
return socket.getfqdn(self.private_ip)

@property
def private_ip(self) -> str:
"""The IP for the unit."""
"""The IP for the unit recovered using socket."""
return socket.gethostbyname(self.hostname)

@property
def public_ip(self) -> str:
result = subprocess.check_output(
[
"bash",
"-c",
"ip a | grep global | grep -v 'inet 10.' | cut -d' ' -f6 | cut -d'/' -f1",
skourta marked this conversation as resolved.
Show resolved Hide resolved
],
text=True,
)
return result.rstrip()
"""The public IP for the unit."""
return socket.gethostbyname(self.hostname)

@property
def host(self) -> str:
Expand Down Expand Up @@ -245,12 +236,6 @@ def sans(self) -> dict[str, list[str]]:
return {}

return {
"sans_ip": [self.private_ip],
"sans_dns": [self.hostname, self.fqdn],
"sans_ip": [self.private_ip, self.public_ip],
"sans_dns": [dns for dns in {self.hostname, self.fqdn} if dns],
}

@property
def url(self) -> str:
"""Service URL."""
scheme = "https" if self.tls else "http"
return f"{scheme}://{self.private_ip}:{SERVER_PORT}"
16 changes: 10 additions & 6 deletions src/events/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,22 @@ def _request_certificates(self):
if self.charm.state.unit_server.tls:
self._remove_certificates()

sans_ip = set(
self.charm.state.unit_server.sans.get("sans_ip", [])
+ [str(self.charm.state.bind_address or "")]
)
sans_dns = set(self.charm.state.unit_server.sans.get("sans_dns", []))

logger.debug(
"Requesting certificate for: "
f"host {self.charm.state.unit_server.host},"
f"with IP {self.charm.state.unit_server.sans.get('sans_ip', [])},"
f"DNS {self.charm.state.unit_server.sans.get('sans_dns', [])}"
f"host {self.charm.state.unit_server.host}, with IP {sans_ip}, DNS {sans_dns}"
)

csr = generate_csr(
private_key=self.charm.state.unit_server.private_key.encode("utf-8"),
subject=self.charm.state.unit_server.private_ip,
sans_ip=self.charm.state.unit_server.sans.get("sans_ip", []),
sans_dns=self.charm.state.unit_server.sans.get("sans_dns", []),
subject=str(self.charm.state.bind_address or self.charm.state.unit_server.private_ip),
sans_ip=list(sans_ip or ""),
sans_dns=list(sans_dns),
)

self.charm.state.unit_server.update({"csr": csr.decode("utf-8").strip()})
Expand Down
1 change: 1 addition & 0 deletions src/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
MSG_STATUS_ERROR = "Service is an error state"
MSG_STATUS_WORKLOAD_DOWN = "Workload is not alive"
MSG_STATUS_UNKNOWN = "Workload status is not known"
MSG_STATUS_APP_REMOVED = "remove-application was requested: leaving..."

MSG_APP_STATUS = [
MSG_STATUS_DB_DOWN,
Expand Down
2 changes: 1 addition & 1 deletion src/managers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def request(
if None in [endpoint, method]:
raise ValueError("endpoint or method missing")

full_url = f"{self.state.unit_server.url}/api/{endpoint}"
full_url = f"{self.state.url}/api/{endpoint}"

request_kwargs = {
"verify": self.workload.paths.ca,
Expand Down
3 changes: 2 additions & 1 deletion src/managers/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def dashboard_properties(self) -> list[str]:

opensearch_ca = self.workload.paths.opensearch_ca if self.state.opensearch_server else ""

properties += [f"server.host: '{self.state.unit_server.private_ip}'"]
# We are using the address exposed by Juju as service address
properties += [f"server.host: '{self.state.bind_address}'"]
properties += (
[
f"opensearch.username: {opensearch_user}",
Expand Down
4 changes: 4 additions & 0 deletions src/managers/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from exceptions import OSDAPIError
from literals import (
HEALTH_OPENSEARCH_STATUS_URL,
MSG_STATUS_APP_REMOVED,
MSG_STATUS_DB_DOWN,
MSG_STATUS_DB_MISSING,
MSG_STATUS_ERROR,
Expand Down Expand Up @@ -63,6 +64,9 @@ def status_ok(self) -> tuple[bool, str]:
def opensearch_ok(self) -> tuple[bool, str]:
"""Verify if associated Opensearch service is up and running."""

if not self.state.url:
return False, MSG_STATUS_APP_REMOVED

if not self.state.opensearch_server or not (
os.path.exists(self.workload.paths.opensearch_ca)
and os.path.getsize(self.workload.paths.opensearch_ca) > 0
Expand Down
2 changes: 1 addition & 1 deletion src/managers/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,4 @@ def certificate_valid(self) -> bool:
logger.debug(
f"Currently recognized IP using 'gethostbyname': {self.state.unit_server.private_ip}"
)
return self.state.unit_server.private_ip in response
return str(self.state.bind_address) in response
57 changes: 48 additions & 9 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,24 @@
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
APP_NAME = METADATA["name"]
OPENSEARCH_APP_NAME = "opensearch"
SERIES = "jammy"

OPENSEARCH_APP_NAME = "opensearch"
OPENSEARCH_RELATION_NAME = "opensearch-client"
OPENSEARCH_CONFIG = {
"logging-config": "<root>=INFO;unit=DEBUG",
"cloudinit-userdata": """postruncmd:
- [ 'sysctl', '-w', 'vm.max_map_count=262144' ]
- [ 'sysctl', '-w', 'fs.file-max=1048576' ]
- [ 'sysctl', '-w', 'vm.swappiness=0' ]
- [ 'sysctl', '-w', 'net.ipv4.tcp_retries2=5' ]
""",
}

TLS_CERTIFICATES_APP_NAME = "self-signed-certificates"
COS_AGENT_APP_NAME = "grafana-agent"
COS_AGENT_RELATION_NAME = "cos-agent"
DB_CLIENT_APP_NAME = "application"
juditnovak marked this conversation as resolved.
Show resolved Hide resolved


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -213,7 +231,7 @@ def all_dashboards_unavailable(ops_test: OpsTest, https: bool = False) -> bool:
logger.info(f"Couldn't retrieve host certificate for unit {unit}")
continue

host = get_private_address(ops_test.model.name, unit.name)
host = get_bind_address(ops_test.model.name, unit.name)

# We should retry until a host could be retrieved
if not host:
Expand Down Expand Up @@ -258,7 +276,6 @@ def access_dashboard(
arguments = {"url": url, "headers": headers, "json": data}
if ssl:
arguments["verify"] = "./ca.pem"

response = requests.post(**arguments)
return response.status_code == 200

Expand Down Expand Up @@ -298,7 +315,7 @@ async def access_all_dashboards(
for unit in ops_test.model.applications[APP_NAME].units:
if unit.name in skip:
continue
host = get_private_address(ops_test.model.name, unit.name)
host = get_bind_address(ops_test.model.name, unit.name)
if not host:
logger.error(f"No hostname found for {unit.name}, can't check connection.")
return False
Expand Down Expand Up @@ -371,14 +388,19 @@ async def get_address(ops_test: OpsTest, unit_name: str) -> str:
return address


def get_private_address(model_full_name: str, unit: str):
def get_bind_address(model_full_name: str, unit: str):
try:
private_ip = check_output(
[
"bash",
"-c",
f"JUJU_MODEL={model_full_name} juju ssh {unit} ip a | "
"grep global | grep 'inet 10.*/24' | cut -d' ' -f6 | cut -d'/' -f1",
"juju",
"exec",
f"--model={model_full_name}",
"--unit",
unit,
"--",
"network-get",
OPENSEARCH_RELATION_NAME,
"--bind-address",
],
text=True,
)
Expand Down Expand Up @@ -742,7 +764,7 @@ async def client_run_all_dashboards_request(
return False

for dashboards_unit in ops_test.model.applications[APP_NAME].units:
host = get_private_address(ops_test.model.name, dashboards_unit.name)
host = get_bind_address(ops_test.model.name, dashboards_unit.name)
if not host:
logger.debug(f"No hostname found for {dashboards_unit.name}, can't check connection.")
return False
Expand Down Expand Up @@ -774,3 +796,20 @@ async def destroy_cluster(ops_test, app: str = OPENSEARCH_APP_NAME):
# This case we don't raise an error in the context manager which
# fails to restore the `update-status-hook-interval` value to it's former state.
assert n_apps_after == n_apps_before - 1, "old cluster not destroyed successfully."


async def for_machines(ops_test, machines, state="started"):
for attempt in Retrying(stop=stop_after_attempt(10), wait=wait_fixed(wait=60)):
with attempt:
mach_status = json.loads(
subprocess.check_output(
["juju", "machines", f"--model={ops_test.model.name}", "--format=json"]
)
)["machines"]
for id in machines:
if (
str(id) not in mach_status.keys()
or mach_status[str(id)]["juju-status"]["current"] != state
):
logger.warning(f"machine-{id} either not exist yet or not in {state}")
raise Exception()
2 changes: 2 additions & 0 deletions tests/integration/spaces/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
Loading
Loading