Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-5587] Fix private IP resolution to use juju spaces #122

Merged
merged 23 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pytest = "^8.2.2"
juju = "~3.5.0"
tenacity = "^8.4.2"
coverage = {extras = ["toml"], version = ">7.0"}
pytest-asyncio = "^0.21.2"
pytest-operator = ">0.20"
pytest-operator-cache = {git = "https://github.com/canonical/data-platform-workflows", tag = "v16.7.0", subdirectory = "python/pytest_plugins/pytest_operator_cache"}
pytest-operator-groups = {git = "https://github.com/canonical/data-platform-workflows", tag = "v16.7.0", subdirectory = "python/pytest_plugins/pytest_operator_groups"}
Expand Down
1 change: 0 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@ def _on_install(self, event: InstallEvent) -> None:

def reconcile(self, event: EventBase) -> None:
"""Generic handler for all 'something changed, update' events across all relations."""

# 1. Block until peer relation is set
if not self.state.peer_relation:
self.unit.status = WaitingStatus(MSG_WAITING_FOR_PEER)
Expand Down
6 changes: 4 additions & 2 deletions src/core/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
OpenSearchRequiresData,
)
from ops.framework import Framework, Object
from ops.model import Relation, Unit
from ops.model import Binding, Relation, Unit
juditnovak marked this conversation as resolved.
Show resolved Hide resolved

from core.models import SUBSTRATES, ODCluster, ODServer, OpensearchServer
from literals import (
Expand Down Expand Up @@ -63,7 +63,7 @@ def opensearch_relation(self) -> Relation | None:

@property
def tls_relation(self) -> Relation | None:
"""The cluster peer relation."""
"""The cluster tls relation."""
return self.model.get_relation(CERTS_REL_NAME)

# --- CORE COMPONENTS---
Expand All @@ -72,6 +72,7 @@ def tls_relation(self) -> Relation | None:
def unit_server(self) -> ODServer:
"""The server state of the current running Unit."""
return ODServer(
model=self.model,
relation=self.peer_relation,
data_interface=self.peer_unit_data,
component=self.model.unit,
Expand Down Expand Up @@ -116,6 +117,7 @@ def servers(self) -> set[ODServer]:
for unit, data_interface in self.peer_units_data.items():
servers.add(
ODServer(
model=self.model,
relation=self.peer_relation,
data_interface=data_interface,
component=unit,
Expand Down
45 changes: 31 additions & 14 deletions src/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Literal, MutableMapping

from charms.data_platform_libs.v0.data_interfaces import Data, DataDict
from ops.model import Application, Relation, Unit
from ops.model import Application, Model, Relation, Unit
from typing_extensions import override

from literals import SERVER_PORT
Expand Down Expand Up @@ -61,7 +61,7 @@ class OpensearchServer(StateBase):

def __init__(
self,
relation: Relation | None,
relation: Relation,
juditnovak marked this conversation as resolved.
Show resolved Hide resolved
data_interface: Data,
component: Application,
substrate: SUBSTRATES,
Expand Down Expand Up @@ -142,13 +142,15 @@ class ODServer(StateBase):

def __init__(
self,
model: Model,
juditnovak marked this conversation as resolved.
Show resolved Hide resolved
relation: Relation | None,
data_interface: Data,
component: Unit,
substrate: SUBSTRATES,
):
super().__init__(relation, data_interface, component, substrate)
self.unit = component
self.model = model

@property
def unit_id(self) -> int:
Expand Down Expand Up @@ -178,24 +180,39 @@ def hostname(self) -> str:
@property
def fqdn(self) -> str:
"""The Fully Qualified Domain Name for the unit."""
return socket.getfqdn()
return socket.getfqdn(self.private_ip)

@property
def private_ip(self) -> str:
"""The IP for the unit."""
return socket.gethostbyname(self.hostname)
# Either values are okay for us because we are interested on their name
# both objects have a .name property.
address = None
if relation := self.relation:
binding_obj = self.model.get_binding(relation.name)
address = (
str(binding_obj.network.bind_address)
if binding_obj and binding_obj.network
phvalguima marked this conversation as resolved.
Show resolved Hide resolved
else None
)
# Optional fallback, if no binding is found
return address or socket.gethostbyname(self.hostname)

@property
def public_ip(self) -> str:
result = subprocess.check_output(
[
"bash",
"-c",
"ip a | grep global | grep -v 'inet 10.' | cut -d' ' -f6 | cut -d'/' -f1",
skourta marked this conversation as resolved.
Show resolved Hide resolved
],
text=True,
)
return result.rstrip()
"""The public IP for the unit."""
# Either values are okay for us because we are interested on their name
# both objects have a .name property.
address = None
if relation := self.relation:
binding_obj = self.model.get_binding(relation.name)
address = (
str(binding_obj.network.ingress_address)
if binding_obj and binding_obj.network
else None
)
# Optional fallback, if no binding is found
return address or socket.gethostbyname(self.hostname)

@property
def host(self) -> str:
Expand Down Expand Up @@ -246,7 +263,7 @@ def sans(self) -> dict[str, list[str]]:

return {
"sans_ip": [self.private_ip],
"sans_dns": [self.hostname, self.fqdn],
"sans_dns": [dns for dns in {self.hostname, self.fqdn} if dns],
}

@property
Expand Down
48 changes: 43 additions & 5 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,23 @@
METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
APP_NAME = METADATA["name"]
OPENSEARCH_APP_NAME = "opensearch"
SERIES = "jammy"

OPENSEARCH_RELATION_NAME = "opensearch-client"
OPENSEARCH_CONFIG = {
"logging-config": "<root>=INFO;unit=DEBUG",
"cloudinit-userdata": """postruncmd:
- [ 'sysctl', '-w', 'vm.max_map_count=262144' ]
- [ 'sysctl', '-w', 'fs.file-max=1048576' ]
- [ 'sysctl', '-w', 'vm.swappiness=0' ]
- [ 'sysctl', '-w', 'net.ipv4.tcp_retries2=5' ]
""",
}

TLS_CERTIFICATES_APP_NAME = "self-signed-certificates"
COS_AGENT_APP_NAME = "grafana-agent"
COS_AGENT_RELATION_NAME = "cos-agent"
DB_CLIENT_APP_NAME = "application"
juditnovak marked this conversation as resolved.
Show resolved Hide resolved


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -258,7 +275,6 @@ def access_dashboard(
arguments = {"url": url, "headers": headers, "json": data}
if ssl:
arguments["verify"] = "./ca.pem"

response = requests.post(**arguments)
return response.status_code == 200

Expand Down Expand Up @@ -375,10 +391,15 @@ def get_private_address(model_full_name: str, unit: str):
try:
private_ip = check_output(
[
"bash",
"-c",
f"JUJU_MODEL={model_full_name} juju ssh {unit} ip a | "
"grep global | grep 'inet 10.*/24' | cut -d' ' -f6 | cut -d'/' -f1",
"juju",
"exec",
f"--model={model_full_name}",
"--unit",
unit,
"--",
"network-get",
OPENSEARCH_RELATION_NAME,
"--bind-address",
],
text=True,
)
Expand Down Expand Up @@ -774,3 +795,20 @@ async def destroy_cluster(ops_test, app: str = OPENSEARCH_APP_NAME):
# This case we don't raise an error in the context manager which
# fails to restore the `update-status-hook-interval` value to it's former state.
assert n_apps_after == n_apps_before - 1, "old cluster not destroyed successfully."


async def for_machines(ops_test, machines, state="started"):
for attempt in Retrying(stop=stop_after_attempt(10), wait=wait_fixed(wait=60)):
with attempt:
mach_status = json.loads(
subprocess.check_output(
["juju", "machines", f"--model={ops_test.model.name}", "--format=json"]
)
)["machines"]
for id in machines:
if (
str(id) not in mach_status.keys()
or mach_status[str(id)]["juju-status"]["current"] != state
):
logger.warning(f"machine-{id} either not exist yet or not in {state}")
raise Exception()
2 changes: 2 additions & 0 deletions tests/integration/spaces/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
124 changes: 124 additions & 0 deletions tests/integration/spaces/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

import logging
import os
import subprocess

import pytest
from pytest_operator.plugin import OpsTest

logger = logging.getLogger(__name__)


DEFAULT_LXD_NETWORK = "lxdbr0"
RAW_DNSMASQ = """dhcp-option=3
dhcp-option=6"""


def _lxd_network(name: str, subnet: str, external: bool = True):
try:
output = subprocess.run(
[
"sudo",
"lxc",
"network",
"create",
name,
"--type=bridge",
f"ipv4.address={subnet}",
f"ipv4.nat={external}".lower(),
"ipv6.address=none",
"dns.mode=none",
],
capture_output=True,
check=True,
encoding="utf-8",
).stdout
logger.info(f"LXD network created: {output}")
output = subprocess.run(
["sudo", "lxc", "network", "show", name],
capture_output=True,
check=True,
encoding="utf-8",
).stdout
logger.debug(f"LXD network status: {output}")

if not external:
subprocess.run(
["sudo", "lxc", "network", "set", name, "raw.dnsmasq", RAW_DNSMASQ], check=True
)

subprocess.run(f"sudo ip link set up dev {name}".split(), check=True)
except subprocess.CalledProcessError as e:
logger.error(f"Error creating LXD network {name} with: {e.returncode} {e.stderr}")
raise


@pytest.fixture(scope="module")
def lxd():
try:
# Set all networks' dns.mode=none
# We want to avoid check:
# https://github.com/canonical/lxd/blob/
# 762f7dc5c3dc4dbd0863a796898212d8fbe3f7c3/lxd/device/nic_bridged.go#L403
# As described on:
# https://discuss.linuxcontainers.org/t/
# error-failed-start-validation-for-device-enp3s0f0-instance
# -dns-name-net17-nicole-munoz-marketing-already-used-on-network/15586/22?page=2
subprocess.run(
[
"sudo",
"lxc",
"network",
"set",
DEFAULT_LXD_NETWORK,
"dns.mode=none",
],
check=True,
)
except subprocess.CalledProcessError as e:
logger.error(
f"Error creating LXD network {DEFAULT_LXD_NETWORK} with: {e.returncode} {e.stderr}"
)
raise
_lxd_network("client", "10.0.0.1/24", True)
_lxd_network("cluster", "10.10.10.1/24", False)
_lxd_network("backup", "10.20.20.1/24", False)


@pytest.fixture(scope="module")
def lxd_spaces(ops_test: OpsTest, lxd):
subprocess.run(
[
"juju",
"reload-spaces",
f"--model={ops_test.model.name}",
],
)
spaces = [("client", "10.0.0.0/24"), ("cluster", "10.10.10.0/24"), ("backup", "10.20.20.0/24")]
for space in spaces:
subprocess.run(
f"juju add-space --model={ops_test.model.name} {space[0]} {space[1]}".split(),
check=True,
)


@pytest.hookimpl()
def pytest_sessionfinish(session, exitstatus):
if os.environ.get("CI", "true").lower() == "true":
# Nothing to do, as this is a temp runner only
return

def __exec(cmd):
try:
subprocess.run(cmd.split(), check=True)
except subprocess.CalledProcessError as e:
# Log and try to delete the next network
logger.warning(f"Error deleting LXD network with: {e.returncode} {e.stderr}")

for network in ["client", "cluster", "backup"]:
__exec(f"sudo lxc network delete {network}")

__exec(f"sudo lxc network unset {DEFAULT_LXD_NETWORK} dns.mode")
Loading
Loading