Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Restarting the NDP Proxy sometimes crashed #538

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions src/aleph/vm/network/ndp_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,24 @@
and restart the service.
"""

import asyncio
import logging
from dataclasses import dataclass
from ipaddress import IPv6Network
from pathlib import Path
from subprocess import CalledProcessError

from aleph.vm.utils import run_in_subprocess

logger = logging.getLogger(__name__)


class NdpProxyTerminatedError(Exception):
    """Raised when restarting the NDP Proxy fails due to a SIGTERM signal."""


@dataclass
class NdpRule:
address_range: IPv6Network
Expand All @@ -33,15 +41,32 @@
@staticmethod
async def _restart_ndppd():
    """Restart the ``ndppd`` service through ``systemctl``.

    Raises:
        NdpProxyTerminatedError: If the restart command was killed by SIGTERM.
        CalledProcessError: For any other failure of the restart command; the
            original exception is re-raised unchanged.
    """
    # Function-scope import so this block does not depend on the module's
    # import list being extended.
    import signal

    logger.debug("Restarting ndppd")
    try:
        await run_in_subprocess(["systemctl", "restart", "ndppd"])
    except CalledProcessError as error:
        # A negative return code means the child was killed by that signal.
        # This is the value CalledProcessError uses to build its
        # "died with <Signals.SIGTERM: 15>." message, so comparing it directly
        # is equivalent to matching that text without depending on its wording.
        if error.returncode == -signal.SIGTERM:
            raise NdpProxyTerminatedError("ndppd was terminated by a SIGTERM signal") from error
        raise

Check warning on line 50 in src/aleph/vm/network/ndp_proxy.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/network/ndp_proxy.py#L50

Added line #L50 was not covered by tests

async def _update_ndppd_conf(self):
    """Rewrite ``/etc/ndppd.conf`` from the current mapping and restart ndppd.

    The configuration contains one ``proxy`` section for
    ``self.host_network_interface`` with one ``rule`` per entry of
    ``self.interface_address_range_mapping``.

    The restart is attempted up to three times. When an attempt fails with
    ``NdpProxyTerminatedError`` and attempts remain, a warning is logged and
    the coroutine sleeps five seconds before trying again.

    Raises:
        NdpProxyTerminatedError: If the last attempt is also terminated by
            SIGTERM.
    """
    max_attempts = 3
    # The warning below spells this delay out; keep the two in sync.
    retry_delay_seconds = 5

    rules = "".join(
        f" rule {address_range} {{\n iface {interface}\n }}\n"
        for interface, address_range in self.interface_address_range_mapping.items()
    )
    config = f"proxy {self.host_network_interface} {{\n" + rules + "}\n"
    Path("/etc/ndppd.conf").write_text(config)

    # Every restart goes through the retry loop; there is no unguarded
    # restart call before it.
    for attempt in range(1, max_attempts + 1):
        try:
            await self._restart_ndppd()
        except NdpProxyTerminatedError:
            if attempt == max_attempts:
                raise
            logger.warning(
                "ndppd was terminated by a SIGTERM signal while restarting. Waiting 5 seconds and retrying."
            )
            await asyncio.sleep(retry_delay_seconds)
        else:
            break

Check warning on line 69 in src/aleph/vm/network/ndp_proxy.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/network/ndp_proxy.py#L68-L69

Added lines #L68 - L69 were not covered by tests

async def add_range(self, interface: str, address_range: IPv6Network):
logger.debug("Proxying range %s -> %s", address_range, interface)
Expand Down