diff --git a/src/aleph/vm/network/ndp_proxy.py b/src/aleph/vm/network/ndp_proxy.py
index 0af97b7d4..d606d36a7 100644
--- a/src/aleph/vm/network/ndp_proxy.py
+++ b/src/aleph/vm/network/ndp_proxy.py
@@ -10,16 +10,25 @@
 and restart the service.
 """
 
+import asyncio
 import logging
+import signal
 from dataclasses import dataclass
 from ipaddress import IPv6Network
 from pathlib import Path
+from subprocess import CalledProcessError
 
 from aleph.vm.utils import run_in_subprocess
 
 logger = logging.getLogger(__name__)
 
 
+class NdpProxyTerminatedError(Exception):
+    """Raised when restarting the NDP Proxy fails due to a SIGTERM signal."""
+
+    pass
+
+
 @dataclass
 class NdpRule:
     address_range: IPv6Network
@@ -33,7 +42,19 @@ def __init__(self, host_network_interface: str):
     @staticmethod
     async def _restart_ndppd():
+        """Restart the ndppd service through systemctl.
+
+        Raises:
+            NdpProxyTerminatedError: if the restart command was killed by SIGTERM.
+            CalledProcessError: if the restart command failed for any other reason.
+        """
         logger.debug("Restarting ndppd")
-        await run_in_subprocess(["systemctl", "restart", "ndppd"])
+        try:
+            await run_in_subprocess(["systemctl", "restart", "ndppd"])
+        except CalledProcessError as error:
+            # CalledProcessError reports death by signal N as returncode -N; compare the code
+            # itself rather than matching the exception's message text.
+            if error.returncode == -signal.SIGTERM:
+                raise NdpProxyTerminatedError("ndppd was terminated by a SIGTERM signal") from error
+            raise
 
     async def _update_ndppd_conf(self):
         config = f"proxy {self.host_network_interface} {{\n"
@@ -41,7 +62,19 @@ async def _update_ndppd_conf(self):
             config += f" rule {address_range} {{\n iface {interface}\n }}\n"
         config += "}\n"
         Path("/etc/ndppd.conf").write_text(config)
-        await self._restart_ndppd()
+        # Retry up to 3 times, 5 seconds apart, when the restart is killed by SIGTERM.
+        for attempt in range(3):
+            try:
+                await self._restart_ndppd()
+                break
+            except NdpProxyTerminatedError:
+                if attempt >= 2:
+                    raise
+                logger.warning(
+                    "ndppd was terminated by a SIGTERM signal while restarting. Waiting 5 seconds and retrying."
+                )
+                await asyncio.sleep(5)
+                continue
 
     async def add_range(self, interface: str, address_range: IPv6Network):
         logger.debug("Proxying range %s -> %s", address_range, interface)