From ad97a60002e93f3b260f4f69df5e20b2652019f8 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Wed, 19 Jul 2023 14:05:24 +0200 Subject: [PATCH 1/6] add KernelManager.exit_status --- jupyter_client/manager.py | 11 ++++++++++ tests/test_kernelmanager.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/jupyter_client/manager.py b/jupyter_client/manager.py index f04bd987..8626407d 100644 --- a/jupyter_client/manager.py +++ b/jupyter_client/manager.py @@ -655,6 +655,17 @@ async def _async_is_alive(self) -> bool: is_alive = run_sync(_async_is_alive) + async def _async_exit_status(self) -> int | None: + """Returns 0 if there's no kernel or it exited gracefully, + None if the kernel is running, or a negative value `-N` if the + kernel was killed by signal `N` (posix only).""" + if not self.has_kernel: + return 0 + assert self.provisioner is not None + return await self.provisioner.poll() + + exit_status = run_sync(_async_exit_status) + async def _async_wait(self, pollinterval: float = 0.1) -> None: # Use busy loop at 100ms intervals, polling until the process is # not alive. If we find the process is no longer alive, complete diff --git a/tests/test_kernelmanager.py b/tests/test_kernelmanager.py index f2d749eb..989d5198 100644 --- a/tests/test_kernelmanager.py +++ b/tests/test_kernelmanager.py @@ -160,6 +160,46 @@ async def test_async_signal_kernel_subprocesses(self, name, install, expected): assert km._shutdown_status in expected +class TestKernelManagerExitStatus: + @pytest.mark.skipif(sys.platform == "win32", reason="Windows doesn't support signals") + @pytest.mark.parametrize('_signal', [signal.SIGHUP, signal.SIGTERM, signal.SIGKILL]) + async def test_exit_status(self, _signal): + # install kernel + _install_kernel(name="test_exit_status") + + # start kernel + km, kc = start_new_kernel(kernel_name="test_exit_status") + + # stop restarter - not needed? + # km.stop_restarter() + + # check that process is running + assert km.exit_status() is None + + # get the provisioner + # send signal + provisioner = km.provisioner + assert provisioner is not None + assert provisioner.has_process + await provisioner.send_signal(_signal) + + # wait for the process to exit + try: + await asyncio.wait_for(km._async_wait(), timeout=3.0) + except TimeoutError: + assert False, f'process never stopped for signal {signal}' + + # check that the signal is correct + assert km.exit_status() == -_signal + + # doing a proper shutdown now wipes the status, might be bad? + km.shutdown_kernel(now=True) + assert km.exit_status() == 0 + + # stop channels so cleanup doesn't complain + kc.stop_channels() + + class TestKernelManager: def test_lifecycle(self, km): km.start_kernel(stdout=PIPE, stderr=PIPE) From b6b5cf0a16992d281d2f80cdf15a7fe79631386b Mon Sep 17 00:00:00 2001 From: jakkdl Date: Thu, 20 Jul 2023 12:16:59 +0200 Subject: [PATCH 2/6] fix CI, test signals on windows --- jupyter_client/manager.py | 2 +- tests/test_kernelmanager.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/jupyter_client/manager.py b/jupyter_client/manager.py index 8626407d..7ad2002b 100644 --- a/jupyter_client/manager.py +++ b/jupyter_client/manager.py @@ -655,7 +655,7 @@ async def _async_is_alive(self) -> bool: is_alive = run_sync(_async_is_alive) - async def _async_exit_status(self) -> int | None: + async def _async_exit_status(self) -> t.Optional[int]: """Returns 0 if there's no kernel or it exited gracefully, None if the kernel is running, or a negative value `-N` if the kernel was killed by signal `N` (posix only).""" diff --git a/tests/test_kernelmanager.py b/tests/test_kernelmanager.py index 989d5198..65b787a1 100644 --- a/tests/test_kernelmanager.py +++ b/tests/test_kernelmanager.py @@ -161,8 +161,7 @@ async def test_async_signal_kernel_subprocesses(self, name, install, expected): class TestKernelManagerExitStatus: - @pytest.mark.skipif(sys.platform == "win32", reason="Windows doesn't support signals") - @pytest.mark.parametrize('_signal', [signal.SIGHUP, signal.SIGTERM, signal.SIGKILL]) + @pytest.mark.parametrize('_signal', [signal.SIGILL, signal.SIGSEGV, signal.SIGTERM]) async def test_exit_status(self, _signal): # install kernel _install_kernel(name="test_exit_status") From b6816eaf47291fc0ae40406c80300f5e6f1c7027 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Thu, 17 Aug 2023 12:18:22 +0200 Subject: [PATCH 3/6] add accepts_exit_code parameter to add_restart_callback and make restarter pass the exit code to callback when restarting --- jupyter_client/ioloop/restarter.py | 8 +++---- jupyter_client/manager.py | 10 ++++++-- jupyter_client/restarter.py | 37 ++++++++++++++++++++++-------- tests/test_kernelmanager.py | 1 + tests/test_restarter.py | 8 +++---- 5 files changed, 45 insertions(+), 19 deletions(-) diff --git a/jupyter_client/ioloop/restarter.py b/jupyter_client/ioloop/restarter.py index d0c70396..6a3b9fe2 100644 --- a/jupyter_client/ioloop/restarter.py +++ b/jupyter_client/ioloop/restarter.py @@ -55,9 +55,9 @@ async def poll(self): """Poll the kernel.""" if self.debug: self.log.debug("Polling kernel...") - is_alive = await self.kernel_manager.is_alive() + exit_status = await self.kernel_manager.exit_status() now = time.time() - if not is_alive: + if exit_status is not None: self._last_dead = now if self._restarting: self._restart_count += 1 @@ -66,7 +66,7 @@ async def poll(self): if self._restart_count > self.restart_limit: self.log.warning("AsyncIOLoopKernelRestarter: restart failed") - self._fire_callbacks("dead") + self._fire_callbacks("dead", exit_status) self._restarting = False self._restart_count = 0 self.stop() @@ -78,7 +78,7 @@ async def poll(self): self.restart_limit, "new" if newports else "keep", ) - self._fire_callbacks("restart") + self._fire_callbacks("restart", exit_status) await self.kernel_manager.restart_kernel(now=True, newports=newports) self._restarting = True else: diff --git a/jupyter_client/manager.py b/jupyter_client/manager.py index 7ad2002b..ecd69a6d 100644 --- a/jupyter_client/manager.py +++ b/jupyter_client/manager.py @@ -231,11 +231,17 @@ def stop_restarter(self) -> None: """Stop the kernel restarter.""" pass - def add_restart_callback(self, callback: t.Callable, event: str = "restart") -> None: + def add_restart_callback( + self, + callback: t.Callable[[], object] | t.Callable[[int], object], + event: str = "restart", + *, + accepts_exit_code: bool = False, + ) -> None: """Register a callback to be called when a kernel is restarted""" if self._restarter is None: return - self._restarter.add_callback(callback, event) + self._restarter.add_callback(callback, event, accepts_exit_code=accepts_exit_code) def remove_restart_callback(self, callback: t.Callable, event: str = "restart") -> None: """Unregister a callback to be called when a kernel is restarted""" diff --git a/jupyter_client/restarter.py b/jupyter_client/restarter.py index 194ba907..26bf8da6 100644 --- a/jupyter_client/restarter.py +++ b/jupyter_client/restarter.py @@ -7,7 +7,9 @@ """ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. +import functools import time +import typing as t from traitlets import Bool, Dict, Float, Instance, Integer, default from traitlets.config.configurable import LoggingConfigurable @@ -55,7 +57,8 @@ class KernelRestarter(LoggingConfigurable): def _default_last_dead(self): return time.time() - callbacks = Dict() + # traitlets.Dict is not typed generic + callbacks: t.Dict[str, t.List[t.Callable[[int], object]]] = Dict() # type: ignore def _callbacks_default(self): return {"restart": [], "dead": []} @@ -70,8 +73,14 @@ def stop(self): msg = "Must be implemented in a subclass" raise NotImplementedError(msg) - def add_callback(self, f, event="restart"): - """register a callback to fire on a particular event + def add_callback( + self, + f: t.Callable[[], object] | t.Callable[[int], object], + event: str = "restart", + *, + accepts_exit_code: bool = False, + ) -> None: + """register a callback to fire on a particular event. If ``accepts_exit_code`` is set, the callable will be passed the exit code as reported by `KernelManager.exit_status` Possible values for event: @@ -79,7 +88,16 @@ def add_callback(self, f, event="restart"): 'dead': restart has failed, kernel will be left dead. """ - self.callbacks[event].append(f) + if not accepts_exit_code: + + @functools.wraps(f) + def ignore_exit_code(code: int) -> object: + return f() # type: ignore[call-arg] + + f = ignore_exit_code + self.callbacks[event].append(f) + + self.callbacks[event].append(f) # type: ignore[arg-type] def remove_callback(self, f, event="restart"): """unregister a callback to fire on a particular event @@ -95,11 +113,11 @@ def remove_callback(self, f, event="restart"): except ValueError: pass - def _fire_callbacks(self, event): + def _fire_callbacks(self, event, status): """fire our callbacks for a particular event""" for callback in self.callbacks[event]: try: - callback() + callback(status) except Exception: self.log.error( "KernelRestarter: %s callback %r failed", @@ -115,7 +133,8 @@ def poll(self): self.log.debug("Kernel shutdown in progress...") return now = time.time() - if not self.kernel_manager.is_alive(): + status = self.kernel_manager.exit_status() + if status is not None: self._last_dead = now if self._restarting: self._restart_count += 1 @@ -124,7 +143,7 @@ def poll(self): if self._restart_count > self.restart_limit: self.log.warning("KernelRestarter: restart failed") - self._fire_callbacks("dead") + self._fire_callbacks("dead", status) self._restarting = False self._restart_count = 0 self.stop() @@ -136,7 +155,7 @@ def poll(self): self.restart_limit, "new" if newports else "keep", ) - self._fire_callbacks("restart") + self._fire_callbacks("restart", status) self.kernel_manager.restart_kernel(now=True, newports=newports) self._restarting = True else: diff --git a/tests/test_kernelmanager.py b/tests/test_kernelmanager.py index 65b787a1..986bdda1 100644 --- a/tests/test_kernelmanager.py +++ b/tests/test_kernelmanager.py @@ -161,6 +161,7 @@ async def test_async_signal_kernel_subprocesses(self, name, install, expected): class TestKernelManagerExitStatus: + @pytest.mark.skipif(sys.platform == "win32", reason="Windows doesn't support signals") @pytest.mark.parametrize('_signal', [signal.SIGILL, signal.SIGSEGV, signal.SIGTERM]) async def test_exit_status(self, _signal): # install kernel diff --git a/tests/test_restarter.py b/tests/test_restarter.py index b216842f..bd428162 100644 --- a/tests/test_restarter.py +++ b/tests/test_restarter.py @@ -88,7 +88,7 @@ def debug_logging(): @win_skip async def test_restart_check(config, install_kernel, debug_logging): """Test that the kernel is restarted and recovers""" - # If this test failes, run it with --log-cli-level=DEBUG to inspect + # If this test fails, run it with --log-cli-level=DEBUG to inspect N_restarts = 1 config.KernelRestarter.restart_limit = N_restarts config.KernelRestarter.debug = True @@ -144,7 +144,7 @@ def cb(): @win_skip async def test_restarter_gives_up(config, install_fail_kernel, debug_logging): """Test that the restarter gives up after reaching the restart limit""" - # If this test failes, run it with --log-cli-level=DEBUG to inspect + # If this test fails, run it with --log-cli-level=DEBUG to inspect N_restarts = 1 config.KernelRestarter.restart_limit = N_restarts config.KernelRestarter.debug = True @@ -188,7 +188,7 @@ def on_death(): async def test_async_restart_check(config, install_kernel, debug_logging): """Test that the kernel is restarted and recovers""" - # If this test failes, run it with --log-cli-level=DEBUG to inspect + # If this test fails, run it with --log-cli-level=DEBUG to inspect N_restarts = 1 config.KernelRestarter.restart_limit = N_restarts config.KernelRestarter.debug = True @@ -243,7 +243,7 @@ def cb(): async def test_async_restarter_gives_up(config, install_slow_fail_kernel, debug_logging): """Test that the restarter gives up after reaching the restart limit""" - # If this test failes, run it with --log-cli-level=DEBUG to inspect + # If this test fails, run it with --log-cli-level=DEBUG to inspect N_restarts = 2 config.KernelRestarter.restart_limit = N_restarts config.KernelRestarter.debug = True From 5f7214c54ea7587588f19f16ae04b6a287f3698f Mon Sep 17 00:00:00 2001 From: jakkdl Date: Thu, 17 Aug 2023 13:05:28 +0200 Subject: [PATCH 4/6] the wrapper seems to have broken tests, so doing a slightly messier implementation instead --- jupyter_client/restarter.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/jupyter_client/restarter.py b/jupyter_client/restarter.py index 26bf8da6..a0564879 100644 --- a/jupyter_client/restarter.py +++ b/jupyter_client/restarter.py @@ -7,7 +7,6 @@ """ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. -import functools import time import typing as t @@ -58,7 +57,7 @@ def _default_last_dead(self): return time.time() # traitlets.Dict is not typed generic - callbacks: t.Dict[str, t.List[t.Callable[[int], object]]] = Dict() # type: ignore + callbacks: t.Dict[str, t.List[t.Tuple[t.Callable[[int], object], t.Literal[True]] | t.Tuple[t.Callable[[], object], t.Literal[False]]]] = Dict() # type: ignore[assignment] def _callbacks_default(self): return {"restart": [], "dead": []} @@ -88,16 +87,8 @@ def add_callback( 'dead': restart has failed, kernel will be left dead. """ - if not accepts_exit_code: - - @functools.wraps(f) - def ignore_exit_code(code: int) -> object: - return f() # type: ignore[call-arg] - - f = ignore_exit_code - self.callbacks[event].append(f) - - self.callbacks[event].append(f) # type: ignore[arg-type] + # no dynamic validation that the callable is valid in accordance to accepts_exit_code + self.callbacks[event].append((f, accepts_exit_code)) # type: ignore[arg-type] def remove_callback(self, f, event="restart"): """unregister a callback to fire on a particular event @@ -115,14 +106,18 @@ def remove_callback(self, f, event="restart"): def _fire_callbacks(self, event, status): """fire our callbacks for a particular event""" + # unpacking in the loop breaks the connection between the variables for mypy for callback in self.callbacks[event]: try: - callback(status) + if callback[1] is True: + callback[0](status) + else: + callback[0]() except Exception: self.log.error( "KernelRestarter: %s callback %r failed", event, - callback, + callback[0], exc_info=True, ) From 25586f59d63176a7ce8ca89df7c6c57179dbbed8 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Thu, 17 Aug 2023 13:15:33 +0200 Subject: [PATCH 5/6] add overloads for add_callback --- jupyter_client/restarter.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/jupyter_client/restarter.py b/jupyter_client/restarter.py index a0564879..fcdcc411 100644 --- a/jupyter_client/restarter.py +++ b/jupyter_client/restarter.py @@ -72,6 +72,26 @@ def stop(self): msg = "Must be implemented in a subclass" raise NotImplementedError(msg) + @t.overload + def add_callback( + self, + f: t.Callable[[int], object], + event: str = "restart", + *, + accepts_exit_code: t.Literal[True], + ) -> None: + ... + + @t.overload + def add_callback( + self, + f: t.Callable[[], object], + event: str = "restart", + *, + accepts_exit_code: t.Literal[False] = False, + ) -> None: + ... + def add_callback( self, f: t.Callable[[], object] | t.Callable[[int], object], @@ -87,7 +107,7 @@ def add_callback( 'dead': restart has failed, kernel will be left dead. """ - # no dynamic validation that the callable is valid in accordance to accepts_exit_code + # the type correlation from overloads is not tracked to here by mypy self.callbacks[event].append((f, accepts_exit_code)) # type: ignore[arg-type] def remove_callback(self, f, event="restart"): From 117b66305e0529ce0d9278ad79130e3db2d30ed3 Mon Sep 17 00:00:00 2001 From: jakkdl Date: Thu, 17 Aug 2023 13:18:56 +0200 Subject: [PATCH 6/6] add test_restart_check_exit_status --- tests/test_restarter.py | 62 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/tests/test_restarter.py b/tests/test_restarter.py index bd428162..078a300e 100644 --- a/tests/test_restarter.py +++ b/tests/test_restarter.py @@ -4,7 +4,9 @@ import asyncio import json import os +import signal import sys +import typing as t from concurrent.futures import Future import pytest @@ -95,9 +97,9 @@ async def test_restart_check(config, install_kernel, debug_logging): km = IOLoopKernelManager(kernel_name=install_kernel, config=config) cbs = 0 - restarts: list = [Future() for i in range(N_restarts)] + restarts: t.List[Future[bool]] = [Future() for i in range(N_restarts)] - def cb(): + def cb() -> None: nonlocal cbs if cbs >= N_restarts: raise RuntimeError("Kernel restarted more than %d times!" % N_restarts) @@ -141,6 +143,62 @@ def cb(): assert km.context.closed +@win_skip +async def test_restart_check_exit_status(config, install_kernel, debug_logging): + """Test that the kernel is restarted and recovers, and validates the exit code.""" + # If this test fails, run it with --log-cli-level=DEBUG to inspect + N_restarts = 1 + config.KernelRestarter.restart_limit = N_restarts + config.KernelRestarter.debug = True + km = IOLoopKernelManager(kernel_name=install_kernel, config=config) + + cbs = 0 + restarts: t.List[Future[int]] = [Future() for i in range(N_restarts)] + + def cb(exit_status: int) -> None: + nonlocal cbs + if cbs >= N_restarts: + raise RuntimeError("Kernel restarted more than %d times!" % N_restarts) + restarts[cbs].set_result(exit_status) + cbs += 1 + + try: + km.start_kernel() + km.add_restart_callback(cb, 'restart', accepts_exit_code=True) + except BaseException: + if km.has_kernel: + km.shutdown_kernel() + raise + + try: + for i in range(N_restarts + 1): + kc = km.client() + kc.start_channels() + kc.wait_for_ready(timeout=60) + kc.stop_channels() + if i < N_restarts: + # Kill without cleanup to simulate crash: + assert km.provisioner is not None + await km.provisioner.kill() + assert restarts[i].result() == -signal.SIGKILL + # Wait for kill + restart + max_wait = 10.0 + waited = 0.0 + while waited < max_wait and km.is_alive(): + await asyncio.sleep(0.1) + waited += 0.1 + while waited < max_wait and not km.is_alive(): + await asyncio.sleep(0.1) + waited += 0.1 + + assert cbs == N_restarts + assert km.is_alive() + + finally: + km.shutdown_kernel(now=True) + assert km.context.closed + + @win_skip async def test_restarter_gives_up(config, install_fail_kernel, debug_logging): """Test that the restarter gives up after reaching the restart limit"""