From d6410652cb2de989f524fa1eca71d299cc32a5c1 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Thu, 26 Sep 2024 16:50:17 +0300 Subject: [PATCH] Kill the process group when conversion terminates Instead of killing just the invoked Podman/Docker/qrexec process, kill the whole process group, to make sure that other components that have been spawned die as well. In the case of Podman, conmon is one of the processes that lingers, so that's one way to kill it. --- dangerzone/isolation_provider/base.py | 37 +++++++++++++++++++++- dangerzone/isolation_provider/container.py | 9 ++++-- tests/isolation_provider/base.py | 10 +++--- tests/isolation_provider/test_container.py | 4 +-- 4 files changed, 50 insertions(+), 10 deletions(-) diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 6d9a082..0a46ce0 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -1,5 +1,8 @@ import contextlib import logging +import os +import platform +import signal import subprocess import sys import tempfile @@ -27,6 +30,38 @@ TIMEOUT_GRACE = 15 TIMEOUT_FORCE = 5 +def _signal_process_group(p: subprocess.Popen, signo: int) -> None: + """Send a signal to a process group.""" + try: + os.killpg(os.getpgid(p.pid), signo) + except (ProcessLookupError, PermissionError): + # If the process no longer exists, we may encounter the above errors, either + # when looking for the process group (ProcessLookupError), or when trying to + # kill a process group that no longer exists (PermissionError) + return + except Exception: + log.exception( + f"Unexpected error while sending signal {signo} to the" + f"document-to-pixels process group (PID: {p.pid})" + ) + + +def terminate_process_group(p: subprocess.Popen) -> None: + """Terminate a process group.""" + if platform.system() == "Windows": + p.terminate() + else: + _signal_process_group(p, signal.SIGTERM) + + +def kill_process_group(p: subprocess.Popen) -> None: + """Forcefully kill a process group.""" + if platform.system() == "Windows": + p.kill() + else: + _signal_process_group(p, signal.SIGKILL) + + def read_bytes(f: IO[bytes], size: int, exact: bool = True) -> bytes: """Read bytes from a file-like object.""" buf = f.read(size) @@ -240,7 +275,7 @@ class IsolationProvider(ABC): ) # Forcefully kill the running process. - p.kill() + kill_process_group(p) try: p.wait(timeout_force) except subprocess.TimeoutExpired: diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 166c285..fe6626c 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -13,7 +13,12 @@ from ..conversion import errors from ..document import Document from ..util import get_tmp_dir # NOQA : required for mocking in our tests. from ..util import get_resource_path, get_subprocess_startupinfo -from .base import PIXELS_TO_PDF_LOG_END, PIXELS_TO_PDF_LOG_START, IsolationProvider +from .base import ( + PIXELS_TO_PDF_LOG_END, + PIXELS_TO_PDF_LOG_START, + IsolationProvider, + terminate_process_group, +) TIMEOUT_KILL = 5 # Timeout in seconds until the kill command returns. @@ -436,7 +441,7 @@ class Container(IsolationProvider): # # See also https://github.com/freedomofpress/dangerzone/issues/791 self.kill_container(self.doc_to_pixels_container_name(document)) - p.terminate() + terminate_process_group(p) def ensure_stop_doc_to_pixels_proc( # type: ignore [no-untyped-def] self, document: Document, *args, **kwargs diff --git a/tests/isolation_provider/base.py b/tests/isolation_provider/base.py index 8e4a236..1bf2d91 100644 --- a/tests/isolation_provider/base.py +++ b/tests/isolation_provider/base.py @@ -140,14 +140,14 @@ class IsolationProviderTermination: terminate_proc_mock = mocker.patch.object( provider_wait, "terminate_doc_to_pixels_proc", return_value=None ) - popen_kill_spy = mocker.spy(subprocess.Popen, "kill") + kill_pg_spy = mocker.spy(base, "kill_process_group") with provider_wait.doc_to_pixels_proc(doc, timeout_grace=0) as proc: pass get_proc_exception_spy.assert_not_called() terminate_proc_mock.assert_called() - popen_kill_spy.assert_called() + kill_pg_spy.assert_called() assert proc.poll() is not None def test_linger_unkillable( @@ -165,8 +165,8 @@ class IsolationProviderTermination: terminate_proc_mock = mocker.patch.object( provider_wait, "terminate_doc_to_pixels_proc", return_value=None ) - popen_kill_mock = mocker.patch.object( - subprocess.Popen, "kill", return_value=None + kill_pg_mock = mocker.patch( + "dangerzone.isolation_provider.base.kill_process_group", return_value=None ) with provider_wait.doc_to_pixels_proc( @@ -176,7 +176,7 @@ class IsolationProviderTermination: get_proc_exception_spy.assert_not_called() terminate_proc_mock.assert_called() - popen_kill_mock.assert_called() + kill_pg_mock.assert_called() assert proc.poll() is None # Reset the function to the original state. diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index 2828ca3..bb07aa9 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -65,7 +65,7 @@ class TestContainerTermination(IsolationProviderTermination): provider_wait.progress_callback = mocker.MagicMock() get_proc_exception_spy = mocker.spy(provider_wait, "get_proc_exception") terminate_proc_spy = mocker.spy(provider_wait, "terminate_doc_to_pixels_proc") - popen_kill_spy = mocker.spy(subprocess.Popen, "kill") + kill_proc_spy = mocker.spy(base, "kill_process_group") # Switch the subprocess.run() function with a patched function that # intercepts the `kill` command and switches it with `wait` instead. This way, @@ -98,5 +98,5 @@ class TestContainerTermination(IsolationProviderTermination): get_proc_exception_spy.assert_not_called() terminate_proc_spy.assert_called() - popen_kill_spy.assert_called() + kill_proc_spy.assert_called() assert proc.poll() is not None