From d66af442b324435271cd3e446d1677803c8afbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20M=C3=A9taireau?= Date: Tue, 26 Nov 2024 17:19:34 +0100 Subject: [PATCH 1/2] Add a --debug flag to the CLI to help retrieve more logs. When the flag is set, the `RUNSC_DEBUG=1` environment variable is added to the outer container --- dangerzone/cli.py | 8 +++++++- dangerzone/isolation_provider/base.py | 16 ++++++++++++---- dangerzone/isolation_provider/container.py | 5 +++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/dangerzone/cli.py b/dangerzone/cli.py index 8f68e62..7a0acfb 100644 --- a/dangerzone/cli.py +++ b/dangerzone/cli.py @@ -42,6 +42,11 @@ def print_header(s: str) -> None: type=click.UNPROCESSED, callback=args.validate_input_filenames, ) +@click.option( + "--debug", + "debug", + flag_value=True, + help="Run Dangerzone in debug mode, to get logs from gVisor.") @click.version_option(version=get_version(), message="%(version)s") @errors.handle_document_errors def cli_main( @@ -50,6 +55,7 @@ def cli_main( filenames: List[str], archive: bool, dummy_conversion: bool, + debug: bool, ) -> None: setup_logging() @@ -58,7 +64,7 @@ def cli_main( elif is_qubes_native_conversion(): dangerzone = DangerzoneCore(Qubes()) else: - dangerzone = DangerzoneCore(Container()) + dangerzone = DangerzoneCore(Container(debug=debug)) display_banner() if len(filenames) == 1 and output_filename: diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 7b353b7..59c8b60 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -86,12 +86,20 @@ class IsolationProvider(ABC): Abstracts an isolation provider """ - def __init__(self) -> None: - if getattr(sys, "dangerzone_dev", False) is True: + def __init__(self, debug: bool = False) -> None: + self.debug = debug + if self.should_capture_stderr(): self.proc_stderr = subprocess.PIPE else: self.proc_stderr = subprocess.DEVNULL + def should_capture_stderr(self) -> bool: + return self.debug or getattr(sys, "dangerzone_dev", False) + + @staticmethod + def is_runtime_available() -> bool: + return True + @abstractmethod def install(self) -> bool: pass @@ -344,9 +352,9 @@ class IsolationProvider(ABC): ) # Read the stderr of the process only if: - # * Dev mode is enabled. + # * We're in debug mode # * The process has exited (else we risk hanging). - if getattr(sys, "dangerzone_dev", False) and p.poll() is not None: + if self.should_capture_stderr() and p.poll() is not None: assert p.stderr debug_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS) log.info( diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 1a08385..8d3ca05 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -168,6 +168,10 @@ class Container(IsolationProvider): ) -> subprocess.Popen: container_runtime = container_utils.get_runtime() security_args = self.get_runtime_security_args() + debug_args = [] + if self.debug: + debug_args += ["-e", "RUNSC_DEBUG=1"] + enable_stdin = ["-i"] set_name = ["--name", name] prevent_leakage_args = ["--rm"] @@ -177,6 +181,7 @@ class Container(IsolationProvider): args = ( ["run"] + security_args + + debug_args + prevent_leakage_args + enable_stdin + set_name From bea256d6f13728e178cbb2973912a56c32ccc0fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexis=20M=C3=A9taireau?= Date: Wed, 18 Dec 2024 16:40:18 +0100 Subject: [PATCH 2/2] Use a separate thread to read stderr when needed --- dangerzone/isolation_provider/base.py | 23 ++++++++++++++++++++++ dangerzone/isolation_provider/container.py | 17 +++++++++++++++- dangerzone/logic.py | 14 +++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 59c8b60..23a8314 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -5,6 +5,8 @@ import platform import signal import subprocess import sys +import threading +from queue import Queue from abc import ABC, abstractmethod from typing import IO, Callable, Iterator, Optional @@ -92,6 +94,7 @@ class IsolationProvider(ABC): self.proc_stderr = subprocess.PIPE else: self.proc_stderr = subprocess.DEVNULL + self.stderr_queue = Queue() def should_capture_stderr(self) -> bool: return self.debug or getattr(sys, "dangerzone_dev", False) @@ -363,3 +366,23 @@ class IsolationProvider(ABC): f"{debug_log}" # no need for an extra newline here f"{DOC_TO_PIXELS_LOG_END}" ) + + def _stream_stderr(self, stderr, queue: Queue) -> None: + """Read stderr in a separate thread to avoid blocking""" + try: + for line in stderr: + queue.put(line) + if self.debug: + log.debug(line.decode().strip()) + except (ValueError, IOError) as e: + log.debug(f"Stderr stream closed: {e}") + + def start_stderr_thread(self, process: subprocess.Popen) -> None: + """Start a thread to read stderr from the process""" + if process.stderr: + stderr_thread = threading.Thread( + target=self._stream_stderr, + args=(process.stderr, self.stderr_queue), + daemon=True + ) + stderr_thread.start() diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 8d3ca05..881914a 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -189,7 +189,22 @@ class Container(IsolationProvider): + command ) args = [container_runtime] + args - return self.exec(args) + args_str = " ".join(shlex.quote(s) for s in args) + log.info("> " + args_str) + + process = subprocess.Popen( + args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, # Always pipe stderr + startupinfo=startupinfo, + start_new_session=True, + ) + + # Start stderr reader thread + self.start_stderr_thread(process) + + return process def kill_container(self, name: str) -> None: """Terminate a spawned container. diff --git a/dangerzone/logic.py b/dangerzone/logic.py index 786fc44..9bf8e05 100644 --- a/dangerzone/logic.py +++ b/dangerzone/logic.py @@ -66,11 +66,25 @@ class DangerzoneCore(object): ) -> None: def convert_doc(document: Document) -> None: try: + # Clear any existing stderr output + while not self.isolation_provider.stderr_queue.empty(): + self.isolation_provider.stderr_queue.get_nowait() + self.isolation_provider.convert( document, ocr_lang, stdout_callback, ) + + # Process any stderr output that was captured + while not self.isolation_provider.stderr_queue.empty(): + try: + line = self.isolation_provider.stderr_queue.get_nowait() + if stdout_callback: + stdout_callback(True, line.decode().strip(), -1) + except Exception as e: + log.error(f"Error processing stderr: {e}") + except Exception: log.exception( f"Unexpected error occurred while converting '{document}'"