From 134dc27364ec5ae8c32de2be035da2705e927577 Mon Sep 17 00:00:00 2001 From: deeplow Date: Tue, 14 Feb 2023 11:33:55 +0000 Subject: [PATCH] Log container output in dev mode Additionally save a file with the container output next to the original file `[original].pdf.container_log`. This is needed in order to increase visibility of non-critical errors/warnings. Previously some issues went undetected because if this [1]. [1]: https://github.com/freedomofpress/dangerzone/issues/316 --- container/dangerzone.py | 12 ++++--- dangerzone/isolation_provider/container.py | 42 ++++++++++++++++++---- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/container/dangerzone.py b/container/dangerzone.py index 7285482..e38bd93 100644 --- a/container/dangerzone.py +++ b/container/dangerzone.py @@ -29,7 +29,9 @@ TIMEOUT_PER_PAGE: float = 10 # (seconds) TIMEOUT_PER_MB: float = 10 # (seconds) -async def read_stream(sr: asyncio.StreamReader, callback: Callable = None) -> bytes: +async def read_stream( + cmd_args: List[str], sr: asyncio.StreamReader, callback: Callable = None +) -> bytes: """Consume a byte stream line-by-line. Read all lines in a stream until EOF. If a user has passed a callback, call it for @@ -44,10 +46,10 @@ async def read_stream(sr: asyncio.StreamReader, callback: Callable = None) -> by line = await sr.readline() if sr.at_eof(): break + if os.environ.get("DZ_DEBUG_CONTAINER", "no") == "yes": + print(f"DEBUG:{cmd_args[0]}: {line.decode().rstrip()}") if callback is not None: callback(line) - # TODO: This would be a good place to log the received line, mostly for debug - # logging. buf += line return buf @@ -82,8 +84,8 @@ async def run_command( # Create asynchronous tasks that will consume the standard streams of the command, # and call callbacks if necessary. - stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback)) - stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback)) + stdout_task = asyncio.create_task(read_stream(args, proc.stdout, stdout_callback)) + stderr_task = asyncio.create_task(read_stream(args, proc.stderr, stderr_callback)) # Wait until the command has finished, for a specific timeout. Then, verify that the # command has completed successfully. In any other case, raise an exception. diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index fb4e4a0..8147d67 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -7,6 +7,7 @@ import pipes import platform import shutil import subprocess +import sys import tempfile from typing import Callable, List, Optional, Tuple @@ -134,17 +135,39 @@ class Container(IsolationProvider): return installed - def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]: + def log_container_output( + self, document: Document, line: str, error: bool = False + ) -> None: + if not getattr(sys, "dangerzone_dev", False): + # Skip logging when in production mode + return + if error: + line = f"error: {line}" + log.debug(f"[ CONTAINER ] {line.rstrip()}") + + # Log to .log file + if os.environ.get("DZ_LOG_CONTAINER", "no").lower() in ["yes", "true"]: + with open(f"{document.input_filename}.container_log", "a") as f: + f.write(f"{line.rstrip()}\n") + + def parse_progress( + self, document: Document, line: str + ) -> None | Tuple[bool, str, int]: """ Parses a line returned by the container. """ try: status = json.loads(line) except: - error_message = f"Invalid JSON returned from container:\n\n\t {line}" - log.error(error_message) - return (True, error_message, -1) + if line.startswith("DEBUG:") and getattr(sys, "dangerzone_dev", False): + self.log_container_output(document, line.lstrip("DEBUG:")) + return None + else: + error_message = f"Invalid JSON returned from container:\n\n\t {line}" + log.error(error_message) + return (True, error_message, -1) + self.log_container_output(document, status["text"], status["error"]) self.print_progress( document, status["error"], status["text"], status["percentage"] ) @@ -170,9 +193,11 @@ class Container(IsolationProvider): ) as p: if p.stdout is not None: for line in p.stdout: - (error, text, percentage) = self.parse_progress(document, line) - if stdout_callback: - stdout_callback(error, text, percentage) + progress_tuple = self.parse_progress(document, line) + if progress_tuple is not None: + (error, text, percentage) = progress_tuple + if stdout_callback: + stdout_callback(error, text, percentage) p.communicate() return p.returncode @@ -274,6 +299,9 @@ class Container(IsolationProvider): "-e", f"ENABLE_TIMEOUTS={self.enable_timeouts}", ] + if getattr(sys, "dangerzone_dev", False): + extra_args += ["-e", f"DZ_DEBUG_CONTAINER=yes"] + ret = self.exec_container(document, command, extra_args, stdout_callback) if ret != 0: log.error("documents-to-pixels failed")