From b4c3e07d3638e0850feb34786a7c9125886afcdf Mon Sep 17 00:00:00 2001 From: deeplow Date: Wed, 30 Aug 2023 13:33:09 +0100 Subject: [PATCH] Remove attacker-controlled error messages Creates exceptions in the server code to be shared with the client via an identifying exit code. These exceptions are then reconstructed in the client. Refs #456 but does not completely fix it. Unexpected exceptions and progress descriptions are still passed in Containers. --- dangerzone/conversion/doc_to_pixels.py | 22 +++--- .../conversion/doc_to_pixels_qubes_wrapper.py | 10 ++- dangerzone/conversion/errors.py | 73 +++++++++++++++++++ dangerzone/isolation_provider/base.py | 6 +- dangerzone/isolation_provider/container.py | 4 + dangerzone/isolation_provider/qubes.py | 10 ++- 6 files changed, 111 insertions(+), 14 deletions(-) create mode 100644 dangerzone/conversion/errors.py diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index e8dd530..d442c1b 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -13,10 +13,11 @@ import os import re import shutil import sys -from typing import Dict, Optional +from typing import Dict, List, Optional import magic +from . import errors from .common import DangerzoneConverter, running_on_qubes @@ -138,7 +139,7 @@ class DocumentToPixels(DangerzoneConverter): # Validate MIME type if mime_type not in conversions: - raise ValueError("The document format is not supported") + raise errors.DocFormatUnsupported() # Temporary fix for the HWPX format # Should be removed after new release of `file' (current release 5.44) @@ -167,7 +168,7 @@ class DocumentToPixels(DangerzoneConverter): # https://github.com/freedomofpress/dangerzone/issues/494 # https://github.com/freedomofpress/dangerzone/issues/498 if libreoffice_ext == "h2orestart.oxt" and running_on_qubes(): - raise ValueError("HWP / HWPX formats are not supported in Qubes") + raise errors.DocFormatUnsupportedHWPQubes() if libreoffice_ext: await self.install_libreoffice_ext(libreoffice_ext) self.update_progress("Converting to PDF using LibreOffice") @@ -196,7 +197,7 @@ class DocumentToPixels(DangerzoneConverter): # # https://github.com/freedomofpress/dangerzone/issues/494 if not os.path.exists(pdf_filename): - raise ValueError("Conversion to PDF with LibreOffice failed") + raise errors.LibreofficeFailure() elif conversion["type"] == "convert": self.update_progress("Converting to PDF using GraphicsMagick") args = [ @@ -216,7 +217,7 @@ class DocumentToPixels(DangerzoneConverter): ) pdf_filename = "/tmp/input_file.pdf" else: - raise ValueError( + raise errors.InvalidGMConversion( f"Invalid conversion type {conversion['type']} for MIME type {mime_type}" ) self.percentage += 3 @@ -236,7 +237,7 @@ class DocumentToPixels(DangerzoneConverter): if search is not None: num_pages: int = int(search.group(1)) else: - raise ValueError("Number of pages could not be extracted from PDF") + raise errors.NoPageCountException() # Get a more precise timeout, based on the number of pages timeout = self.calculate_timeout(size, num_pages) @@ -283,7 +284,7 @@ class DocumentToPixels(DangerzoneConverter): # Read the header header = f.readline().decode().strip() if header != "P6": - raise ValueError("Invalid PPM header") + raise errors.PDFtoPPMInvalidHeader() # Save the width and height dims = f.readline().decode().strip() @@ -296,7 +297,7 @@ class DocumentToPixels(DangerzoneConverter): maxval = int(f.readline().decode().strip()) # Check that the depth is 8 if maxval != 255: - raise ValueError("Invalid PPM depth") + raise errors.PDFtoPPMInvalidDepth() data = f.read() @@ -370,10 +371,11 @@ async def main() -> int: try: await converter.convert() error_code = 0 # Success! - except (RuntimeError, TimeoutError, ValueError) as e: + except errors.ConversionException as e: # Expected Errors + error_code = e.error_code + except (RuntimeError, TimeoutError, ValueError) as e: # Unexpected Errors converter.update_progress(str(e), error=True) error_code = 1 - if not running_on_qubes(): # Write debug information (containers version) with open("/tmp/dangerzone/captured_output.txt", "wb") as container_log: diff --git a/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py b/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py index 676b018..e4ef2dc 100644 --- a/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py +++ b/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py @@ -6,6 +6,7 @@ import tempfile from pathlib import Path from typing import Optional, TextIO +from . import errors from .doc_to_pixels import DocumentToPixels @@ -67,8 +68,13 @@ async def main() -> None: with open("/tmp/input_file", "wb") as f: f.write(data) - converter = DocumentToPixels() - await converter.convert() + try: + converter = DocumentToPixels() + await converter.convert() + except errors.ConversionException as e: + sys.exit(e.error_code) + except (RuntimeError, TimeoutError, ValueError) as e: + sys.exit(1) num_pages = len(list(out_dir.glob("*.rgb"))) await write_int(num_pages) diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py new file mode 100644 index 0000000..f86e27c --- /dev/null +++ b/dangerzone/conversion/errors.py @@ -0,0 +1,73 @@ +from typing import List, Optional, Type + + +class ConversionException(Exception): + error_message = "Unspecified error" + error_code = -1 + + def __init__(self, error_message: Optional[str] = None) -> None: + if error_message: + self.error_message = error_message + super().__init__(self.error_message) + + @classmethod + def get_subclasses(cls) -> List[Type["ConversionException"]]: + subclasses = [cls] + for subclass in cls.__subclasses__(): + subclasses += subclass.get_subclasses() + return subclasses + + +class DocFormatUnsupported(ConversionException): + error_code = 10 + error_message = "The document format is not supported" + + +class DocFormatUnsupportedHWPQubes(DocFormatUnsupported): + error_code = 16 + error_message = "HWP / HWPX formats are not supported in Qubes" + + +class LibreofficeFailure(ConversionException): + error_code = 20 + error_message = "Conversion to PDF with LibreOffice failed" + + +class InvalidGMConversion(ConversionException): + error_code = 30 + error_message = "Invalid conversion (Graphics Magic)" + + def __init__(self, error_message: str) -> None: + super(error_message) + + +class PagesException(ConversionException): + error_code = 40 + + +class NoPageCountException(PagesException): + error_code = 41 + error_message = "Number of pages could not be extracted from PDF" + + +class PDFtoPPMException(ConversionException): + error_code = 50 + error_message = "Error converting PDF to Pixels (pdftoppm)" + + +class PDFtoPPMInvalidHeader(PDFtoPPMException): + error_code = 51 + error_message = "Error converting PDF to Pixels (Invalid PPM header)" + + +class PDFtoPPMInvalidDepth(PDFtoPPMException): + error_code = 52 + error_message = "Error converting PDF to Pixels (Invalid PPM depth)" + + +def exception_from_error_code(error_code: int) -> Optional[ConversionException]: + """returns the conversion exception corresponding to the error code""" + for cls in ConversionException.get_subclasses(): + if cls.error_code == error_code: + return cls() + raise ValueError(f"Unknown error code '{error_code}'") diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 00552c9..c3484b3 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -5,6 +5,7 @@ from typing import Callable, Optional from colorama import Fore, Style +from ..conversion.errors import ConversionException from ..document import Document from ..util import replace_control_chars @@ -36,7 +37,10 @@ class IsolationProvider(ABC): document.mark_as_converting() try: success = self._convert(document, ocr_lang) - except Exception: + except ConversionException as e: + success = False + self.print_progress_trusted(document, True, e.error_message, 0) + except Exception as e: success = False log.exception( f"An exception occurred while converting document '{document.id}'" diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 9d00985..7ef6026 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -11,6 +11,7 @@ import sys import tempfile from typing import Any, Callable, List, Optional, Tuple +from ..conversion.errors import exception_from_error_code from ..document import Document from ..util import ( get_resource_path, @@ -305,6 +306,9 @@ class Container(IsolationProvider): if ret != 0: log.error("documents-to-pixels failed") + + # XXX Reconstruct exception from error code + raise exception_from_error_code(ret) # type: ignore [misc] else: # TODO: validate convert to pixels output diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index 8b56ae5..75b7ed5 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -20,6 +20,7 @@ from .base import IsolationProvider log = logging.getLogger(__name__) from ..conversion.common import running_on_qubes +from ..conversion.errors import exception_from_error_code from ..conversion.pixels_to_pdf import PixelsToPDF from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir from .base import MAX_CONVERSION_LOG_CHARS @@ -38,6 +39,8 @@ def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes: def read_int(p: subprocess.Popen) -> int: """Read 2 bytes from stdout, and decode them as int.""" untrusted_int = p.stdout.read(2) # type: ignore [union-attr] + if untrusted_int == b"": + raise ValueError("Nothing read from stdout (expected an integer)") return int.from_bytes(untrusted_int, signed=False) @@ -104,7 +107,12 @@ class Qubes(IsolationProvider): stderr=subprocess.DEVNULL, ) - n_pages = read_int(p) + try: + n_pages = read_int(p) + except ValueError: + error_code = p.wait() + # XXX Reconstruct exception from error code + raise exception_from_error_code(error_code) # type: ignore [misc] if n_pages == 0: # FIXME: Fail loudly in that case return False