From 80db7bb02e800eb4194bde1e01ba4f7c706ef696 Mon Sep 17 00:00:00 2001 From: deeplow Date: Tue, 19 Dec 2023 15:16:25 +0000 Subject: [PATCH] Remove pre-pymupdf exceptions and detect pymupdf ones --- dangerzone/conversion/doc_to_pixels.py | 15 ++++++++++----- dangerzone/conversion/errors.py | 24 +++--------------------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index 9c38212..6ac2dd0 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -174,7 +174,10 @@ class DocumentToPixels(DangerzoneConverter): # Convert input document to PDF conversion = conversions[mime_type] if conversion["type"] is None: - doc = fitz.open("/tmp/input_file", filetype=mime_type) + try: + doc = fitz.open("/tmp/input_file", filetype=mime_type) + except (ValueError, fitz.FileDataError): + raise errors.DocCorruptedException() elif conversion["type"] == "libreoffice": libreoffice_ext = conversion.get("libreoffice_ext", None) # Disable conversion for HWP/HWPX on specific platforms. See: @@ -207,11 +210,13 @@ class DocumentToPixels(DangerzoneConverter): # https://github.com/freedomofpress/dangerzone/issues/494 if not os.path.exists(pdf_filename): raise errors.LibreofficeFailure() - doc = fitz.open(pdf_filename) + try: + doc = fitz.open(pdf_filename) + except (ValueError, fitz.FileDataError): + raise errors.DocCorruptedException() else: - raise errors.InvalidGMConversion( - f"Invalid conversion type {conversion['type']} for MIME type {mime_type}" - ) + # NOTE: This should never be reached + raise errors.DocFormatUnsupported() self.percentage += 3 # Obtain number of pages diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py index 2aeb38d..459e533 100644 --- a/dangerzone/conversion/errors.py +++ b/dangerzone/conversion/errors.py @@ -47,12 +47,9 @@ class LibreofficeFailure(ConversionException): error_message = "Conversion to PDF with LibreOffice failed" -class InvalidGMConversion(ConversionException): +class DocCorruptedException(ConversionException): error_code = ERROR_SHIFT + 30 - error_message = "Invalid conversion (Graphics Magic)" - - def __init__(self, error_message: str) -> None: - super(error_message) + error_message = "The document appears to be corrupted and could not be opened" class PagesException(ConversionException): @@ -89,21 +86,6 @@ class PageCountMismatch(PagesException): ) -class PDFtoPPMException(ConversionException): - error_code = ERROR_SHIFT + 50 - error_message = "Error converting PDF to Pixels (pdftoppm)" - - -class PDFtoPPMInvalidHeader(PDFtoPPMException): - error_code = ERROR_SHIFT + 51 - error_message = "Error converting PDF to Pixels (Invalid PPM header)" - - -class PDFtoPPMInvalidDepth(PDFtoPPMException): - error_code = ERROR_SHIFT + 52 - error_message = "Error converting PDF to Pixels (Invalid PPM depth)" - - class InterruptedConversion(ConversionException): """Protocol received num of bytes different than expected""" @@ -113,7 +95,7 @@ class InterruptedConversion(ConversionException): ) -class UnexpectedConversionError(PDFtoPPMException): +class UnexpectedConversionError(ConversionException): error_code = ERROR_SHIFT + 100 error_message = "Some unexpected error occurred while converting the document"