diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index 4bf135c..96d1f05 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -239,6 +239,9 @@ class DocumentToPixels(DangerzoneConverter): else: raise errors.NoPageCountException() + if num_pages > errors.MAX_PAGES: + raise errors.MaxPagesException() + # Get a more precise timeout, based on the number of pages timeout = self.calculate_timeout(size, num_pages) diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py index f7c6cbb..97d5e91 100644 --- a/dangerzone/conversion/errors.py +++ b/dangerzone/conversion/errors.py @@ -2,6 +2,7 @@ from typing import List, Optional, Type # XXX: errors start at 128 for conversion-related issues ERROR_SHIFT = 128 +MAX_PAGES = 10000 class ConversionException(Exception): @@ -53,6 +54,14 @@ class NoPageCountException(PagesException): error_message = "Number of pages could not be extracted from PDF" +class MaxPagesException(PagesException): + """Max number of pages enforced by the client (to fail early) but also the + server, which distrusts the client""" + + error_code = ERROR_SHIFT + 42 + error_message = f"Number of pages exceeds maximum ({MAX_PAGES})" + + class PDFtoPPMException(ConversionException): error_code = ERROR_SHIFT + 50 error_message = "Error converting PDF to Pixels (pdftoppm)" diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index 01214df..b9af677 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -125,9 +125,8 @@ class Qubes(IsolationProvider): os.set_blocking(self.proc.stdout.fileno(), False) n_pages = read_int(self.proc.stdout, timeout) - if n_pages == 0: - # FIXME: Fail loudly in that case - return False + if n_pages == 0 or n_pages > errors.MAX_PAGES: + raise errors.MaxPagesException() if ocr_lang: percentage_per_page = 50.0 / n_pages else: diff --git a/tests/__init__.py b/tests/__init__.py index 57e7d0b..35d2e83 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ import sys +import zipfile from pathlib import Path from typing import Callable, List @@ -13,7 +14,11 @@ SAMPLE_DIRECTORY = "test_docs" BASIC_SAMPLE_PDF = "sample-pdf.pdf" BASIC_SAMPLE_DOC = "sample-doc.doc" SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external" +SAMPLE_COMPRESSED_DIRECTORY = "test_docs_compressed" + test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY) +test_docs_compressed_dir = Path(__file__).parent.joinpath(SAMPLE_COMPRESSED_DIRECTORY) + test_docs = [ p for p in test_docs_dir.rglob("*") @@ -73,6 +78,20 @@ def unreadable_pdf(tmp_path: Path) -> str: return str(file_path) +@pytest.fixture +def pdf_11k_pages(tmp_path: Path) -> str: + """11K page document with pages of 1x1 px. Generated with the command: + + gs -sDEVICE=pdfwrite -o sample-11k-pages.pdf -dDEVICEWIDTHPOINTS=1 -dDEVICEHEIGHTPOINTS=1 -c 11000 {showpage} repeat + """ + + filename = "sample-11k-pages.pdf" + zip_path = test_docs_compressed_dir / f"{filename}.zip" + with zipfile.ZipFile(zip_path, "r") as zip_file: + zip_file.extractall(tmp_path) + return str(tmp_path / filename) + + @pytest.fixture def uncommon_text() -> str: """Craft a string with Unicode characters that are considered not common. diff --git a/tests/isolation_provider/base.py b/tests/isolation_provider/base.py index c6a12c9..2fcdb61 100644 --- a/tests/isolation_provider/base.py +++ b/tests/isolation_provider/base.py @@ -2,10 +2,11 @@ import pytest from colorama import Style from pytest_mock import MockerFixture +from dangerzone.conversion import errors from dangerzone.document import Document from dangerzone.isolation_provider import base -from .. import sanitized_text, uncommon_text +from .. import pdf_11k_pages, sanitized_text, uncommon_text class IsolationProviderTest: @@ -48,3 +49,15 @@ class IsolationProviderTest: else: assert log_info_spy.call_args[0][0].endswith(sanitized_text) log_error_spy.assert_not_called() + + def test_max_pages_received( + self, + pdf_11k_pages: str, + provider: base.IsolationProvider, + mocker: MockerFixture, + ) -> None: + provider.progress_callback = mocker.MagicMock() + doc = Document(pdf_11k_pages) + with pytest.raises(errors.MaxPagesException): + success = provider._convert(doc, ocr_lang=None) + assert not success diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index a0424a9..4ad7831 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -8,7 +8,8 @@ from pytest_mock import MockerFixture from dangerzone.document import Document from dangerzone.isolation_provider.container import Container -from .. import sanitized_text, uncommon_text +# XXX Fixtures used in abstract Test class need to be imported regardless +from .. import pdf_11k_pages, sanitized_text, uncommon_text from .base import IsolationProviderTest diff --git a/tests/isolation_provider/test_qubes.py b/tests/isolation_provider/test_qubes.py index 4d968b6..42d23eb 100644 --- a/tests/isolation_provider/test_qubes.py +++ b/tests/isolation_provider/test_qubes.py @@ -2,7 +2,8 @@ import pytest from dangerzone.isolation_provider.qubes import Qubes -from .. import sanitized_text, uncommon_text +# XXX Fixtures used in abstract Test class need to be imported regardless +from .. import pdf_11k_pages, sanitized_text, uncommon_text from .base import IsolationProviderTest diff --git a/tests/test_docs_compressed/sample-11k-pages.pdf.zip b/tests/test_docs_compressed/sample-11k-pages.pdf.zip new file mode 100644 index 0000000..dd7b7ee Binary files /dev/null and b/tests/test_docs_compressed/sample-11k-pages.pdf.zip differ