Add page limit of 10000

Theoretically the max pages would be 65535 (the largest value of a 2-byte
unsigned int). However, this limit is much higher than practical documents
have, and larger ones can lead to unforeseen problems, for example RAM
limitations.

We thus opted to use a lower limit of 10K. The limit must be
enforced client-side, given that the server is distrusted. However,
we also check it on the server, just as a fail-early mechanism.
This commit is contained in:
deeplow 2023-08-29 11:56:45 +01:00
parent afba362d22
commit 54b8ffbf96
No known key found for this signature in database
GPG key ID: 577982871529A52A
8 changed files with 51 additions and 6 deletions

View file

@ -239,6 +239,9 @@ class DocumentToPixels(DangerzoneConverter):
else: else:
raise errors.NoPageCountException() raise errors.NoPageCountException()
if num_pages > errors.MAX_PAGES:
raise errors.MaxPagesException()
# Get a more precise timeout, based on the number of pages # Get a more precise timeout, based on the number of pages
timeout = self.calculate_timeout(size, num_pages) timeout = self.calculate_timeout(size, num_pages)

View file

@ -2,6 +2,7 @@ from typing import List, Optional, Type
# XXX: errors start at 128 for conversion-related issues # XXX: errors start at 128 for conversion-related issues
ERROR_SHIFT = 128 ERROR_SHIFT = 128
MAX_PAGES = 10000
class ConversionException(Exception): class ConversionException(Exception):
@ -53,6 +54,14 @@ class NoPageCountException(PagesException):
error_message = "Number of pages could not be extracted from PDF" error_message = "Number of pages could not be extracted from PDF"
class MaxPagesException(PagesException):
    """Signals that a document's page count is over the MAX_PAGES limit.

    The limit is checked by the server (to fail early) and also by the
    client, which distrusts the server.
    """

    error_code = ERROR_SHIFT + 42
    error_message = f"Number of pages exceeds maximum ({MAX_PAGES})"
class PDFtoPPMException(ConversionException): class PDFtoPPMException(ConversionException):
error_code = ERROR_SHIFT + 50 error_code = ERROR_SHIFT + 50
error_message = "Error converting PDF to Pixels (pdftoppm)" error_message = "Error converting PDF to Pixels (pdftoppm)"

View file

@ -125,9 +125,8 @@ class Qubes(IsolationProvider):
os.set_blocking(self.proc.stdout.fileno(), False) os.set_blocking(self.proc.stdout.fileno(), False)
n_pages = read_int(self.proc.stdout, timeout) n_pages = read_int(self.proc.stdout, timeout)
if n_pages == 0: if n_pages == 0 or n_pages > errors.MAX_PAGES:
# FIXME: Fail loudly in that case raise errors.MaxPagesException()
return False
if ocr_lang: if ocr_lang:
percentage_per_page = 50.0 / n_pages percentage_per_page = 50.0 / n_pages
else: else:

View file

@ -1,4 +1,5 @@
import sys import sys
import zipfile
from pathlib import Path from pathlib import Path
from typing import Callable, List from typing import Callable, List
@ -13,7 +14,11 @@ SAMPLE_DIRECTORY = "test_docs"
BASIC_SAMPLE_PDF = "sample-pdf.pdf" BASIC_SAMPLE_PDF = "sample-pdf.pdf"
BASIC_SAMPLE_DOC = "sample-doc.doc" BASIC_SAMPLE_DOC = "sample-doc.doc"
SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external" SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external"
SAMPLE_COMPRESSED_DIRECTORY = "test_docs_compressed"
test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY) test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY)
test_docs_compressed_dir = Path(__file__).parent.joinpath(SAMPLE_COMPRESSED_DIRECTORY)
test_docs = [ test_docs = [
p p
for p in test_docs_dir.rglob("*") for p in test_docs_dir.rglob("*")
@ -73,6 +78,20 @@ def unreadable_pdf(tmp_path: Path) -> str:
return str(file_path) return str(file_path)
@pytest.fixture
def pdf_11k_pages(tmp_path: Path) -> str:
    """Unpack the zipped 11K-page sample PDF and return the path to it.

    The document consists of 1x1 px pages and is stored compressed under
    the compressed test-docs directory. It was generated with the command:
    gs -sDEVICE=pdfwrite -o sample-11k-pages.pdf -dDEVICEWIDTHPOINTS=1 -dDEVICEHEIGHTPOINTS=1 -c 11000 {showpage} repeat
    """
    pdf_name = "sample-11k-pages.pdf"
    archive = test_docs_compressed_dir.joinpath(f"{pdf_name}.zip")
    # Extract into pytest's per-test temporary directory.
    with zipfile.ZipFile(archive, "r") as bundle:
        bundle.extractall(tmp_path)
    extracted = tmp_path.joinpath(pdf_name)
    return str(extracted)
@pytest.fixture @pytest.fixture
def uncommon_text() -> str: def uncommon_text() -> str:
"""Craft a string with Unicode characters that are considered not common. """Craft a string with Unicode characters that are considered not common.

View file

@ -2,10 +2,11 @@ import pytest
from colorama import Style from colorama import Style
from pytest_mock import MockerFixture from pytest_mock import MockerFixture
from dangerzone.conversion import errors
from dangerzone.document import Document from dangerzone.document import Document
from dangerzone.isolation_provider import base from dangerzone.isolation_provider import base
from .. import sanitized_text, uncommon_text from .. import pdf_11k_pages, sanitized_text, uncommon_text
class IsolationProviderTest: class IsolationProviderTest:
@ -48,3 +49,15 @@ class IsolationProviderTest:
else: else:
assert log_info_spy.call_args[0][0].endswith(sanitized_text) assert log_info_spy.call_args[0][0].endswith(sanitized_text)
log_error_spy.assert_not_called() log_error_spy.assert_not_called()
def test_max_pages_received(
    self,
    pdf_11k_pages: str,
    provider: base.IsolationProvider,
    mocker: MockerFixture,
) -> None:
    """Check that converting the 11K-page sample raises MaxPagesException."""
    provider.progress_callback = mocker.MagicMock()
    doc = Document(pdf_11k_pages)
    # The call is expected to raise, so it produces no return value to
    # inspect; the raised exception type is the only thing asserted here.
    with pytest.raises(errors.MaxPagesException):
        provider._convert(doc, ocr_lang=None)

View file

@ -8,7 +8,8 @@ from pytest_mock import MockerFixture
from dangerzone.document import Document from dangerzone.document import Document
from dangerzone.isolation_provider.container import Container from dangerzone.isolation_provider.container import Container
from .. import sanitized_text, uncommon_text # XXX Fixtures used in abstract Test class need to be imported regardless
from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .base import IsolationProviderTest from .base import IsolationProviderTest

View file

@ -2,7 +2,8 @@ import pytest
from dangerzone.isolation_provider.qubes import Qubes from dangerzone.isolation_provider.qubes import Qubes
from .. import sanitized_text, uncommon_text # XXX Fixtures used in abstract Test class need to be imported regardless
from .. import pdf_11k_pages, sanitized_text, uncommon_text
from .base import IsolationProviderTest from .base import IsolationProviderTest

Binary file not shown.