mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 09:52:37 +02:00
Add page limit of 10000
Theoretically the max pages would be 65536 (2-byte unsigned int). However, this limit is much higher than what practical documents have, and larger ones can lead to unforeseen problems, for example RAM limitations. We thus opted to use a lower limit of 10K. The limit must be detected client-side, given that the server is distrusted. However, we also check it in the server, just as a fail-early mechanism.
This commit is contained in:
parent
afba362d22
commit
54b8ffbf96
8 changed files with 51 additions and 6 deletions
|
@ -239,6 +239,9 @@ class DocumentToPixels(DangerzoneConverter):
|
|||
else:
|
||||
raise errors.NoPageCountException()
|
||||
|
||||
if num_pages > errors.MAX_PAGES:
|
||||
raise errors.MaxPagesException()
|
||||
|
||||
# Get a more precise timeout, based on the number of pages
|
||||
timeout = self.calculate_timeout(size, num_pages)
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ from typing import List, Optional, Type
|
|||
|
||||
# XXX: errors start at 128 for conversion-related issues
|
||||
ERROR_SHIFT = 128
|
||||
MAX_PAGES = 10000
|
||||
|
||||
|
||||
class ConversionException(Exception):
|
||||
|
@ -53,6 +54,14 @@ class NoPageCountException(PagesException):
|
|||
error_message = "Number of pages could not be extracted from PDF"
|
||||
|
||||
|
||||
class MaxPagesException(PagesException):
    """Error for documents whose page count exceeds `MAX_PAGES`.

    The limit is checked by the server only as a fail-early mechanism. It is
    also enforced by the client, which distrusts the server.
    """

    error_code = ERROR_SHIFT + 42
    error_message = f"Number of pages exceeds maximum ({MAX_PAGES})"
|
||||
|
||||
|
||||
class PDFtoPPMException(ConversionException):
|
||||
error_code = ERROR_SHIFT + 50
|
||||
error_message = "Error converting PDF to Pixels (pdftoppm)"
|
||||
|
|
|
@ -125,9 +125,8 @@ class Qubes(IsolationProvider):
|
|||
os.set_blocking(self.proc.stdout.fileno(), False)
|
||||
|
||||
n_pages = read_int(self.proc.stdout, timeout)
|
||||
if n_pages == 0:
|
||||
# FIXME: Fail loudly in that case
|
||||
return False
|
||||
if n_pages == 0 or n_pages > errors.MAX_PAGES:
|
||||
raise errors.MaxPagesException()
|
||||
if ocr_lang:
|
||||
percentage_per_page = 50.0 / n_pages
|
||||
else:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Callable, List
|
||||
|
||||
|
@ -13,7 +14,11 @@ SAMPLE_DIRECTORY = "test_docs"
|
|||
BASIC_SAMPLE_PDF = "sample-pdf.pdf"
|
||||
BASIC_SAMPLE_DOC = "sample-doc.doc"
|
||||
SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external"
|
||||
SAMPLE_COMPRESSED_DIRECTORY = "test_docs_compressed"
|
||||
|
||||
test_docs_dir = Path(__file__).parent.joinpath(SAMPLE_DIRECTORY)
|
||||
test_docs_compressed_dir = Path(__file__).parent.joinpath(SAMPLE_COMPRESSED_DIRECTORY)
|
||||
|
||||
test_docs = [
|
||||
p
|
||||
for p in test_docs_dir.rglob("*")
|
||||
|
@ -73,6 +78,20 @@ def unreadable_pdf(tmp_path: Path) -> str:
|
|||
return str(file_path)
|
||||
|
||||
|
||||
@pytest.fixture
def pdf_11k_pages(tmp_path: Path) -> str:
    """Extract the zipped 11K-page sample PDF and return its path as a string.

    The document has pages of 1x1 px and was generated with the command:

        gs -sDEVICE=pdfwrite -o sample-11k-pages.pdf -dDEVICEWIDTHPOINTS=1 -dDEVICEHEIGHTPOINTS=1 -c 11000 {showpage} repeat
    """
    pdf_name = "sample-11k-pages.pdf"
    archive = test_docs_compressed_dir.joinpath(f"{pdf_name}.zip")

    # Unpack the whole archive into the per-test temporary directory.
    with zipfile.ZipFile(archive, "r") as bundle:
        bundle.extractall(tmp_path)

    extracted = tmp_path.joinpath(pdf_name)
    return str(extracted)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def uncommon_text() -> str:
|
||||
"""Craft a string with Unicode characters that are considered not common.
|
||||
|
|
|
@ -2,10 +2,11 @@ import pytest
|
|||
from colorama import Style
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from dangerzone.conversion import errors
|
||||
from dangerzone.document import Document
|
||||
from dangerzone.isolation_provider import base
|
||||
|
||||
from .. import sanitized_text, uncommon_text
|
||||
from .. import pdf_11k_pages, sanitized_text, uncommon_text
|
||||
|
||||
|
||||
class IsolationProviderTest:
|
||||
|
@ -48,3 +49,15 @@ class IsolationProviderTest:
|
|||
else:
|
||||
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
|
||||
log_error_spy.assert_not_called()
|
||||
|
||||
def test_max_pages_received(
    self,
    pdf_11k_pages: str,
    provider: base.IsolationProvider,
    mocker: MockerFixture,
) -> None:
    """Check that converting a document over the page limit is rejected.

    The `pdf_11k_pages` fixture supplies the path of an 11K-page PDF; the
    conversion is expected to raise `errors.MaxPagesException`.
    """
    provider.progress_callback = mocker.MagicMock()
    doc = Document(pdf_11k_pages)
    # `_convert` is expected to raise, so there is no return value to check;
    # `pytest.raises` fails the test if the exception does not occur.
    with pytest.raises(errors.MaxPagesException):
        provider._convert(doc, ocr_lang=None)
|
||||
|
|
|
@ -8,7 +8,8 @@ from pytest_mock import MockerFixture
|
|||
from dangerzone.document import Document
|
||||
from dangerzone.isolation_provider.container import Container
|
||||
|
||||
from .. import sanitized_text, uncommon_text
|
||||
# XXX Fixtures used in abstract Test class need to be imported regardless
|
||||
from .. import pdf_11k_pages, sanitized_text, uncommon_text
|
||||
from .base import IsolationProviderTest
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,8 @@ import pytest
|
|||
|
||||
from dangerzone.isolation_provider.qubes import Qubes
|
||||
|
||||
from .. import sanitized_text, uncommon_text
|
||||
# XXX Fixtures used in abstract Test class need to be imported regardless
|
||||
from .. import pdf_11k_pages, sanitized_text, uncommon_text
|
||||
from .base import IsolationProviderTest
|
||||
|
||||
|
||||
|
|
BIN
tests/test_docs_compressed/sample-11k-pages.pdf.zip
Normal file
BIN
tests/test_docs_compressed/sample-11k-pages.pdf.zip
Normal file
Binary file not shown.
Loading…
Reference in a new issue