diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py index 6f5753c..d817f2a 100644 --- a/dangerzone/conversion/errors.py +++ b/dangerzone/conversion/errors.py @@ -118,6 +118,11 @@ class InterruptedConversion(ConversionException): ) +class PixelFilesMismatch(ConversionException): + error_code = ERROR_SHIFT + 70 + error_message = "The number of pages received is different than expected" + + class UnexpectedConversionError(PDFtoPPMException): error_code = ERROR_SHIFT + 100 error_message = "Some unexpected error occurred while converting the document" diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 9efd439..70cae54 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -110,83 +110,20 @@ class IsolationProvider(ABC): """ Reconstruct PPM files and save as PNG to save space """ + if not (1 <= width <= errors.MAX_PAGE_WIDTH): + raise errors.MaxPageWidthException() + if not (1 <= height <= errors.MAX_PAGE_HEIGHT): + raise errors.MaxPageHeightException() + ppm_header = f"P6\n{width} {height}\n255\n".encode() ppm_data = io.BytesIO(ppm_header + rgb_data) png_path = Path(tempdir) / f"page-{page}.png" + # Verify the exact data was received + if len(rgb_data) != width * height * 3: + raise errors.InterruptedConversion() + try: Image.open(ppm_data).save(png_path, "PNG") except UnidentifiedImageError as e: raise errors.PPMtoPNGError() from e - - -# From global_common: - -# def validate_convert_to_pixel_output(self, common, output): -# """ -# Take the output from the convert to pixels tasks and validate it. Returns -# a tuple like: (success (boolean), error_message (str)) -# """ -# max_image_width = 10000 -# max_image_height = 10000 - -# # Did we hit an error? -# for line in output.split("\n"): -# if ( -# "failed:" in line -# or "The document format is not supported" in line -# or "Error" in line -# ): -# return False, output - -# # How many pages was that? -# num_pages = None -# for line in output.split("\n"): -# if line.startswith("Document has "): -# num_pages = line.split(" ")[2] -# break -# if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0: -# return False, "Invalid number of pages returned" -# num_pages = int(num_pages) - -# # Make sure we have the files we expect -# expected_filenames = [] -# for i in range(1, num_pages + 1): -# expected_filenames += [ -# f"page-{i}.rgb", -# f"page-{i}.width", -# f"page-{i}.height", -# ] -# expected_filenames.sort() -# actual_filenames = os.listdir(common.pixel_dir.name) -# actual_filenames.sort() - -# if expected_filenames != actual_filenames: -# return ( -# False, -# f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}", -# ) - -# # Make sure the files are the correct sizes -# for i in range(1, num_pages + 1): -# with open(f"{common.pixel_dir.name}/page-{i}.width") as f: -# w_str = f.read().strip() -# with open(f"{common.pixel_dir.name}/page-{i}.height") as f: -# h_str = f.read().strip() -# w = int(w_str) -# h = int(h_str) -# if ( -# not w_str.isdigit() -# or not h_str.isdigit() -# or w <= 0 -# or w > max_image_width -# or h <= 0 -# or h > max_image_height -# ): -# return False, f"Page {i} has invalid geometry" - -# # Make sure the RGB file is the correct size -# if os.path.getsize(f"{common.pixel_dir.name}/page-{i}.rgb") != w * h * 3: -# return False, f"Page {i} has an invalid RGB file size" - -# return True, True diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 9235c7f..388a160 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -306,6 +306,8 @@ class Container(IsolationProvider): ) num_pages = len(glob.glob(f"{pixel_dir}/page-*.rgb")) + self.verify_received_pixel_files(pixel_dir, num_pages) + for page in range(1, num_pages + 1): filename_base = f"{pixel_dir}/page-{page}" rgb_filename = f"{filename_base}.rgb" @@ -379,6 +381,25 @@ class Container(IsolationProvider): return success + def verify_received_pixel_files( + self, pixel_dir: pathlib.Path, num_pages: int + ) -> None: + """Make sure we have the files we expect""" + + expected_filenames = ["captured_output.txt"] + for i in range(1, num_pages + 1): + expected_filenames += [ + f"page-{i}.rgb", + f"page-{i}.width", + f"page-{i}.height", + ] + expected_filenames.sort() + actual_filenames = os.listdir(pixel_dir) + actual_filenames.sort() + + if expected_filenames != actual_filenames: + raise errors.PixelFilesMismatch() + def get_max_parallel_conversions(self) -> int: # FIXME hardcoded 1 until timeouts are more limited and better handled # https://github.com/freedomofpress/dangerzone/issues/257 diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index 9dc15e3..b4cb5db 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -109,10 +109,6 @@ class Qubes(IsolationProvider): width = read_int(self.proc.stdout, timeout=sw.remaining) height = read_int(self.proc.stdout, timeout=sw.remaining) - if not (1 <= width <= errors.MAX_PAGE_WIDTH): - raise errors.MaxPageWidthException() - if not (1 <= height <= errors.MAX_PAGE_HEIGHT): - raise errors.MaxPageHeightException() num_pixels = width * height * 3 # three color channels untrusted_pixels = read_bytes( diff --git a/tests/isolation_provider/base.py b/tests/isolation_provider/base.py index 52a7d0f..88bb32b 100644 --- a/tests/isolation_provider/base.py +++ b/tests/isolation_provider/base.py @@ -9,7 +9,13 @@ from dangerzone.document import Document from dangerzone.isolation_provider import base from dangerzone.isolation_provider.qubes import running_on_qubes -from .. import pdf_11k_pages, sanitized_text, uncommon_text +from .. import ( + pdf_11k_pages, + sample_bad_height, + sample_bad_width, + sanitized_text, + uncommon_text, +) @pytest.mark.skipif( @@ -68,3 +74,18 @@ class IsolationProviderTest: with pytest.raises(errors.MaxPagesException): success = provider._convert(doc, ocr_lang=None) assert not success + + def test_max_dimensions( + self, + sample_bad_width: str, + sample_bad_height: str, + provider: base.IsolationProvider, + mocker: MockerFixture, + ) -> None: + provider.progress_callback = mocker.MagicMock() + with pytest.raises(errors.MaxPageWidthException): + success = provider._convert(Document(sample_bad_width), ocr_lang=None) + assert not success + with pytest.raises(errors.MaxPageHeightException): + success = provider._convert(Document(sample_bad_height), ocr_lang=None) + assert not success diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index 4ad7831..da5dce8 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -9,7 +9,13 @@ from dangerzone.document import Document from dangerzone.isolation_provider.container import Container # XXX Fixtures used in abstract Test class need to be imported regardless -from .. import pdf_11k_pages, sanitized_text, uncommon_text +from .. import ( + pdf_11k_pages, + sample_bad_height, + sample_bad_width, + sanitized_text, + uncommon_text, +) from .base import IsolationProviderTest diff --git a/tests/isolation_provider/test_qubes.py b/tests/isolation_provider/test_qubes.py index 78ce9e2..ff0b37f 100644 --- a/tests/isolation_provider/test_qubes.py +++ b/tests/isolation_provider/test_qubes.py @@ -45,21 +45,6 @@ class TestQubes(IsolationProviderTest): success = provider._convert(doc, ocr_lang=None) assert not success - def test_max_dimensions( - self, - sample_bad_width: str, - sample_bad_height: str, - provider: Qubes, - mocker: MockerFixture, - ) -> None: - provider.progress_callback = mocker.MagicMock() - with pytest.raises(errors.MaxPageWidthException): - success = provider._convert(Document(sample_bad_width), ocr_lang=None) - assert not success - with pytest.raises(errors.MaxPageHeightException): - success = provider._convert(Document(sample_bad_height), ocr_lang=None) - assert not success - def test_out_of_ram( self, provider: Qubes,