Move the ConvertToPixels task validation into global_common so the CLI and GUI can share it

2025-04-28 18:02:38 +02:00 · 2021-06-09 16:32:06 -07:00 · 2021-06-09 16:32:06 -07:00 · 73d412501c
commit 73d412501c
parent 8aaf7ebcf1
3 changed files with 107 additions and 69 deletions
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@ -1,6 +1,7 @@
 import click
 from .global_common import GlobalCommon
 from .common import Common
 def exec_container(global_common, args):
@ -35,6 +36,7 @@ def exec_container(global_common, args):
@click.argument("filename", required=True)
 def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filename):
    global_common = GlobalCommon()
    common = Common()
    # Make sure custom container exists
    if custom_container:
@ -56,8 +58,34 @@ def cli_main(custom_container, safe_pdf_filename, ocr_lang, skip_update, filenam
                )
                return
    # Pull the latest image
    if not skip_update:
        click.echo("Pulling container image (this might take a few minutes)")
        returncode, _, _ = exec_container(global_common, ["pull"])
        if returncode != 0:
            return
    # Document to pixels
    click.echo("Converting document to pixels")
    returncode, output, _ = exec_container(
        global_common,
        [
            "documenttopixels",
            "--document-filename",
            common.document_filename,
            "--pixel-dir",
            common.pixel_dir.name,
            "--container-name",
            global_common.get_container_name(),
        ],
    )
    if returncode != 0:
        return
    success, error_message = global_common.validate_convert_to_pixel_output(
        common, output
    )
    if not success:
        click.echo(error_message)
        return
--- a/dangerzone/global_common.py
+++ b/dangerzone/global_common.py
@ -276,6 +276,10 @@ class GlobalCommon(object):
            return None
    def container_exists(self, container_name):
        """
        Check if container_name is a valid container. Returns a tuple like:
        (success (boolean), error_message (str))
        """
        # Do we have this container?
        with self.exec_dangerzone_container(
            ["ls", "--container-name", container_name]
@ -297,3 +301,72 @@ class GlobalCommon(object):
                return False, f"Container '{container_name}' not found"
        return True, True
    def validate_convert_to_pixel_output(self, common, output):
        """
        Take the output from the convert to pixels tasks and validate it. Returns
        a tuple like: (success (boolean), error_message (str))

        The checks run in this order, and the first failure is returned:

        1. No line of ``output`` contains a known error marker (on failure the
           whole ``output`` is returned as the error message).
        2. A line starting with "Document has " reports a positive page count.
        3. The directory ``common.pixel_dir.name`` contains exactly the files
           page-N.rgb, page-N.width and page-N.height for every page N.
        4. Each page's width and height are positive integers no larger than
           10000, and the .rgb file is exactly width * height * 3 bytes.

        On success the tuple is ``(True, True)``; the second element carries no
        message and is kept as-is so existing callers keep working.
        """
        max_image_width = 10000
        max_image_height = 10000

        lines = output.split("\n")

        # Did we hit an error?
        error_markers = (
            "failed:",
            "The document format is not supported",
            "Error",
        )
        if any(marker in line for line in lines for marker in error_markers):
            return False, output

        # How many pages was that?
        num_pages_str = None
        for line in lines:
            if line.startswith("Document has "):
                num_pages_str = line.split(" ")[2]
                break

        if not num_pages_str or not num_pages_str.isdigit():
            return False, "Invalid number of pages returned"
        try:
            num_pages = int(num_pages_str)
        except ValueError:
            # isdigit() accepts some strings that int() rejects (for example
            # superscript digits, or digit strings longer than the
            # interpreter's conversion limit); treat those as invalid too.
            return False, "Invalid number of pages returned"
        if num_pages <= 0:
            return False, "Invalid number of pages returned"

        # Make sure we have the files we expect
        expected_filenames = []
        for i in range(1, num_pages + 1):
            expected_filenames += [
                f"page-{i}.rgb",
                f"page-{i}.width",
                f"page-{i}.height",
            ]
        expected_filenames.sort()
        actual_filenames = os.listdir(common.pixel_dir.name)
        actual_filenames.sort()

        if expected_filenames != actual_filenames:
            return (
                False,
                f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}",
            )

        # Make sure the files are the correct sizes
        for i in range(1, num_pages + 1):
            with open(f"{common.pixel_dir.name}/page-{i}.width") as f:
                w_str = f.read().strip()
            with open(f"{common.pixel_dir.name}/page-{i}.height") as f:
                h_str = f.read().strip()

            # Validate the text *before* converting it: the files are produced
            # by the conversion step and must not be able to crash validation
            # with a ValueError.
            if not (w_str.isdigit() and h_str.isdigit()):
                return False, f"Page {i} has invalid geometry"
            try:
                w = int(w_str)
                h = int(h_str)
            except ValueError:
                # Same isdigit()/int() mismatch as for the page count above.
                return False, f"Page {i} has invalid geometry"

            if not (0 < w <= max_image_width and 0 < h <= max_image_height):
                return False, f"Page {i} has invalid geometry"

            # Make sure the RGB file is the correct size
            if os.path.getsize(f"{common.pixel_dir.name}/page-{i}.rgb") != w * h * 3:
                return False, f"Page {i} has an invalid RGB file size"

        return True, True
--- a/dangerzone/gui/tasks.py
+++ b/dangerzone/gui/tasks.py
@ -65,10 +65,6 @@ class ConvertToPixels(TaskBase):
        self.global_common = global_common
        self.common = common
        self.max_image_width = 10000
        self.max_image_height = 10000
        self.max_image_size = self.max_image_width * self.max_image_height * 3
    def run(self):
        self.update_label.emit("Converting document to pixels")
        args = [
@ -80,76 +76,17 @@ class ConvertToPixels(TaskBase):
            "--container-name",
            self.global_common.get_container_name(),
        ]
-        returncode, output, stderr = self.exec_container(args)
+        returncode, output, _ = self.exec_container(args)
        if returncode != 0:
            return
-        # Did we hit an error?
+        success, error_message = self.global_common.validate_convert_to_pixel_output(
-        for line in output.split("\n"):
+            self.common, output
-            if (
+        )
-                "failed:" in line
+        if not success:
-                or "The document format is not supported" in line
+            self.task_failed.emit(error_message)
                or "Error" in line
            ):
                self.task_failed.emit(output)
                return
        # How many pages was that?
        num_pages = None
        for line in output.split("\n"):
            if line.startswith("Document has "):
                num_pages = line.split(" ")[2]
                break
        if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
            self.task_failed.emit("Invalid number of pages returned")
            return
        num_pages = int(num_pages)
        # Make sure we have the files we expect
        expected_filenames = []
        for i in range(1, num_pages + 1):
            expected_filenames += [
                f"page-{i}.rgb",
                f"page-{i}.width",
                f"page-{i}.height",
            ]
        expected_filenames.sort()
        actual_filenames = os.listdir(self.common.pixel_dir.name)
        actual_filenames.sort()
        if expected_filenames != actual_filenames:
            self.task_failed.emit(
                f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
            )
            return
        # Make sure the files are the correct sizes
        for i in range(1, num_pages + 1):
            with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f:
                w_str = f.read().strip()
            with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f:
                h_str = f.read().strip()
            w = int(w_str)
            h = int(h_str)
            if (
                not w_str.isdigit()
                or not h_str.isdigit()
                or w <= 0
                or w > self.max_image_width
                or h <= 0
                or h > self.max_image_height
            ):
                self.task_failed.emit(f"Page {i} has invalid geometry")
                return
            # Make sure the RGB file is the correct size
            if (
                os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb")
                != w * h * 3
            ):
                self.task_failed.emit(f"Page {i} has an invalid RGB file size")
                return
        self.task_finished.emit()