Convert to pixel data

2025-04-28 18:02:38 +02:00 · 2020-01-06 17:02:12 -08:00 · 2020-01-06 17:02:12 -08:00 · bcad2f6256
commit bcad2f6256
parent fe028e5f0c
5 changed files with 148 additions and 13 deletions
--- a/dangerzone/common.py
+++ b/dangerzone/common.py
@ -1,6 +1,7 @@
 import sys
 import os
 import inspect
 import tempfile
 class Common(object):
@ -9,7 +10,9 @@ class Common(object):
    """
    def __init__(self):
-        pass
+        # Temporary directory to store pixel data
        self.tmpdir = tempfile.TemporaryDirectory()
        print(f"tmpdir is: {self.tmpdir.name}")
    def get_resource_path(self, filename):
        if getattr(sys, "dangerzone_dev", False):
--- a/dangerzone/main_window.py
+++ b/dangerzone/main_window.py
@ -1,6 +1,6 @@
 from PyQt5 import QtCore, QtGui, QtWidgets
-from .tasks import PullImageTask, BuildContainerTask
+from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels
 class MainWindow(QtWidgets.QMainWindow):
@ -40,11 +40,11 @@ class MainWindow(QtWidgets.QMainWindow):
        central_widget.setLayout(layout)
        self.setCentralWidget(central_widget)
-        self.tasks = [PullImageTask, BuildContainerTask]
+        self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels]
    def start(self, filename):
        print(f"Input document: {filename}")
-        self.document_filename = filename
+        self.common.document_filename = filename
        self.show()
        self.next_task()
@ -52,12 +52,16 @@ class MainWindow(QtWidgets.QMainWindow):
    def next_task(self):
        if len(self.tasks) == 0:
            print("Tasks finished")
            self.task_label.setText("Tasks finished")
            return
        self.task_details.setText("")
        self.current_task = self.tasks.pop(0)(self.common)
        self.current_task.update_label.connect(self.update_label)
        self.current_task.update_details.connect(self.update_details)
-        self.current_task.thread_finished.connect(self.next_task)
+        self.current_task.task_finished.connect(self.next_task)
        self.current_task.task_failed.connect(self.task_failed)
        self.current_task.start()
    def update_label(self, s):
@ -66,10 +70,16 @@ class MainWindow(QtWidgets.QMainWindow):
    def update_details(self, s):
        """Replace the text shown in the task details widget with ``s``."""
        details_widget = self.task_details
        details_widget.setText(s)
    def task_failed(self, err):
        """Report a failed task in the UI.

        The task label is switched to a failure notice, and the details
        widget (with word wrap enabled) shows the path of the temporary
        directory followed by the error text ``err``.
        """
        self.task_label.setText("Task failed :(")

        details_widget = self.task_details
        details_widget.setWordWrap(True)

        # Include the temporary directory so its contents can be inspected.
        message = f"Temporary directory: {self.common.tmpdir.name}\n\n{err}"
        details_widget.setText(message)
    def scroll_to_bottom(self, minimum, maximum):
        """Move the details scroll area's vertical scroll bar to ``maximum``.

        ``minimum`` is accepted but not used.
        """
        vertical_bar = self.details_scrollarea.verticalScrollBar()
        vertical_bar.setValue(maximum)
    def closeEvent(self, e):
        """Handle the window close event ``e``: accept it and quit the app."""
        print("closing")
        # Accept the close event, then ask the application object to quit.
        e.accept()
        self.app.quit()
--- a/dangerzone/tasks.py
+++ b/dangerzone/tasks.py
@ -1,10 +1,13 @@
 import subprocess
 import time
 import tempfile
 import os
 from PyQt5 import QtCore, QtWidgets, QtGui
 class TaskBase(QtCore.QThread):
-    thread_finished = QtCore.pyqtSignal()
+    task_finished = QtCore.pyqtSignal()
    task_failed = QtCore.pyqtSignal(str)
    update_label = QtCore.pyqtSignal(str)
    update_details = QtCore.pyqtSignal(str)
@ -34,6 +37,8 @@ class TaskBase(QtCore.QThread):
            output += p.stdout.read()
            self.update_details.emit(output)
        return output
 class PullImageTask(TaskBase):
    def __init__(self, common):
@ -45,7 +50,7 @@ class PullImageTask(TaskBase):
        self.update_details.emit("")
        args = ["podman", "pull", "ubuntu:18.04"]
        self.execute_podman(args, watch="stderr")
-        self.thread_finished.emit()
+        self.task_finished.emit()
 class BuildContainerTask(TaskBase):
@ -59,4 +64,85 @@ class BuildContainerTask(TaskBase):
        self.update_details.emit("")
        args = ["podman", "build", "-t", "dangerzone", container_path]
        self.execute_podman(args)
-        self.thread_finished.emit()
+        self.task_finished.emit()
 class ConvertToPixels(TaskBase):
    """Task that converts the input document to raw RGB pixel data.

    The conversion runs ``/tmp/convert_to_pixels`` inside the ``dangerzone``
    container with networking disabled.  The document is mounted at
    ``/tmp/input_file`` and the shared temporary directory at ``/dangerzone``.
    Afterwards everything found in the temporary directory is validated:
    the reported page count, the exact set of files, each page's geometry
    and the size of each RGB file.  Any validation failure is reported via
    the ``task_failed`` signal; success is reported via ``task_finished``.
    """

    def __init__(self, common):
        """Store the shared ``common`` object and the image size limits."""
        super(ConvertToPixels, self).__init__()
        self.common = common

        # Upper bounds accepted for a single page; 3 bytes per pixel (RGB).
        self.max_image_width = 10000
        self.max_image_height = 10000
        self.max_image_size = self.max_image_width * self.max_image_height * 3

    @staticmethod
    def _parse_positive_int(text):
        """Return ``text`` as an int if it is a strictly positive number.

        Returns None for empty strings, non-digit strings, zero, and for
        strings that pass ``str.isdigit()`` but are rejected by ``int()``.
        """
        if not text or not text.isdigit():
            return None
        try:
            value = int(text)
        except ValueError:
            # str.isdigit() accepts some characters (e.g. superscript
            # digits) that int() cannot convert.
            return None
        return value if value > 0 else None

    def _read_dimension(self, path):
        """Read a width/height file and return its value, or None if invalid."""
        try:
            with open(path) as f:
                text = f.read().strip()
        except UnicodeDecodeError:
            # The file is produced by the container; undecodable content
            # is treated as invalid rather than crashing the thread.
            return None
        return self._parse_positive_int(text)

    def run(self):
        """Run the conversion container and validate what it produced."""
        self.update_label.emit("Converting document to pixels")
        tmpdir = self.common.tmpdir.name
        args = [
            "podman",
            "run",
            "--network",
            "none",
            "-v",
            f"{self.common.document_filename}:/tmp/input_file",
            "-v",
            f"{tmpdir}:/dangerzone",
            "dangerzone",
            "/tmp/convert_to_pixels",
        ]
        output = self.execute_podman(args)

        # How many pages was that?
        num_pages = None
        for line in output.split("\n"):
            if line.startswith("Document has "):
                num_pages = self._parse_positive_int(line.split(" ")[2])
                break
        if num_pages is None:
            self.task_failed.emit("Invalid number of pages returned")
            return

        # Make sure we have exactly the files we expect, and nothing else
        expected_filenames = sorted(
            f"page-{i}.{extension}"
            for i in range(1, num_pages + 1)
            for extension in ("rgb", "width", "height")
        )
        actual_filenames = sorted(os.listdir(tmpdir))
        if expected_filenames != actual_filenames:
            self.task_failed.emit(
                f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
            )
            return

        # Make sure the files are the correct sizes
        for i in range(1, num_pages + 1):
            # Validate the text before using it as a number: the values
            # come from the container and must not be trusted.
            w = self._read_dimension(os.path.join(tmpdir, f"page-{i}.width"))
            h = self._read_dimension(os.path.join(tmpdir, f"page-{i}.height"))
            if (
                w is None
                or h is None
                or w > self.max_image_width
                or h > self.max_image_height
            ):
                self.task_failed.emit(f"Page {i} has invalid geometry")
                return

            # Make sure the RGB file is the correct size (3 bytes per pixel)
            rgb_path = os.path.join(tmpdir, f"page-{i}.rgb")
            if os.path.getsize(rgb_path) != w * h * 3:
                self.task_failed.emit(f"Page {i} has an invalid RGB file size")
                return

        self.task_finished.emit()
--- a/share/container/Containerfile
+++ b/share/container/Containerfile
@ -1,8 +1,6 @@
 FROM ubuntu:18.04
 RUN apt-get update && \
-    apt-get install -y poppler-utils imagemagick
+    apt-get install -y file poppler-utils imagemagick
 RUN useradd -ms /bin/bash user
 USER user:user
 COPY convert_to_pixels /tmp/convert_to_pixels
--- a/share/container/convert_to_pixels
+++ b/share/container/convert_to_pixels
@ -0,0 +1,38 @@
 #!/bin/bash
 # Split /tmp/input_file into single-page PDFs, render each page to raw RGB
 # pixel data, and move the .rgb/.width/.height files for every page into
 # the /dangerzone volume.
 IMG_DEPTH=8
 # Make the page glob expand to nothing (instead of itself) when no pages exist
 shopt -s nullglob
 echo "Separating document into pages"
 pdfseparate /tmp/input_file /tmp/page-%d.pdf
 PAGE_FILES=(/tmp/page-*.pdf)
 NUM_PAGES=${#PAGE_FILES[@]}
 # The caller parses this line to learn the page count; keep its format
 echo "Document has $NUM_PAGES pages"
 echo
 for FILENAME in "${PAGE_FILES[@]}"; do
    FILENAME_BASE="${FILENAME%.pdf}"
    # Page number is whatever follows "page-" in the file name
    PAGE="${FILENAME_BASE##*/page-}"
    echo "Converting page $PAGE to pixels"
    # Convert to png
    pdftocairo "$FILENAME" -png -singlefile "$FILENAME_BASE"
    # Get the width and height
    IMG_WIDTH=$(identify -format "%w" "$FILENAME_BASE.png")
    IMG_HEIGHT=$(identify -format "%h" "$FILENAME_BASE.png")
    echo "$IMG_WIDTH" > "$FILENAME_BASE.width"
    echo "$IMG_HEIGHT" > "$FILENAME_BASE.height"
    # Convert to rgb
    convert "$FILENAME_BASE.png" -depth "$IMG_DEPTH" rgb:"$FILENAME_BASE.rgb"
    # Delete the png
    rm "$FILENAME_BASE.png"
    # Move files needed for the next step to the mounted volume
    mv "$FILENAME_BASE.rgb" /dangerzone
    mv "$FILENAME_BASE.width" /dangerzone
    mv "$FILENAME_BASE.height" /dangerzone
 done