diff --git a/dangerzone/common.py b/dangerzone/common.py
index 6bbdd0d..0f91bf7 100644
--- a/dangerzone/common.py
+++ b/dangerzone/common.py
@@ -1,6 +1,7 @@
 import sys
 import os
 import inspect
+import tempfile
 
 
 class Common(object):
@@ -9,7 +10,9 @@ class Common(object):
     """
 
     def __init__(self):
-        pass
+        # Temporary directory to store pixel data
+        self.tmpdir = tempfile.TemporaryDirectory()
+        print(f"tmpdir is: {self.tmpdir.name}")
 
     def get_resource_path(self, filename):
         if getattr(sys, "dangerzone_dev", False):
diff --git a/dangerzone/main_window.py b/dangerzone/main_window.py
index 3760929..c166b87 100644
--- a/dangerzone/main_window.py
+++ b/dangerzone/main_window.py
@@ -1,6 +1,6 @@
 from PyQt5 import QtCore, QtGui, QtWidgets
 
-from .tasks import PullImageTask, BuildContainerTask
+from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels
 
 
 class MainWindow(QtWidgets.QMainWindow):
@@ -40,11 +40,11 @@ class MainWindow(QtWidgets.QMainWindow):
         central_widget.setLayout(layout)
         self.setCentralWidget(central_widget)
 
-        self.tasks = [PullImageTask, BuildContainerTask]
+        self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels]
 
     def start(self, filename):
         print(f"Input document: {filename}")
-        self.document_filename = filename
+        self.common.document_filename = filename
 
         self.show()
         self.next_task()
@@ -52,12 +52,16 @@ class MainWindow(QtWidgets.QMainWindow):
     def next_task(self):
         if len(self.tasks) == 0:
             print("Tasks finished")
+            self.task_label.setText("Tasks finished")
             return
 
+        self.task_details.setText("")
+
         self.current_task = self.tasks.pop(0)(self.common)
         self.current_task.update_label.connect(self.update_label)
         self.current_task.update_details.connect(self.update_details)
-        self.current_task.thread_finished.connect(self.next_task)
+        self.current_task.task_finished.connect(self.next_task)
+        self.current_task.task_failed.connect(self.task_failed)
         self.current_task.start()
 
     def update_label(self, s):
@@ -66,10 +70,16 @@ class MainWindow(QtWidgets.QMainWindow):
     def update_details(self, s):
         self.task_details.setText(s)
 
+    def task_failed(self, err):
+        self.task_label.setText("Task failed :(")
+        self.task_details.setWordWrap(True)
+        self.task_details.setText(
+            f"Temporary directory: {self.common.tmpdir.name}\n\n{err}"
+        )
+
     def scroll_to_bottom(self, minimum, maximum):
         self.details_scrollarea.verticalScrollBar().setValue(maximum)
 
     def closeEvent(self, e):
-        print("closing")
         e.accept()
         self.app.quit()
diff --git a/dangerzone/tasks.py b/dangerzone/tasks.py
index 40247ed..5f428b1 100644
--- a/dangerzone/tasks.py
+++ b/dangerzone/tasks.py
@@ -1,10 +1,13 @@
 import subprocess
 import time
+import tempfile
+import os
 from PyQt5 import QtCore, QtWidgets, QtGui
 
 
 class TaskBase(QtCore.QThread):
-    thread_finished = QtCore.pyqtSignal()
+    task_finished = QtCore.pyqtSignal()
+    task_failed = QtCore.pyqtSignal(str)
     update_label = QtCore.pyqtSignal(str)
     update_details = QtCore.pyqtSignal(str)
 
@@ -34,6 +37,8 @@ class TaskBase(QtCore.QThread):
             output += p.stdout.read()
             self.update_details.emit(output)
 
+        return output
+
 
 class PullImageTask(TaskBase):
     def __init__(self, common):
@@ -45,7 +50,7 @@ class PullImageTask(TaskBase):
         self.update_details.emit("")
         args = ["podman", "pull", "ubuntu:18.04"]
         self.execute_podman(args, watch="stderr")
-        self.thread_finished.emit()
+        self.task_finished.emit()
 
 
 class BuildContainerTask(TaskBase):
@@ -59,4 +64,99 @@ class BuildContainerTask(TaskBase):
         self.update_details.emit("")
         args = ["podman", "build", "-t", "dangerzone", container_path]
         self.execute_podman(args)
-        self.thread_finished.emit()
+        self.task_finished.emit()
+
+
+class ConvertToPixels(TaskBase):
+    """Convert the input document to raw RGB pixels inside the container.
+
+    The container is expected to leave page-N.rgb, page-N.width and
+    page-N.height files in the shared temporary directory; run() validates
+    them and emits task_finished or task_failed accordingly.
+    """
+
+    def __init__(self, common):
+        super(ConvertToPixels, self).__init__()
+        self.common = common
+
+        self.max_image_width = 10000
+        self.max_image_height = 10000
+        self.max_image_size = self.max_image_width * self.max_image_height * 3
+
+    def run(self):
+        """Run the conversion container and validate the files it produced."""
+        self.update_label.emit("Converting document to pixels")
+        args = [
+            "podman",
+            "run",
+            "--network",
+            "none",
+            "-v",
+            f"{self.common.document_filename}:/tmp/input_file",
+            "-v",
+            f"{self.common.tmpdir.name}:/dangerzone",
+            "dangerzone",
+            "/tmp/convert_to_pixels",
+        ]
+        output = self.execute_podman(args)
+
+        # How many pages was that?
+        num_pages = None
+        for line in output.split("\n"):
+            if line.startswith("Document has "):
+                num_pages = line.split(" ")[2]
+                break
+        if not num_pages or not num_pages.isdigit() or int(num_pages) <= 0:
+            self.task_failed.emit("Invalid number of pages returned")
+            return
+        num_pages = int(num_pages)
+
+        # Make sure we have the files we expect
+        expected_filenames = []
+        for i in range(1, num_pages + 1):
+            expected_filenames += [
+                f"page-{i}.rgb",
+                f"page-{i}.width",
+                f"page-{i}.height",
+            ]
+        expected_filenames.sort()
+        actual_filenames = os.listdir(self.common.tmpdir.name)
+        actual_filenames.sort()
+
+        if expected_filenames != actual_filenames:
+            self.task_failed.emit(
+                f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
+            )
+            return
+
+        # Make sure the files are the correct sizes
+        for i in range(1, num_pages + 1):
+            with open(f"{self.common.tmpdir.name}/page-{i}.width") as f:
+                w_str = f.read().strip()
+            with open(f"{self.common.tmpdir.name}/page-{i}.height") as f:
+                h_str = f.read().strip()
+
+            # These files are written by the container, so treat them as
+            # untrusted: check that they are plain digits before calling
+            # int(), which would otherwise raise ValueError before
+            # task_failed could be emitted.
+            if not w_str.isdigit() or not h_str.isdigit():
+                self.task_failed.emit(f"Page {i} has invalid geometry")
+                return
+            w = int(w_str)
+            h = int(h_str)
+            if (
+                w <= 0
+                or w > self.max_image_width
+                or h <= 0
+                or h > self.max_image_height
+            ):
+                self.task_failed.emit(f"Page {i} has invalid geometry")
+                return
+
+            # Make sure the RGB file is the correct size
+            if os.path.getsize(f"{self.common.tmpdir.name}/page-{i}.rgb") != w * h * 3:
+                self.task_failed.emit(f"Page {i} has an invalid RGB file size")
+                return
+
+        self.task_finished.emit()
diff --git a/share/container/Containerfile b/share/container/Containerfile
index 91c2c03..6b3e243 100644
--- a/share/container/Containerfile
+++ b/share/container/Containerfile
@@ -1,8 +1,6 @@
 FROM ubuntu:18.04
 
 RUN apt-get update && \
-    apt-get install -y poppler-utils imagemagick
-
-RUN useradd -ms /bin/bash user
-USER user:user
+    apt-get install -y file poppler-utils imagemagick
 
+COPY convert_to_pixels /tmp/convert_to_pixels
diff --git a/share/container/convert_to_pixels b/share/container/convert_to_pixels
new file mode 100755
index 0000000..5154723
--- /dev/null
+++ b/share/container/convert_to_pixels
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+IMG_DEPTH=8
+
+echo "Separating document into pages"
+
+pdfseparate /tmp/input_file /tmp/page-%d.pdf
+
+NUM_PAGES=$(find /tmp/page-*.pdf |wc -l)
+echo "Document has $NUM_PAGES pages"
+echo
+
+for FILENAME in $(find /tmp/page-*.pdf); do
+    FILENAME_BASE=${FILENAME%.pdf}
+    PAGE=$(basename "$FILENAME_BASE" |cut -d"-" -f2)
+
+    echo "Converting page $PAGE to pixels"
+
+    # Convert to png
+    pdftocairo "$FILENAME" -png -singlefile "$FILENAME_BASE"
+
+    # Get the width and height
+    IMG_WIDTH=$(identify -format "%w" "$FILENAME_BASE.png")
+    IMG_HEIGHT=$(identify -format "%h" "$FILENAME_BASE.png")
+    echo "$IMG_WIDTH" > "$FILENAME_BASE.width"
+    echo "$IMG_HEIGHT" > "$FILENAME_BASE.height"
+
+    # Convert to rgb
+    convert "$FILENAME_BASE.png" -depth "$IMG_DEPTH" rgb:"$FILENAME_BASE.rgb"
+
+    # Delete the png
+    rm "$FILENAME_BASE.png"
+
+    # Move files needed for the next step to the mounted volume
+    mv "$FILENAME_BASE.rgb" /dangerzone
+    mv "$FILENAME_BASE.width" /dangerzone
+    mv "$FILENAME_BASE.height" /dangerzone
+done