From 195386d52606345cde3eb26dfb0b8a65dcee983b Mon Sep 17 00:00:00 2001 From: Micah Lee Date: Mon, 6 Jan 2020 18:17:48 -0800 Subject: [PATCH] Finish making convert to safe PDF work --- dangerzone/common.py | 6 ++-- dangerzone/main_window.py | 33 ++++++++++++++++---- dangerzone/tasks.py | 43 +++++++++++++++++++++----- share/container/Containerfile | 8 +++-- share/container/convert_to_pdf | 55 ++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+), 16 deletions(-) create mode 100755 share/container/convert_to_pdf diff --git a/dangerzone/common.py b/dangerzone/common.py index 0f91bf7..b6b8c1f 100644 --- a/dangerzone/common.py +++ b/dangerzone/common.py @@ -11,8 +11,10 @@ class Common(object): def __init__(self): # Temporary directory to store pixel data - self.tmpdir = tempfile.TemporaryDirectory() - print(f"tmpdir is: {self.tmpdir.name}") + self.pixel_dir = tempfile.TemporaryDirectory() + self.safe_dir = tempfile.TemporaryDirectory() + print(f"pixel_dir is: {self.pixel_dir.name}") + print(f"safe_dir is: {self.safe_dir.name}") def get_resource_path(self, filename): if getattr(sys, "dangerzone_dev", False): diff --git a/dangerzone/main_window.py b/dangerzone/main_window.py index c166b87..89ad591 100644 --- a/dangerzone/main_window.py +++ b/dangerzone/main_window.py @@ -1,6 +1,8 @@ +import shutil +import os from PyQt5 import QtCore, QtGui, QtWidgets -from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels +from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF class MainWindow(QtWidgets.QMainWindow): @@ -40,7 +42,7 @@ class MainWindow(QtWidgets.QMainWindow): central_widget.setLayout(layout) self.setCentralWidget(central_widget) - self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels] + self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF] def start(self, filename): print(f"Input document: {filename}") @@ -51,8 +53,7 @@ class MainWindow(QtWidgets.QMainWindow): def next_task(self): if len(self.tasks) == 0: - print("Tasks finished") - self.task_label.setText("Tasks finished") + self.save_safe_pdf() return self.task_details.setText("") @@ -74,9 +75,31 @@ class MainWindow(QtWidgets.QMainWindow): self.task_label.setText("Task failed :(") self.task_details.setWordWrap(True) self.task_details.setText( - f"Temporary directory: {self.common.tmpdir.name}\n\n{err}" + f"Directory with pixel data: {self.common.pixel_dir.name}\n\n{err}" ) + def save_safe_pdf(self): + suggested_filename = ( + f"{os.path.splitext(self.common.document_filename)[0]}-safe.pdf" + ) + + filename = QtWidgets.QFileDialog.getSaveFileName( + self, "Save safe PDF", suggested_filename, filter="Documents (*.pdf)" + ) + if filename[0] == "": + print("Save file dialog canceled") + else: + source_filename = f"{self.common.safe_dir.name}/safe-output-compressed.pdf" + dest_filename = filename[0] + shutil.move(source_filename, dest_filename) + + # Clean up + self.common.pixel_dir.cleanup() + self.common.safe_dir.cleanup() + + # Quit + self.app.quit() + def scroll_to_bottom(self, minimum, maximum): self.details_scrollarea.verticalScrollBar().setValue(maximum) diff --git a/dangerzone/tasks.py b/dangerzone/tasks.py index 5f428b1..44d4a58 100644 --- a/dangerzone/tasks.py +++ b/dangerzone/tasks.py @@ -34,7 +34,10 @@ class TaskBase(QtCore.QThread): output += line self.update_details.emit(output) - output += p.stdout.read() + if watch == "stdout": + output += p.stderr.read() + else: + output += p.stdout.read() self.update_details.emit(output) return output @@ -86,9 +89,9 @@ class ConvertToPixels(TaskBase): "-v", f"{self.common.document_filename}:/tmp/input_file", "-v", - f"{self.common.tmpdir.name}:/dangerzone", + f"{self.common.pixel_dir.name}:/dangerzone", "dangerzone", - "/tmp/convert_to_pixels", + "convert_to_pixels", ] output = self.execute_podman(args) @@ -112,7 +115,7 @@ class ConvertToPixels(TaskBase): f"page-{i}.height", ] expected_filenames.sort() - actual_filenames = os.listdir(self.common.tmpdir.name) + actual_filenames = os.listdir(self.common.pixel_dir.name) actual_filenames.sort() if expected_filenames != actual_filenames: @@ -123,9 +126,9 @@ class ConvertToPixels(TaskBase): # Make sure the files are the correct sizes for i in range(1, num_pages + 1): - with open(f"{self.common.tmpdir.name}/page-{i}.width") as f: + with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f: w_str = f.read().strip() - with open(f"{self.common.tmpdir.name}/page-{i}.height") as f: + with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f: h_str = f.read().strip() w = int(w_str) h = int(h_str) @@ -141,8 +144,34 @@ class ConvertToPixels(TaskBase): return # Make sure the RGB file is the correct size - if os.path.getsize(f"{self.common.tmpdir.name}/page-{i}.rgb") != w * h * 3: + if ( + os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb") + != w * h * 3 + ): self.task_failed.emit(f"Page {i} has an invalid RGB file size") return self.task_finished.emit() + + +class ConvertToPDF(TaskBase): + def __init__(self, common): + super(ConvertToPDF, self).__init__() + self.common = common + + def run(self): + self.update_label.emit("Converting pixels to safe PDF") + args = [ + "podman", + "run", + "--network", + "none", + "-v", + f"{self.common.pixel_dir.name}:/dangerzone", + "-v", + f"{self.common.safe_dir.name}:/safezone", + "dangerzone", + "convert_to_pdf", + ] + self.execute_podman(args) + self.task_finished.emit() diff --git a/share/container/Containerfile b/share/container/Containerfile index 6b3e243..411d269 100644 --- a/share/container/Containerfile +++ b/share/container/Containerfile @@ -1,6 +1,10 @@ FROM ubuntu:18.04 RUN apt-get update && \ - apt-get install -y file poppler-utils imagemagick + apt-get install -y file poppler-utils imagemagick ghostscript -COPY convert_to_pixels /tmp/convert_to_pixels +# Fix imagemagick policy to allow writing PDFs +RUN sed -i '/rights="none" pattern="PDF"/c\' /etc/ImageMagick-6/policy.xml + +COPY convert_to_pixels /usr/local/bin/convert_to_pixels +COPY convert_to_pdf /usr/local/bin/convert_to_pdf diff --git a/share/container/convert_to_pdf b/share/container/convert_to_pdf new file mode 100755 index 0000000..ebab8fb --- /dev/null +++ b/share/container/convert_to_pdf @@ -0,0 +1,55 @@ +#!/bin/bash + +die() { + echo "$1" >&2 + exit 1 +} + +IMG_DEPTH=8 +NUM_PAGES=$(find /dangerzone/page-*.rgb |wc -l) + +echo "Document has $NUM_PAGES pages" +echo + +# Convert rgb files to png files +for RGB_FILENAME in $(find /dangerzone/page-*.rgb); do + FILENAME_BASE=${RGB_FILENAME%.rgb} + PAGE=$(basename $FILENAME_BASE |cut -d"-" -f2) + + WIDTH_FILENAME=${FILENAME_BASE}.width + HEIGHT_FILENAME=${FILENAME_BASE}.height + PNG_FILENAME=/tmp/$(basename $FILENAME_BASE).png + PDF_FILENAME=/tmp/$(basename $FILENAME_BASE).pdf + + echo "Converting page $PAGE from pixels to PNG" + + IMG_WIDTH=$(cat $WIDTH_FILENAME) + IMG_HEIGHT=$(cat $HEIGHT_FILENAME) + + CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi + + echo "Converting page $PAGE from PNG to PDF" + CONVERT_MSGS=$(convert "$PNG_FILENAME" "$PDF_FILENAME" 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi +done + +echo "Merging $NUM_PAGES pages into a single PDF" + +# Put PDF filenames into an array +declare -a args +for PAGE in $(seq 1 $NUM_PAGES); do + args+=("/tmp/page-$PAGE.pdf") +done +args+=("/safezone/safe-output.pdf") + +# Merge them +pdfunite "${args[@]}" + +# Compress +echo "Compressing PDF" +ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf