Finish making convert to safe PDF work

This commit is contained in:
Micah Lee 2020-01-06 18:17:48 -08:00
parent bcad2f6256
commit 195386d526
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
5 changed files with 129 additions and 16 deletions

View file

@ -11,8 +11,10 @@ class Common(object):
def __init__(self): def __init__(self):
# Temporary directory to store pixel data # Temporary directory to store pixel data
self.tmpdir = tempfile.TemporaryDirectory() self.pixel_dir = tempfile.TemporaryDirectory()
print(f"tmpdir is: {self.tmpdir.name}") self.safe_dir = tempfile.TemporaryDirectory()
print(f"pixel_dir is: {self.pixel_dir.name}")
print(f"safe_dir is: {self.safe_dir.name}")
def get_resource_path(self, filename): def get_resource_path(self, filename):
if getattr(sys, "dangerzone_dev", False): if getattr(sys, "dangerzone_dev", False):

View file

@ -1,6 +1,8 @@
import shutil
import os
from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5 import QtCore, QtGui, QtWidgets
from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF
class MainWindow(QtWidgets.QMainWindow): class MainWindow(QtWidgets.QMainWindow):
@ -40,7 +42,7 @@ class MainWindow(QtWidgets.QMainWindow):
central_widget.setLayout(layout) central_widget.setLayout(layout)
self.setCentralWidget(central_widget) self.setCentralWidget(central_widget)
self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels] self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF]
def start(self, filename): def start(self, filename):
print(f"Input document: {filename}") print(f"Input document: {filename}")
@ -51,8 +53,7 @@ class MainWindow(QtWidgets.QMainWindow):
def next_task(self): def next_task(self):
if len(self.tasks) == 0: if len(self.tasks) == 0:
print("Tasks finished") self.save_safe_pdf()
self.task_label.setText("Tasks finished")
return return
self.task_details.setText("") self.task_details.setText("")
@ -74,9 +75,31 @@ class MainWindow(QtWidgets.QMainWindow):
self.task_label.setText("Task failed :(") self.task_label.setText("Task failed :(")
self.task_details.setWordWrap(True) self.task_details.setWordWrap(True)
self.task_details.setText( self.task_details.setText(
f"Temporary directory: {self.common.tmpdir.name}\n\n{err}" f"Directory with pixel data: {self.common.pixel_dir.name}\n\n{err}"
) )
def save_safe_pdf(self):
    """Ask where to save the safe PDF, move it there, then clean up and quit.

    The suggested name is the input document's path with its extension
    replaced by ``-safe.pdf``. If the dialog returns an empty filename the
    move is skipped and a message is printed instead.
    """
    document_root = os.path.splitext(self.common.document_filename)[0]
    suggested_filename = f"{document_root}-safe.pdf"

    dialog_result = QtWidgets.QFileDialog.getSaveFileName(
        self, "Save safe PDF", suggested_filename, filter="Documents (*.pdf)"
    )
    dest_filename = dialog_result[0]

    if dest_filename != "":
        # The container wrote the finished document into the safe directory
        source_filename = f"{self.common.safe_dir.name}/safe-output-compressed.pdf"
        shutil.move(source_filename, dest_filename)
    else:
        print("Save file dialog canceled")

    # NOTE(review): cleanup and quit are assumed to run on both branches;
    # confirm against the original indentation.
    # Clean up
    self.common.pixel_dir.cleanup()
    self.common.safe_dir.cleanup()

    # Quit
    self.app.quit()
def scroll_to_bottom(self, minimum, maximum): def scroll_to_bottom(self, minimum, maximum):
self.details_scrollarea.verticalScrollBar().setValue(maximum) self.details_scrollarea.verticalScrollBar().setValue(maximum)

View file

@ -34,6 +34,9 @@ class TaskBase(QtCore.QThread):
output += line output += line
self.update_details.emit(output) self.update_details.emit(output)
if watch == "stdout":
output += p.stderr.read()
else:
output += p.stdout.read() output += p.stdout.read()
self.update_details.emit(output) self.update_details.emit(output)
@ -86,9 +89,9 @@ class ConvertToPixels(TaskBase):
"-v", "-v",
f"{self.common.document_filename}:/tmp/input_file", f"{self.common.document_filename}:/tmp/input_file",
"-v", "-v",
f"{self.common.tmpdir.name}:/dangerzone", f"{self.common.pixel_dir.name}:/dangerzone",
"dangerzone", "dangerzone",
"/tmp/convert_to_pixels", "convert_to_pixels",
] ]
output = self.execute_podman(args) output = self.execute_podman(args)
@ -112,7 +115,7 @@ class ConvertToPixels(TaskBase):
f"page-{i}.height", f"page-{i}.height",
] ]
expected_filenames.sort() expected_filenames.sort()
actual_filenames = os.listdir(self.common.tmpdir.name) actual_filenames = os.listdir(self.common.pixel_dir.name)
actual_filenames.sort() actual_filenames.sort()
if expected_filenames != actual_filenames: if expected_filenames != actual_filenames:
@ -123,9 +126,9 @@ class ConvertToPixels(TaskBase):
# Make sure the files are the correct sizes # Make sure the files are the correct sizes
for i in range(1, num_pages + 1): for i in range(1, num_pages + 1):
with open(f"{self.common.tmpdir.name}/page-{i}.width") as f: with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f:
w_str = f.read().strip() w_str = f.read().strip()
with open(f"{self.common.tmpdir.name}/page-{i}.height") as f: with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f:
h_str = f.read().strip() h_str = f.read().strip()
w = int(w_str) w = int(w_str)
h = int(h_str) h = int(h_str)
@ -141,8 +144,34 @@ class ConvertToPixels(TaskBase):
return return
# Make sure the RGB file is the correct size # Make sure the RGB file is the correct size
if os.path.getsize(f"{self.common.tmpdir.name}/page-{i}.rgb") != w * h * 3: if (
os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb")
!= w * h * 3
):
self.task_failed.emit(f"Page {i} has an invalid RGB file size") self.task_failed.emit(f"Page {i} has an invalid RGB file size")
return return
self.task_finished.emit() self.task_finished.emit()
class ConvertToPDF(TaskBase):
    """Task that runs ``convert_to_pdf`` in the ``dangerzone`` container.

    The container is started with networking disabled and two volumes:
    the pixel directory mounted at ``/dangerzone`` and the safe directory
    mounted at ``/safezone``.
    """

    def __init__(self, common):
        """Store the shared ``common`` object holding the temp directories."""
        super(ConvertToPDF, self).__init__()
        self.common = common

    def run(self):
        """Run the conversion and report success or failure via signals.

        Emits ``task_finished`` only when the expected output file exists
        in the safe directory; otherwise emits ``task_failed`` so later
        steps never try to move a PDF that was not produced.
        """
        self.update_label.emit("Converting pixels to safe PDF")
        args = [
            "podman",
            "run",
            "--network",
            "none",
            "-v",
            f"{self.common.pixel_dir.name}:/dangerzone",
            "-v",
            f"{self.common.safe_dir.name}:/safezone",
            "dangerzone",
            "convert_to_pdf",
        ]
        self.execute_podman(args)

        # convert_to_pdf writes its final result to this name under
        # /safezone; a missing file means the conversion did not complete.
        safe_pdf = os.path.join(
            self.common.safe_dir.name, "safe-output-compressed.pdf"
        )
        if not os.path.isfile(safe_pdf):
            self.task_failed.emit("Converting pixels to safe PDF failed")
            return

        self.task_finished.emit()

View file

@ -1,6 +1,10 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y file poppler-utils imagemagick apt-get install -y file poppler-utils imagemagick ghostscript
COPY convert_to_pixels /tmp/convert_to_pixels # Fix imagemagick policy to allow writing PDFs
RUN sed -i '/rights="none" pattern="PDF"/c\<policy domain="coder" rights="read|write" pattern="PDF" />' /etc/ImageMagick-6/policy.xml
COPY convert_to_pixels /usr/local/bin/convert_to_pixels
COPY convert_to_pdf /usr/local/bin/convert_to_pdf

55
share/container/convert_to_pdf Executable file
View file

@ -0,0 +1,55 @@
#!/bin/bash
# Convert the raw pixel data in /dangerzone (page-N.rgb with matching
# page-N.width and page-N.height files) into a single compressed PDF at
# /safezone/safe-output-compressed.pdf.

# Print an error message to stderr and exit with a non-zero status.
die() {
    echo "$1" >&2
    exit 1
}

IMG_DEPTH=8

# Collect pages with a glob rather than parsing `find` output so filenames
# are never word-split; nullglob turns "no match" into an empty array.
shopt -s nullglob
RGB_FILENAMES=(/dangerzone/page-*.rgb)
shopt -u nullglob

NUM_PAGES=${#RGB_FILENAMES[@]}
echo "Document has $NUM_PAGES pages"
echo

# Without any pages pdfunite would be called with only a destination.
if [ "$NUM_PAGES" -eq 0 ]; then
    die "No pixel data found in /dangerzone"
fi

# Convert rgb files to png files, then each png to a one-page pdf
for RGB_FILENAME in "${RGB_FILENAMES[@]}"; do
    FILENAME_BASE=${RGB_FILENAME%.rgb}
    PAGE_NAME=$(basename "$FILENAME_BASE")
    PAGE=$(echo "$PAGE_NAME" | cut -d"-" -f2)
    WIDTH_FILENAME=${FILENAME_BASE}.width
    HEIGHT_FILENAME=${FILENAME_BASE}.height
    PNG_FILENAME=/tmp/${PAGE_NAME}.png
    PDF_FILENAME=/tmp/${PAGE_NAME}.pdf

    echo "Converting page $PAGE from pixels to PNG"
    IMG_WIDTH=$(cat "$WIDTH_FILENAME") || die "Page $PAGE has no readable width file"
    IMG_HEIGHT=$(cat "$HEIGHT_FILENAME") || die "Page $PAGE has no readable height file"
    # Test the command directly so no statement can slip in between the
    # conversion and the status check.
    if ! CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth "$IMG_DEPTH" rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi

    echo "Converting page $PAGE from PNG to PDF"
    if ! CONVERT_MSGS=$(convert "$PNG_FILENAME" "$PDF_FILENAME" 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi
done

echo "Merging $NUM_PAGES pages into a single PDF"

# Put PDF filenames into an array, in numeric page order (a glob would
# sort page-10 before page-2)
declare -a args
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
    args+=("/tmp/page-$PAGE.pdf")
done
args+=("/safezone/safe-output.pdf")

# Merge them
pdfunite "${args[@]}" || die "Merging pages into a single PDF failed"

# Compress
echo "Compressing PDF"
ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf || die "Compressing PDF failed"