Finish making convert to safe PDF work

This commit is contained in:
Micah Lee 2020-01-06 18:17:48 -08:00
parent bcad2f6256
commit 195386d526
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
5 changed files with 129 additions and 16 deletions

View file

@ -11,8 +11,10 @@ class Common(object):
def __init__(self):
# Temporary directory to store pixel data
self.tmpdir = tempfile.TemporaryDirectory()
print(f"tmpdir is: {self.tmpdir.name}")
self.pixel_dir = tempfile.TemporaryDirectory()
self.safe_dir = tempfile.TemporaryDirectory()
print(f"pixel_dir is: {self.pixel_dir.name}")
print(f"safe_dir is: {self.safe_dir.name}")
def get_resource_path(self, filename):
if getattr(sys, "dangerzone_dev", False):

View file

@ -1,6 +1,8 @@
import shutil
import os
from PyQt5 import QtCore, QtGui, QtWidgets
from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels
from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF
class MainWindow(QtWidgets.QMainWindow):
@ -40,7 +42,7 @@ class MainWindow(QtWidgets.QMainWindow):
central_widget.setLayout(layout)
self.setCentralWidget(central_widget)
self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels]
self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels, ConvertToPDF]
def start(self, filename):
print(f"Input document: {filename}")
@ -51,8 +53,7 @@ class MainWindow(QtWidgets.QMainWindow):
def next_task(self):
if len(self.tasks) == 0:
print("Tasks finished")
self.task_label.setText("Tasks finished")
self.save_safe_pdf()
return
self.task_details.setText("")
@ -74,9 +75,31 @@ class MainWindow(QtWidgets.QMainWindow):
self.task_label.setText("Task failed :(")
self.task_details.setWordWrap(True)
self.task_details.setText(
f"Temporary directory: {self.common.tmpdir.name}\n\n{err}"
f"Directory with pixel data: {self.common.pixel_dir.name}\n\n{err}"
)
def save_safe_pdf(self):
    """Ask the user where to save the safe PDF, move it there, then quit.

    The suggested name is the input document's path with its extension
    replaced by "-safe.pdf". If the dialog is canceled nothing is saved.
    Afterwards both temporary directories are removed and the application
    is asked to quit.
    """
    # Build the default name offered in the save dialog
    document_stem = os.path.splitext(self.common.document_filename)[0]
    suggested_filename = f"{document_stem}-safe.pdf"

    dialog_result = QtWidgets.QFileDialog.getSaveFileName(
        self, "Save safe PDF", suggested_filename, filter="Documents (*.pdf)"
    )
    # Only the first element (the chosen path) is used; it is an empty
    # string when the user cancels the dialog
    dest_filename = dialog_result[0]

    if dest_filename != "":
        source_filename = f"{self.common.safe_dir.name}/safe-output-compressed.pdf"
        shutil.move(source_filename, dest_filename)
    else:
        print("Save file dialog canceled")

    # Remove the temporary pixel and safe-PDF directories
    self.common.pixel_dir.cleanup()
    self.common.safe_dir.cleanup()

    # Nothing left to do, so close the application
    self.app.quit()
def scroll_to_bottom(self, minimum, maximum):
self.details_scrollarea.verticalScrollBar().setValue(maximum)

View file

@ -34,7 +34,10 @@ class TaskBase(QtCore.QThread):
output += line
self.update_details.emit(output)
output += p.stdout.read()
if watch == "stdout":
output += p.stderr.read()
else:
output += p.stdout.read()
self.update_details.emit(output)
return output
@ -86,9 +89,9 @@ class ConvertToPixels(TaskBase):
"-v",
f"{self.common.document_filename}:/tmp/input_file",
"-v",
f"{self.common.tmpdir.name}:/dangerzone",
f"{self.common.pixel_dir.name}:/dangerzone",
"dangerzone",
"/tmp/convert_to_pixels",
"convert_to_pixels",
]
output = self.execute_podman(args)
@ -112,7 +115,7 @@ class ConvertToPixels(TaskBase):
f"page-{i}.height",
]
expected_filenames.sort()
actual_filenames = os.listdir(self.common.tmpdir.name)
actual_filenames = os.listdir(self.common.pixel_dir.name)
actual_filenames.sort()
if expected_filenames != actual_filenames:
@ -123,9 +126,9 @@ class ConvertToPixels(TaskBase):
# Make sure the files are the correct sizes
for i in range(1, num_pages + 1):
with open(f"{self.common.tmpdir.name}/page-{i}.width") as f:
with open(f"{self.common.pixel_dir.name}/page-{i}.width") as f:
w_str = f.read().strip()
with open(f"{self.common.tmpdir.name}/page-{i}.height") as f:
with open(f"{self.common.pixel_dir.name}/page-{i}.height") as f:
h_str = f.read().strip()
w = int(w_str)
h = int(h_str)
@ -141,8 +144,34 @@ class ConvertToPixels(TaskBase):
return
# Make sure the RGB file is the correct size
if os.path.getsize(f"{self.common.tmpdir.name}/page-{i}.rgb") != w * h * 3:
if (
os.path.getsize(f"{self.common.pixel_dir.name}/page-{i}.rgb")
!= w * h * 3
):
self.task_failed.emit(f"Page {i} has an invalid RGB file size")
return
self.task_finished.emit()
class ConvertToPDF(TaskBase):
    """Task that runs the container's convert_to_pdf step.

    The pixel directory is mounted at /dangerzone and the safe directory at
    /safezone inside a network-less podman container. The task succeeds
    only if the container leaves safe-output-compressed.pdf in the safe
    directory.
    """

    def __init__(self, common):
        super(ConvertToPDF, self).__init__()
        self.common = common

    def run(self):
        """Run the conversion and emit task_finished or task_failed."""
        self.update_label.emit("Converting pixels to safe PDF")
        args = [
            "podman",
            "run",
            "--network",
            "none",
            "-v",
            f"{self.common.pixel_dir.name}:/dangerzone",
            "-v",
            f"{self.common.safe_dir.name}:/safezone",
            "dangerzone",
            "convert_to_pdf",
        ]
        self.execute_podman(args)

        # Do not report success unless the container actually produced the
        # PDF; otherwise the following save step would try to move a file
        # that does not exist.
        safe_pdf = os.path.join(
            self.common.safe_dir.name, "safe-output-compressed.pdf"
        )
        if not os.path.isfile(safe_pdf):
            self.task_failed.emit("Converting to a safe PDF failed")
            return

        self.task_finished.emit()

View file

@ -1,6 +1,10 @@
FROM ubuntu:18.04
RUN apt-get update && \
apt-get install -y file poppler-utils imagemagick
apt-get install -y file poppler-utils imagemagick ghostscript
COPY convert_to_pixels /tmp/convert_to_pixels
# Fix imagemagick policy to allow writing PDFs
RUN sed -i '/rights="none" pattern="PDF"/c\<policy domain="coder" rights="read|write" pattern="PDF" />' /etc/ImageMagick-6/policy.xml
COPY convert_to_pixels /usr/local/bin/convert_to_pixels
COPY convert_to_pdf /usr/local/bin/convert_to_pdf

55
share/container/convert_to_pdf Executable file
View file

@ -0,0 +1,55 @@
#!/bin/bash
# Convert the raw pixel data in /dangerzone (page-N.rgb with matching
# page-N.width and page-N.height files) into per-page PDFs, merge them into
# /safezone/safe-output.pdf and write a compressed copy to
# /safezone/safe-output-compressed.pdf.

# Print an error message to stderr and abort with a non-zero exit status.
die() {
    echo "$1" >&2
    exit 1
}

IMG_DEPTH=8

# Collect the page files with a glob instead of parsing `find` output.
# nullglob makes the array empty (rather than holding the literal pattern)
# when nothing matches.
shopt -s nullglob
RGB_FILENAMES=(/dangerzone/page-*.rgb)
shopt -u nullglob

NUM_PAGES=${#RGB_FILENAMES[@]}
if [ "$NUM_PAGES" -eq 0 ]; then
    die "No page-*.rgb files found in /dangerzone"
fi
echo "Document has $NUM_PAGES pages"
echo

# Convert rgb files to png files, then each png to a single-page pdf
for RGB_FILENAME in "${RGB_FILENAMES[@]}"; do
    FILENAME_BASE=${RGB_FILENAME%.rgb}
    PAGE=$(basename "$FILENAME_BASE" | cut -d"-" -f2)
    WIDTH_FILENAME=${FILENAME_BASE}.width
    HEIGHT_FILENAME=${FILENAME_BASE}.height
    PNG_FILENAME=/tmp/$(basename "$FILENAME_BASE").png
    PDF_FILENAME=/tmp/$(basename "$FILENAME_BASE").pdf

    echo "Converting page $PAGE from pixels to PNG"
    IMG_WIDTH=$(cat "$WIDTH_FILENAME") || die "Page $PAGE has no readable width file"
    IMG_HEIGHT=$(cat "$HEIGHT_FILENAME") || die "Page $PAGE has no readable height file"

    # The dimensions end up in a convert argument, so only accept digits
    if ! [[ "$IMG_WIDTH" =~ ^[0-9]+$ && "$IMG_HEIGHT" =~ ^[0-9]+$ ]]; then
        die "Page $PAGE has invalid dimensions"
    fi

    if ! CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth "${IMG_DEPTH}" rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi

    echo "Converting page $PAGE from PNG to PDF"
    if ! CONVERT_MSGS=$(convert "$PNG_FILENAME" "$PDF_FILENAME" 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi
done

echo "Merging $NUM_PAGES pages into a single PDF"

# Put PDF filenames into an array. Counting from 1 to NUM_PAGES keeps the
# pages in numeric order (a glob would sort page-10 before page-2).
args=()
for PAGE in $(seq 1 "$NUM_PAGES"); do
    args+=("/tmp/page-$PAGE.pdf")
done
args+=("/safezone/safe-output.pdf")

# Merge them
pdfunite "${args[@]}" || die "Merging pages into a single PDF failed"

# Compress
echo "Compressing PDF"
ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf || die "Compressing PDF failed"