Convert to pixel data

This commit is contained in:
Micah Lee 2020-01-06 17:02:12 -08:00
parent fe028e5f0c
commit bcad2f6256
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
5 changed files with 148 additions and 13 deletions

View file

@ -1,6 +1,7 @@
import sys import sys
import os import os
import inspect import inspect
import tempfile
class Common(object): class Common(object):
@ -9,7 +10,9 @@ class Common(object):
""" """
def __init__(self): def __init__(self):
pass # Temporary directory to store pixel data
self.tmpdir = tempfile.TemporaryDirectory()
print(f"tmpdir is: {self.tmpdir.name}")
def get_resource_path(self, filename): def get_resource_path(self, filename):
if getattr(sys, "dangerzone_dev", False): if getattr(sys, "dangerzone_dev", False):

View file

@ -1,6 +1,6 @@
from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5 import QtCore, QtGui, QtWidgets
from .tasks import PullImageTask, BuildContainerTask from .tasks import PullImageTask, BuildContainerTask, ConvertToPixels
class MainWindow(QtWidgets.QMainWindow): class MainWindow(QtWidgets.QMainWindow):
@ -40,11 +40,11 @@ class MainWindow(QtWidgets.QMainWindow):
central_widget.setLayout(layout) central_widget.setLayout(layout)
self.setCentralWidget(central_widget) self.setCentralWidget(central_widget)
self.tasks = [PullImageTask, BuildContainerTask] self.tasks = [PullImageTask, BuildContainerTask, ConvertToPixels]
def start(self, filename): def start(self, filename):
print(f"Input document: {filename}") print(f"Input document: {filename}")
self.document_filename = filename self.common.document_filename = filename
self.show() self.show()
self.next_task() self.next_task()
@ -52,12 +52,16 @@ class MainWindow(QtWidgets.QMainWindow):
def next_task(self): def next_task(self):
if len(self.tasks) == 0: if len(self.tasks) == 0:
print("Tasks finished") print("Tasks finished")
self.task_label.setText("Tasks finished")
return return
self.task_details.setText("")
self.current_task = self.tasks.pop(0)(self.common) self.current_task = self.tasks.pop(0)(self.common)
self.current_task.update_label.connect(self.update_label) self.current_task.update_label.connect(self.update_label)
self.current_task.update_details.connect(self.update_details) self.current_task.update_details.connect(self.update_details)
self.current_task.thread_finished.connect(self.next_task) self.current_task.task_finished.connect(self.next_task)
self.current_task.task_failed.connect(self.task_failed)
self.current_task.start() self.current_task.start()
def update_label(self, s): def update_label(self, s):
@ -66,10 +70,16 @@ class MainWindow(QtWidgets.QMainWindow):
def update_details(self, s): def update_details(self, s):
self.task_details.setText(s) self.task_details.setText(s)
# Handler connected to a task's task_failed signal: replaces the task label
# with a failure notice and shows the error text in the details widget.
# err: the value delivered by the signal (interpolated into the details text).
def task_failed(self, err):
self.task_label.setText("Task failed :(")
# Turn on word wrapping for the details widget before setting the text
self.task_details.setWordWrap(True)
# Show the temporary directory path ahead of the error itself
self.task_details.setText(
f"Temporary directory: {self.common.tmpdir.name}\n\n{err}"
)
def scroll_to_bottom(self, minimum, maximum): def scroll_to_bottom(self, minimum, maximum):
self.details_scrollarea.verticalScrollBar().setValue(maximum) self.details_scrollarea.verticalScrollBar().setValue(maximum)
def closeEvent(self, e): def closeEvent(self, e):
print("closing")
e.accept() e.accept()
self.app.quit() self.app.quit()

View file

@ -1,10 +1,13 @@
import subprocess import subprocess
import time import time
import tempfile
import os
from PyQt5 import QtCore, QtWidgets, QtGui from PyQt5 import QtCore, QtWidgets, QtGui
class TaskBase(QtCore.QThread): class TaskBase(QtCore.QThread):
thread_finished = QtCore.pyqtSignal() task_finished = QtCore.pyqtSignal()
task_failed = QtCore.pyqtSignal(str)
update_label = QtCore.pyqtSignal(str) update_label = QtCore.pyqtSignal(str)
update_details = QtCore.pyqtSignal(str) update_details = QtCore.pyqtSignal(str)
@ -34,6 +37,8 @@ class TaskBase(QtCore.QThread):
output += p.stdout.read() output += p.stdout.read()
self.update_details.emit(output) self.update_details.emit(output)
return output
class PullImageTask(TaskBase): class PullImageTask(TaskBase):
def __init__(self, common): def __init__(self, common):
@ -45,7 +50,7 @@ class PullImageTask(TaskBase):
self.update_details.emit("") self.update_details.emit("")
args = ["podman", "pull", "ubuntu:18.04"] args = ["podman", "pull", "ubuntu:18.04"]
self.execute_podman(args, watch="stderr") self.execute_podman(args, watch="stderr")
self.thread_finished.emit() self.task_finished.emit()
class BuildContainerTask(TaskBase): class BuildContainerTask(TaskBase):
@ -59,4 +64,85 @@ class BuildContainerTask(TaskBase):
self.update_details.emit("") self.update_details.emit("")
args = ["podman", "build", "-t", "dangerzone", container_path] args = ["podman", "build", "-t", "dangerzone", container_path]
self.execute_podman(args) self.execute_podman(args)
self.thread_finished.emit() self.task_finished.emit()
class ConvertToPixels(TaskBase):
    """Task that runs the container to convert the input document to pixels.

    After the container exits, the shared temporary directory is expected to
    hold exactly ``page-N.rgb``, ``page-N.width`` and ``page-N.height`` for
    every page. All of it is validated here; on success ``task_finished`` is
    emitted, otherwise ``task_failed`` is emitted with a description.
    """

    def __init__(self, common):
        super(ConvertToPixels, self).__init__()
        self.common = common

        # Upper bounds on the page geometry accepted from the container
        self.max_image_width = 10000
        self.max_image_height = 10000
        self.max_image_size = self.max_image_width * self.max_image_height * 3

    @staticmethod
    def _parse_positive_int(text):
        """Return ``text`` as an int if it is a positive decimal number, else None."""
        if not text or not text.isdigit():
            return None
        try:
            value = int(text)
        except ValueError:
            # str.isdigit() accepts characters (e.g. superscript digits)
            # that int() refuses to parse
            return None
        return value if value > 0 else None

    def run(self):
        """Run the conversion container and validate everything it wrote."""
        self.update_label.emit("Converting document to pixels")
        tmpdir = self.common.tmpdir.name
        args = [
            "podman",
            "run",
            "--network",
            "none",
            "-v",
            f"{self.common.document_filename}:/tmp/input_file",
            "-v",
            f"{tmpdir}:/dangerzone",
            "dangerzone",
            "/tmp/convert_to_pixels",
        ]
        output = self.execute_podman(args)

        # How many pages was that? Only the first matching line is considered.
        num_pages = None
        for line in output.split("\n"):
            if line.startswith("Document has "):
                num_pages = self._parse_positive_int(line.split(" ")[2])
                break
        if num_pages is None:
            self.task_failed.emit("Invalid number of pages returned")
            return

        # Make sure we have the files we expect, and nothing else
        expected_filenames = sorted(
            f"page-{i}.{suffix}"
            for i in range(1, num_pages + 1)
            for suffix in ("rgb", "width", "height")
        )
        actual_filenames = sorted(os.listdir(tmpdir))
        if expected_filenames != actual_filenames:
            self.task_failed.emit(
                f"We expected these files:\n{expected_filenames}\n\nBut we got these files:\n{actual_filenames}"
            )
            return

        # Make sure the files are the correct sizes
        for i in range(1, num_pages + 1):
            page_prefix = os.path.join(tmpdir, f"page-{i}")

            try:
                with open(f"{page_prefix}.width") as f:
                    w_str = f.read().strip()
                with open(f"{page_prefix}.height") as f:
                    h_str = f.read().strip()
            except (OSError, ValueError):
                # Unreadable or undecodable file (UnicodeDecodeError is a
                # ValueError): report it instead of crashing the thread
                self.task_failed.emit(f"Page {i} has invalid geometry")
                return

            # Validate the text BEFORE using it as a number
            w = self._parse_positive_int(w_str)
            h = self._parse_positive_int(h_str)
            if (
                w is None
                or h is None
                or w > self.max_image_width
                or h > self.max_image_height
            ):
                self.task_failed.emit(f"Page {i} has invalid geometry")
                return

            # Make sure the RGB file is the correct size (3 bytes per pixel)
            try:
                rgb_size = os.path.getsize(f"{page_prefix}.rgb")
            except OSError:
                rgb_size = None
            if rgb_size != w * h * 3:
                self.task_failed.emit(f"Page {i} has an invalid RGB file size")
                return

        self.task_finished.emit()

View file

@ -1,8 +1,6 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y poppler-utils imagemagick apt-get install -y file poppler-utils imagemagick
RUN useradd -ms /bin/bash user
USER user:user
COPY convert_to_pixels /tmp/convert_to_pixels

View file

@ -0,0 +1,38 @@
#!/bin/bash
# Split /tmp/input_file into one PDF per page, render each page to raw RGB
# pixel data, and move the pixel data together with its width and height
# into /dangerzone.

# Value passed to ImageMagick's -depth option for the raw RGB output
IMG_DEPTH=8

echo "Separating document into pages"
pdfseparate /tmp/input_file /tmp/page-%d.pdf

# Collect the per-page PDFs with a glob instead of word-splitting `find`
# output; nullglob makes the array empty when nothing was produced.
shopt -s nullglob
PAGE_FILES=(/tmp/page-*.pdf)
shopt -u nullglob

NUM_PAGES=${#PAGE_FILES[@]}
echo "Document has $NUM_PAGES pages"
echo

for FILENAME in "${PAGE_FILES[@]}"; do
    FILENAME_BASE=${FILENAME%.pdf}
    PAGE=$(basename "$FILENAME_BASE" | cut -d"-" -f2)

    echo "Converting page $PAGE to pixels"

    # Convert to png
    pdftocairo "$FILENAME" -png -singlefile "$FILENAME_BASE"

    # Get the width and height
    IMG_WIDTH=$(identify -format "%w" "$FILENAME_BASE.png")
    IMG_HEIGHT=$(identify -format "%h" "$FILENAME_BASE.png")
    echo "$IMG_WIDTH" > "$FILENAME_BASE.width"
    echo "$IMG_HEIGHT" > "$FILENAME_BASE.height"

    # Convert to rgb
    convert "$FILENAME_BASE.png" -depth "$IMG_DEPTH" rgb:"$FILENAME_BASE.rgb"

    # Delete the png
    rm "$FILENAME_BASE.png"

    # Move files needed for the next step to the mounted volume
    mv "$FILENAME_BASE.rgb" /dangerzone
    mv "$FILENAME_BASE.width" /dangerzone
    mv "$FILENAME_BASE.height" /dangerzone
done