Pass OCR environment variables into container

This commit is contained in:
Micah Lee 2020-01-07 16:46:04 -08:00
parent 757a30794d
commit 5e0c521293
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
2 changed files with 51 additions and 19 deletions

View file

@@ -167,17 +167,31 @@ class ConvertToPDF(TaskBase):
def run(self):
self.update_label.emit("Converting pixels to safe PDF")
args = [
"podman",
"run",
"--network",
"none",
"-v",
f"{self.common.pixel_dir.name}:/dangerzone",
"-v",
f"{self.common.safe_dir.name}:/safezone",
"dangerzone",
"pixels-to-pdf",
# Build environment variables list
envs = []
if self.common.settings.get("ocr"):
envs += ["-e", "OCR=1"]
else:
envs += ["-e", "OCR=0"]
envs += [
"-e",
f"OCR_LANGUAGE={self.common.ocr_languages[self.common.settings.get('ocr_language')]}",
]
args = (
[
"podman",
"run",
"--network",
"none",
"-v",
f"{self.common.pixel_dir.name}:/dangerzone",
"-v",
f"{self.common.safe_dir.name}:/safezone",
]
+ envs
+ ["dangerzone", "pixels-to-pdf",]
)
self.execute_podman(args)
self.task_finished.emit()

View file

@@ -24,16 +24,34 @@ for PAGE in $(seq 1 $NUM_PAGES); do
IMG_WIDTH=$(cat $WIDTH_FILENAME)
IMG_HEIGHT=$(cat $HEIGHT_FILENAME)
echo "Converting page $PAGE from pixels to searchable PDF"
if [ $OCR = "1" ]; then
CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
echo "Converting page $PAGE from pixels to searchable PDF"
CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf -l $OCR_LANGUAGE 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
else
echo "Converting page $PAGE from pixels to PDF"
CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
CONVERT_MSGS=$(convert $PNG_FILENAME $PDF_FILENAME 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
done