Pass OCR environment variables into container

This commit is contained in:
Micah Lee 2020-01-07 16:46:04 -08:00
parent 757a30794d
commit 5e0c521293
No known key found for this signature in database
GPG key ID: 403C2657CD994F73
2 changed files with 51 additions and 19 deletions

View file

@@ -167,7 +167,20 @@ class ConvertToPDF(TaskBase):
def run(self): def run(self):
self.update_label.emit("Converting pixels to safe PDF") self.update_label.emit("Converting pixels to safe PDF")
args = [
# Build environment variables list
envs = []
if self.common.settings.get("ocr"):
envs += ["-e", "OCR=1"]
else:
envs += ["-e", "OCR=0"]
envs += [
"-e",
f"OCR_LANGUAGE={self.common.ocr_languages[self.common.settings.get('ocr_language')]}",
]
args = (
[
"podman", "podman",
"run", "run",
"--network", "--network",
@@ -176,8 +189,9 @@ class ConvertToPDF(TaskBase):
f"{self.common.pixel_dir.name}:/dangerzone", f"{self.common.pixel_dir.name}:/dangerzone",
"-v", "-v",
f"{self.common.safe_dir.name}:/safezone", f"{self.common.safe_dir.name}:/safezone",
"dangerzone",
"pixels-to-pdf",
] ]
+ envs
+ ["dangerzone", "pixels-to-pdf",]
)
self.execute_podman(args) self.execute_podman(args)
self.task_finished.emit() self.task_finished.emit()

View file

@@ -24,6 +24,8 @@ for PAGE in $(seq 1 $NUM_PAGES); do
IMG_WIDTH=$(cat $WIDTH_FILENAME) IMG_WIDTH=$(cat $WIDTH_FILENAME)
IMG_HEIGHT=$(cat $HEIGHT_FILENAME) IMG_HEIGHT=$(cat $HEIGHT_FILENAME)
if [ $OCR = "1" ]; then
echo "Converting page $PAGE from pixels to searchable PDF" echo "Converting page $PAGE from pixels to searchable PDF"
CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1) CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1)
@@ -31,10 +33,26 @@ for PAGE in $(seq 1 $NUM_PAGES); do
die "Page $PAGE conversion failed: $CONVERT_MSGS" die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi fi
CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf 2>&1) CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf -l $OCR_LANGUAGE 2>&1)
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS" die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi fi
else
echo "Converting page $PAGE from pixels to PDF"
CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
CONVERT_MSGS=$(convert $PNG_FILENAME $PDF_FILENAME 2>&1)
if [ $? -ne 0 ]; then
die "Page $PAGE conversion failed: $CONVERT_MSGS"
fi
fi
done done
echo echo