From 5e0c52129393e04981e899f9585d2e4d3a4cfbeb Mon Sep 17 00:00:00 2001 From: Micah Lee Date: Tue, 7 Jan 2020 16:46:04 -0800 Subject: [PATCH] Pass OCR environment variables into container --- dangerzone/tasks.py | 36 ++++++++++++++++++++++++----------- share/container/pixels-to-pdf | 34 +++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/dangerzone/tasks.py b/dangerzone/tasks.py index fdad5ad..11400aa 100644 --- a/dangerzone/tasks.py +++ b/dangerzone/tasks.py @@ -167,17 +167,31 @@ class ConvertToPDF(TaskBase): def run(self): self.update_label.emit("Converting pixels to safe PDF") - args = [ - "podman", - "run", - "--network", - "none", - "-v", - f"{self.common.pixel_dir.name}:/dangerzone", - "-v", - f"{self.common.safe_dir.name}:/safezone", - "dangerzone", - "pixels-to-pdf", + + # Build environment variables list + envs = [] + if self.common.settings.get("ocr"): + envs += ["-e", "OCR=1"] + else: + envs += ["-e", "OCR=0"] + envs += [ + "-e", + f"OCR_LANGUAGE={self.common.ocr_languages[self.common.settings.get('ocr_language')]}", ] + + args = ( + [ + "podman", + "run", + "--network", + "none", + "-v", + f"{self.common.pixel_dir.name}:/dangerzone", + "-v", + f"{self.common.safe_dir.name}:/safezone", + ] + + envs + + ["dangerzone", "pixels-to-pdf",] + ) self.execute_podman(args) self.task_finished.emit() diff --git a/share/container/pixels-to-pdf b/share/container/pixels-to-pdf index 695a717..2f1fcba 100755 --- a/share/container/pixels-to-pdf +++ b/share/container/pixels-to-pdf @@ -24,16 +24,34 @@ for PAGE in $(seq 1 $NUM_PAGES); do IMG_WIDTH=$(cat $WIDTH_FILENAME) IMG_HEIGHT=$(cat $HEIGHT_FILENAME) - echo "Converting page $PAGE from pixels to searchable PDF" + if [ $OCR = "1" ]; then - CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1) - if [ $? -ne 0 ]; then - die "Page $PAGE conversion failed: $CONVERT_MSGS" - fi + echo "Converting page $PAGE from pixels to searchable PDF" + + CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi + + CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf -l $OCR_LANGUAGE 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi + + else + + echo "Converting page $PAGE from pixels to PDF" + + CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth ${IMG_DEPTH} rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi + + CONVERT_MSGS=$(convert $PNG_FILENAME $PDF_FILENAME 2>&1) + if [ $? -ne 0 ]; then + die "Page $PAGE conversion failed: $CONVERT_MSGS" + fi - CONVERT_MSGS=$(tesseract $PNG_FILENAME $OCR_FILENAME pdf 2>&1) - if [ $? -ne 0 ]; then - die "Page $PAGE conversion failed: $CONVERT_MSGS" fi done