#!/bin/bash
#
# Turns the per-page raw RGB dumps found in /dangerzone into a single PDF
# written to /safezone.
#
# Inputs, for each page N (pages are numbered 1..count of page-*.rgb files):
#   /dangerzone/page-N.rgb     pixel data, handed to ImageMagick as "rgb:"
#   /dangerzone/page-N.width   value used as the width in `convert -size`
#   /dangerzone/page-N.height  value used as the height in `convert -size`
#
# Environment:
#   OCR           "1" produces a searchable PDF via tesseract; any other
#                 value (or unset) produces a plain image PDF via convert.
#   OCR_LANGUAGE  passed to `tesseract -l`; required when OCR is "1".
#
# Outputs:
#   /tmp/page-N.png, /tmp/page-N.pdf       per-page intermediates
#   /safezone/safe-output.pdf              merged pages (pdfunite)
#   /safezone/safe-output-compressed.pdf   result of ps2pdf on the merge
#
# Exits 1 with a message on stderr as soon as any step fails.

readonly IMG_DEPTH=8

# Print the message in $1 to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$1" >&2
  exit 1
}

# Print the number of /dangerzone/page-*.rgb files to stdout.
# nullglob makes an unmatched pattern expand to nothing, so "no pages" is
# reported as 0 rather than producing an error from a literal pattern.
count_pages() {
  local -a rgb_files
  shopt -s nullglob
  rgb_files=(/dangerzone/page-*.rgb)
  shopt -u nullglob
  printf '%s\n' "${#rgb_files[@]}"
}

# Convert page $1 from raw pixels to /tmp/page-$1.pdf.
# Globals:   IMG_DEPTH, OCR, OCR_LANGUAGE (all read)
# Arguments: $1 - page number
# Outputs:   one progress line on stdout; dies on any failure
convert_page() {
  local -r page=$1
  local -r base="/dangerzone/page-${page}"
  local -r png_file="/tmp/page-${page}.png"
  local width height msgs

  # A missing or unreadable dimension file must stop the run here instead of
  # reaching convert as an empty "-size x".
  width=$(cat "${base}.width") \
    || die "Page $page conversion failed: cannot read ${base}.width"
  height=$(cat "${base}.height") \
    || die "Page $page conversion failed: cannot read ${base}.height"

  if [[ "${OCR:-}" == "1" ]]; then
    echo "Converting page $page from pixels to searchable PDF"
  else
    echo "Converting page $page from pixels to PDF"
  fi

  # Both modes start from the same raw-pixels -> PNG step.
  msgs=$(convert -size "${width}x${height}" -depth "${IMG_DEPTH}" \
    "rgb:${base}.rgb" "png:${png_file}" 2>&1) \
    || die "Page $page conversion failed: $msgs"

  if [[ "${OCR:-}" == "1" ]]; then
    # tesseract appends ".pdf" to the output base name itself.
    msgs=$(tesseract "$png_file" "/tmp/page-${page}" pdf \
      -l "$OCR_LANGUAGE" 2>&1) \
      || die "Page $page conversion failed: $msgs"
  else
    msgs=$(convert "$png_file" "/tmp/page-${page}.pdf" 2>&1) \
      || die "Page $page conversion failed: $msgs"
  fi
}

# Merge /tmp/page-1.pdf .. /tmp/page-$1.pdf into /safezone/safe-output.pdf.
# Arguments: $1 - number of pages
merge_pages() {
  local -r num_pages=$1
  local -a args=()
  local page

  # pdfunite takes the input files in order, followed by the output file.
  for ((page = 1; page <= num_pages; page++)); do
    args+=("/tmp/page-${page}.pdf")
  done
  args+=("/safezone/safe-output.pdf")

  pdfunite "${args[@]}" || die "Merging pages failed"
}

main() {
  local num_pages page

  num_pages=$(count_pages)
  echo "Document has $num_pages pages"
  echo

  ((num_pages > 0)) || die "No page-*.rgb files found in /dangerzone"

  if [[ "${OCR:-}" == "1" && -z "${OCR_LANGUAGE:-}" ]]; then
    die "OCR is enabled but OCR_LANGUAGE is not set"
  fi

  # Convert rgb files to per-page PDF files
  for ((page = 1; page <= num_pages; page++)); do
    convert_page "$page"
  done

  echo
  echo "Merging $num_pages pages into a single PDF"
  merge_pages "$num_pages"

  # Compress
  echo "Compressing PDF"
  ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf \
    || die "Compressing PDF failed"
}

main "$@"