dangerzone/share/container/pixels-to-pdf
2020-01-07 09:52:04 -08:00

59 lines
1.5 KiB
Bash
Executable file

#!/bin/bash
#######################################
# Print an error message to stderr and abort the script.
# Arguments: $1 - message to print
# Outputs:   writes the message followed by a newline to stderr
# Returns:   never returns; exits the shell with status 1
#######################################
die() {
  # printf instead of echo: echo would treat a message such as "-n" or "-e"
  # as an option and swallow it.
  printf '%s\n' "$1" >&2
  exit 1
}
# Value handed to ImageMagick's -depth option when the raw page data is read.
IMG_DEPTH=8

# Count the page dumps by globbing directly instead of piping find into wc:
# no word splitting, no extra processes, and no find error when nothing matches.
NUM_PAGES=0
for PAGE_FILE in /dangerzone/page-*.rgb; do
  # An unmatched glob is left as the literal pattern, so only count real files.
  if [[ -e "$PAGE_FILE" ]]; then
    NUM_PAGES=$((NUM_PAGES + 1))
  fi
done

# Without any page there is nothing to convert or merge; stop here with a
# clear message rather than failing later in the merge step.
if (( NUM_PAGES == 0 )); then
  die "No pages found in /dangerzone"
fi

echo "Document has $NUM_PAGES pages"
echo
# Convert every raw RGB page dump to a PNG, then OCR the PNG into a
# searchable single-page PDF under /tmp.
for RGB_FILENAME in /dangerzone/page-*.rgb; do
  # An unmatched glob stays as the literal pattern; skip it instead of trying
  # to convert a file that does not exist.
  [[ -e "$RGB_FILENAME" ]] || continue

  FILENAME_BASE=${RGB_FILENAME%.rgb}
  PAGE_BASENAME=${FILENAME_BASE##*/}
  # The page number is the second dash-separated field of the basename.
  PAGE=${PAGE_BASENAME#*-}
  PAGE=${PAGE%%-*}

  WIDTH_FILENAME=${FILENAME_BASE}.width
  HEIGHT_FILENAME=${FILENAME_BASE}.height
  PNG_FILENAME=/tmp/${PAGE_BASENAME}.png
  OCR_FILENAME=/tmp/${PAGE_BASENAME}
  # Not referenced below; kept for parity with the other derived paths.
  PDF_FILENAME=/tmp/${PAGE_BASENAME}.pdf

  echo "Converting page $PAGE from pixels to PNG"

  # The raw pixel data carries no dimensions, so both side files are required.
  # Fail loudly rather than handing an empty size to convert.
  IMG_WIDTH=$(<"$WIDTH_FILENAME") \
    || die "Page $PAGE conversion failed: cannot read $WIDTH_FILENAME"
  IMG_HEIGHT=$(<"$HEIGHT_FILENAME") \
    || die "Page $PAGE conversion failed: cannot read $HEIGHT_FILENAME"
  if [[ -z "$IMG_WIDTH" || -z "$IMG_HEIGHT" ]]; then
    die "Page $PAGE conversion failed: missing image dimensions"
  fi

  # Tool output is captured so it is only shown when the step fails.
  if ! CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" \
      -depth "${IMG_DEPTH}" rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1); then
    die "Page $PAGE conversion failed: $CONVERT_MSGS"
  fi

  echo "Converting page $PAGE from PNG to searchable PDF"
  if ! CONVERT_MSGS=$(tesseract "$PNG_FILENAME" "$OCR_FILENAME" pdf 2>&1); then
    die "Page $PAGE conversion failed: $CONVERT_MSGS"
  fi

  echo
done
echo
echo "Merging $NUM_PAGES pages into a single PDF"

# Build the pdfunite argument list: the per-page PDFs in numeric page order,
# followed by the output file. Start from an empty array so stale entries can
# never leak into the command line.
args=()
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
  args+=("/tmp/page-$PAGE.pdf")
done
args+=("/safezone/safe-output.pdf")

# Merge them; stop immediately if the merge fails so a missing or partial
# output is never passed on to the compression step.
pdfunite "${args[@]}" || die "Merging pages into /safezone/safe-output.pdf failed"

# Compress
echo "Compressing PDF"
ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf \
  || die "Compressing /safezone/safe-output.pdf failed"