mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
55 lines
1.3 KiB
Bash
Executable file
55 lines
1.3 KiB
Bash
Executable file
#!/bin/bash
|
|
|
|
# Report a fatal error and abort the script.
# Arguments: $1 - message to report
# Outputs:   the message followed by a newline, on stderr
# Returns:   does not return; exits the shell with status 1
die() {
    # printf instead of echo: the message may embed raw tool output, and echo
    # would interpret a message such as "-n" or "-e" as an option and drop it.
    printf '%s\n' "$1" >&2
    exit 1
}
|
|
|
|
# Value handed to convert's -depth option when decoding the raw RGB data.
IMG_DEPTH=8

# Count the page-N.rgb files by expanding the glob into an array. nullglob
# makes an unmatched pattern expand to nothing, so an empty input directory
# yields 0 rather than a find error message; the option is switched back off
# right away so later globs keep the default behaviour.
shopt -s nullglob
RGB_FILES=(/dangerzone/page-*.rgb)
shopt -u nullglob
NUM_PAGES=${#RGB_FILES[@]}

# Without any page there is nothing to convert or merge, so stop here with a
# clear message rather than letting the later steps fail on missing inputs.
if [ "$NUM_PAGES" -eq 0 ]; then
    die "No pages found in /dangerzone"
fi

echo "Document has $NUM_PAGES pages"
echo
|
|
|
|
# Convert rgb files to png files, then OCR each png into a one-page PDF.
# Pages are numbered 1..NUM_PAGES; any failing step aborts the script via die.
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
    FILENAME_BASE=/dangerzone/page-$PAGE
    RGB_FILENAME=${FILENAME_BASE}.rgb
    WIDTH_FILENAME=${FILENAME_BASE}.width
    HEIGHT_FILENAME=${FILENAME_BASE}.height
    PNG_FILENAME=/tmp/page-$PAGE.png
    # tesseract is given the output base name without extension; the merge
    # step later picks up the result as /tmp/page-N.pdf.
    OCR_FILENAME=/tmp/page-$PAGE
    PDF_FILENAME=/tmp/page-$PAGE.pdf

    # Raw RGB data carries no header, so the dimensions come from the sidecar
    # files. Fail with a clear message when one is unreadable or empty instead
    # of handing convert a malformed -size argument.
    IMG_WIDTH=$(cat "$WIDTH_FILENAME") \
        || die "Page $PAGE conversion failed: cannot read $WIDTH_FILENAME"
    IMG_HEIGHT=$(cat "$HEIGHT_FILENAME") \
        || die "Page $PAGE conversion failed: cannot read $HEIGHT_FILENAME"
    if [ -z "$IMG_WIDTH" ] || [ -z "$IMG_HEIGHT" ]; then
        die "Page $PAGE conversion failed: missing image dimensions"
    fi

    echo "Converting page $PAGE from pixels to searchable PDF"

    # Capture stdout and stderr of each tool so the output can be included in
    # the error message if the tool fails.
    if ! CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth "$IMG_DEPTH" rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi

    if ! CONVERT_MSGS=$(tesseract "$PNG_FILENAME" "$OCR_FILENAME" pdf 2>&1); then
        die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi
done
|
|
|
|
echo
echo "Merging $NUM_PAGES pages into a single PDF"

# Build the pdfunite argument list: the per-page PDFs in page order, with the
# destination file appended last.
declare -a args=()
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
    args+=("/tmp/page-$PAGE.pdf")
done
args+=("/safezone/safe-output.pdf")

# Merge them. Abort on failure so the following steps never run against a
# missing or incomplete merged file.
pdfunite "${args[@]}" || die "Merging pages failed"
|
|
|
|
# Run the merged PDF through ps2pdf, writing the result to a separate
# "-compressed" file beside it. The exit status of ps2pdf is left as the
# status of this step.
printf '%s\n' "Compressing PDF"
ps2pdf \
    /safezone/safe-output.pdf \
    /safezone/safe-output-compressed.pdf
|