#!/bin/bash
#
# Convert the document at /tmp/input_file into raw per-page pixel data.
#
# Steps:
#   1. Detect the MIME type of /tmp/input_file with `file`.
#   2. PDF input is used as-is; DOCX and XLSX inputs are first converted to
#      /tmp/input_file.pdf with headless LibreOffice. Anything else is rejected.
#   3. Split the PDF into /tmp/page-N.pdf with `pdfseparate`.
#   4. For every page: render to PNG (`pdftocairo`), record width and height
#      (`identify`), convert to raw RGB at IMG_DEPTH bits (`convert`), and move
#      page-N.rgb, page-N.width and page-N.height to /dangerzone.
#
# Progress goes to stdout. On any failure a message is written to stderr and
# the script exits with status 1.

readonly INPUT_FILE=/tmp/input_file
readonly IMG_DEPTH=8

# Print the message in $1 to stderr and terminate the script with status 1.
die() {
  printf '%s\n' "$1" >&2
  exit 1
}

# Convert INPUT_FILE to a PDF in /tmp using headless LibreOffice.
# Arguments:
#   $1 - human-readable format label used in messages (e.g. DOCX)
#   $2 - LibreOffice PDF export filter name (e.g. writer_pdf_Export)
# Outputs:
#   Writes a progress line to stdout; the PDF lands at "$INPUT_FILE.pdf".
# Exits (via die) when LibreOffice fails or the PDF is not produced.
convert_office_to_pdf() {
  local label=$1
  local filter=$2
  local msgs

  echo "Converting $label to PDF"
  if ! msgs=$(libreoffice --headless --convert-to "pdf:$filter" \
    --outdir /tmp "$INPUT_FILE" 2>&1); then
    die "$label to PDF conversion failed: $msgs"
  fi

  # Do not rely on the exit status alone: make sure the expected output file
  # is really there before later steps try to read it.
  [[ -f "$INPUT_FILE.pdf" ]] || die "$label to PDF conversion failed: $msgs"
}

# Detect the mime type
MIME_TYPE=$(file -b --mime-type "$INPUT_FILE") \
  || die "Could not detect the document MIME type: $MIME_TYPE"
echo "Document MIME type is $MIME_TYPE"
echo

case "$MIME_TYPE" in
  # .pdf
  "application/pdf")
    ORIGINAL_PDF=$INPUT_FILE
    ;;
  # .docx
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
    convert_office_to_pdf "DOCX" "writer_pdf_Export"
    ORIGINAL_PDF=$INPUT_FILE.pdf
    ;;
  # .xlsx
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
    convert_office_to_pdf "XLSX" "calc_pdf_Export"
    ORIGINAL_PDF=$INPUT_FILE.pdf
    ;;
  *)
    die "The document format is not supported"
    ;;
esac

echo "Separating document into pages"
if ! CONVERT_MSGS=$(pdfseparate "$ORIGINAL_PDF" /tmp/page-%d.pdf 2>&1); then
  die "Separating document into pages failed: $CONVERT_MSGS"
fi

# Count the page files with a glob; nullglob makes "no match" yield an empty
# array (count 0) instead of the literal pattern.
shopt -s nullglob
PAGE_FILES=(/tmp/page-*.pdf)
shopt -u nullglob
NUM_PAGES=${#PAGE_FILES[@]}
echo "Document has $NUM_PAGES pages"
echo

for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
  FILENAME=/tmp/page-$PAGE.pdf
  FILENAME_BASE=/tmp/page-$PAGE

  echo "Converting page $PAGE to pixels"

  # Convert to png (-singlefile writes exactly "$FILENAME_BASE.png")
  pdftocairo "$FILENAME" -png -singlefile "$FILENAME_BASE" \
    || die "Page $PAGE conversion to PNG failed"

  # Get the width and height
  IMG_WIDTH=$(identify -format "%w" "$FILENAME_BASE.png") \
    || die "Page $PAGE: could not read the image width"
  IMG_HEIGHT=$(identify -format "%h" "$FILENAME_BASE.png") \
    || die "Page $PAGE: could not read the image height"
  echo "$IMG_WIDTH" > "$FILENAME_BASE.width"
  echo "$IMG_HEIGHT" > "$FILENAME_BASE.height"

  # Convert to rgb
  convert "$FILENAME_BASE.png" -depth "$IMG_DEPTH" rgb:"$FILENAME_BASE.rgb" \
    || die "Page $PAGE conversion to RGB failed"

  # Delete the png
  rm "$FILENAME_BASE.png"

  # Move files needed for the next step to the mounted volume
  mv "$FILENAME_BASE.rgb" /dangerzone \
    || die "Page $PAGE: could not move $FILENAME_BASE.rgb to /dangerzone"
  mv "$FILENAME_BASE.width" /dangerzone \
    || die "Page $PAGE: could not move $FILENAME_BASE.width to /dangerzone"
  mv "$FILENAME_BASE.height" /dangerzone \
    || die "Page $PAGE: could not move $FILENAME_BASE.height to /dangerzone"
done