mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 09:52:37 +02:00
Remove container in preparation of making it a git submodule
This commit is contained in:
parent
48fd8ece40
commit
911a511e11
3 changed files with 0 additions and 155 deletions
|
@ -1,10 +0,0 @@
|
|||
FROM ubuntu:18.04

# Install the tools the conversion scripts call: file, poppler-utils,
# ImageMagick, Ghostscript, Tesseract (with all language packs) and LibreOffice.
# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
# during the build, and the apt package lists are removed in the same layer so
# they are not shipped in the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        file \
        ghostscript \
        imagemagick \
        libreoffice \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-all && \
    rm -rf /var/lib/apt/lists/*

# Fix imagemagick policy to allow writing PDFs
RUN sed -i '/rights="none" pattern="PDF"/c\<policy domain="coder" rights="read|write" pattern="PDF" />' /etc/ImageMagick-6/policy.xml

# Install the two conversion entry points
COPY document-to-pixels /usr/local/bin/document-to-pixels
COPY pixels-to-pdf /usr/local/bin/pixels-to-pdf
|
|
@ -1,72 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Print an error message to stderr and abort the script.
# Arguments: $1 - the message to print
# Exits with status 1; never returns to the caller.
die() {
  # printf instead of echo, so a message such as "-n" or "-e" is printed
  # literally rather than being parsed as an echo option.
  printf '%s\n' "$1" >&2
  exit 1
}
|
||||
|
||||
# Detect the MIME type of the input document, which is read from
# /tmp/input_file.
MIME_TYPE=$(file -b --mime-type /tmp/input_file)
echo "Document MIME type is $MIME_TYPE"
echo

# Choose the PDF that the following steps work from and store its path in
# ORIGINAL_PDF. A PDF input is used as-is; DOCX and XLSX inputs are converted
# with LibreOffice first, and the result is expected at /tmp/input_file.pdf.
# Any other (or undetectable) type aborts the script.
# "$MIME_TYPE" is quoted so an empty or multi-word value cannot break the test.
case "$MIME_TYPE" in
  # .pdf
  application/pdf)
    ORIGINAL_PDF=/tmp/input_file
    ;;

  # .docx
  application/vnd.openxmlformats-officedocument.wordprocessingml.document)
    echo "Converting DOCX to PDF"
    if ! CONVERT_MSGS=$(libreoffice --headless --convert-to pdf:writer_pdf_Export --outdir /tmp /tmp/input_file 2>&1); then
      die "DOCX to PDF conversion failed: $CONVERT_MSGS"
    fi
    ORIGINAL_PDF=/tmp/input_file.pdf
    ;;

  # .xlsx
  application/vnd.openxmlformats-officedocument.spreadsheetml.sheet)
    echo "Converting XLSX to PDF"
    if ! CONVERT_MSGS=$(libreoffice --headless --convert-to pdf:calc_pdf_Export --outdir /tmp /tmp/input_file 2>&1); then
      die "XLSX to PDF conversion failed: $CONVERT_MSGS"
    fi
    ORIGINAL_PDF=/tmp/input_file.pdf
    ;;

  *)
    die "The document format is not supported"
    ;;
esac
|
||||
|
||||
# Split the PDF into one file per page: /tmp/page-1.pdf, /tmp/page-2.pdf, ...
# pdfseparate's output is captured so it can be included in the error message.
echo "Separating document into pages"
if ! CONVERT_MSGS=$(pdfseparate "$ORIGINAL_PDF" /tmp/page-%d.pdf 2>&1); then
  die "Separating document into pages failed: $CONVERT_MSGS"
fi
|
||||
|
||||
# Value passed to ImageMagick's -depth option when writing the raw RGB data.
IMG_DEPTH=8

# Count the single-page PDFs in /tmp. Matching by name inside find (instead of
# passing a shell glob to it) avoids a spurious error when nothing matches.
NUM_PAGES=$(find /tmp -maxdepth 1 -type f -name 'page-*.pdf' | wc -l)
echo "Document has $NUM_PAGES pages"
echo

if [ "$NUM_PAGES" -eq 0 ]; then
  die "Document has no pages"
fi

# For every page: render it to PNG, record its width and height, convert the
# PNG to raw RGB pixel data, and move the .rgb/.width/.height files to
# /dangerzone. Every step is checked, so a failing page aborts the run instead
# of silently leaving missing or empty files behind.
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
  FILENAME=/tmp/page-$PAGE.pdf
  FILENAME_BASE=/tmp/page-$PAGE

  echo "Converting page $PAGE to pixels"

  # Convert to png
  pdftocairo "$FILENAME" -png -singlefile "$FILENAME_BASE" \
    || die "Page $PAGE conversion failed: could not render the page to PNG"

  # Get the width and height
  IMG_WIDTH=$(identify -format "%w" "$FILENAME_BASE.png") \
    || die "Page $PAGE conversion failed: could not read the image width"
  IMG_HEIGHT=$(identify -format "%h" "$FILENAME_BASE.png") \
    || die "Page $PAGE conversion failed: could not read the image height"
  echo "$IMG_WIDTH" > "$FILENAME_BASE.width"
  echo "$IMG_HEIGHT" > "$FILENAME_BASE.height"

  # Convert to rgb
  convert "$FILENAME_BASE.png" -depth "$IMG_DEPTH" rgb:"$FILENAME_BASE.rgb" \
    || die "Page $PAGE conversion failed: could not convert the PNG to RGB"

  # Delete the png
  rm "$FILENAME_BASE.png"

  # Move files needed for the next step to the mounted volume
  mv "$FILENAME_BASE.rgb" "$FILENAME_BASE.width" "$FILENAME_BASE.height" /dangerzone \
    || die "Page $PAGE conversion failed: could not move the output to /dangerzone"
done
|
|
@ -1,73 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Print an error message to stderr and abort the script.
# Arguments: $1 - the message to print
# Exits with status 1; never returns to the caller.
die() {
  # printf instead of echo, so a message such as "-n" or "-e" is printed
  # literally rather than being parsed as an echo option.
  printf '%s\n' "$1" >&2
  exit 1
}
|
||||
|
||||
# Value passed to ImageMagick's -depth option when reading the raw RGB data.
IMG_DEPTH=8

# Count the per-page .rgb files in /dangerzone. Matching by name inside find
# (instead of passing a shell glob to it) avoids a spurious error when nothing
# matches.
NUM_PAGES=$(find /dangerzone -maxdepth 1 -type f -name 'page-*.rgb' | wc -l)

echo "Document has $NUM_PAGES pages"
echo

# Without any page there is nothing to convert or merge, so stop early with a
# clear message.
if [ "$NUM_PAGES" -eq 0 ]; then
  die "Document has no pages"
fi
|
||||
|
||||
# Convert each page's raw RGB data into a single-page PDF at /tmp/page-N.pdf.
# Reads the environment variables OCR ("1" enables OCR) and OCR_LANGUAGE (the
# language passed to tesseract -l). With OCR the PDF is produced by tesseract;
# otherwise ImageMagick converts the PNG directly.

# Fail early with a clear message rather than letting tesseract fail on an
# empty -l argument for the first page.
if [ "${OCR:-}" = "1" ] && [ -z "${OCR_LANGUAGE:-}" ]; then
  die "OCR is enabled but OCR_LANGUAGE is not set"
fi

for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
  FILENAME_BASE=/dangerzone/page-$PAGE
  RGB_FILENAME=${FILENAME_BASE}.rgb
  WIDTH_FILENAME=${FILENAME_BASE}.width
  HEIGHT_FILENAME=${FILENAME_BASE}.height
  PNG_FILENAME=/tmp/page-$PAGE.png
  OCR_FILENAME=/tmp/page-$PAGE
  PDF_FILENAME=/tmp/page-$PAGE.pdf

  IMG_WIDTH=$(cat "$WIDTH_FILENAME") \
    || die "Page $PAGE conversion failed: cannot read $WIDTH_FILENAME"
  IMG_HEIGHT=$(cat "$HEIGHT_FILENAME") \
    || die "Page $PAGE conversion failed: cannot read $HEIGHT_FILENAME"

  # The dimensions are read from files, so make sure they are plain integers
  # before using them to build the -size argument.
  if ! [[ "$IMG_WIDTH" =~ ^[0-9]+$ && "$IMG_HEIGHT" =~ ^[0-9]+$ ]]; then
    die "Page $PAGE conversion failed: invalid dimensions '${IMG_WIDTH}x${IMG_HEIGHT}'"
  fi

  # "${OCR:-}" is quoted with a default so an unset OCR means "no OCR" instead
  # of making the test itself fail.
  if [ "${OCR:-}" = "1" ]; then
    echo "Converting page $PAGE from pixels to searchable PDF"
  else
    echo "Converting page $PAGE from pixels to PDF"
  fi

  # Both modes start by turning the raw RGB data into a PNG.
  if ! CONVERT_MSGS=$(convert -size "${IMG_WIDTH}x${IMG_HEIGHT}" -depth "${IMG_DEPTH}" rgb:"$RGB_FILENAME" png:"$PNG_FILENAME" 2>&1); then
    die "Page $PAGE conversion failed: $CONVERT_MSGS"
  fi

  if [ "${OCR:-}" = "1" ]; then
    # tesseract appends ".pdf" to the output base, giving /tmp/page-N.pdf
    if ! CONVERT_MSGS=$(tesseract "$PNG_FILENAME" "$OCR_FILENAME" pdf -l "$OCR_LANGUAGE" 2>&1); then
      die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi
  else
    if ! CONVERT_MSGS=$(convert "$PNG_FILENAME" "$PDF_FILENAME" 2>&1); then
      die "Page $PAGE conversion failed: $CONVERT_MSGS"
    fi
  fi
done
|
||||
|
||||
echo
echo "Merging $NUM_PAGES pages into a single PDF"

# Build the pdfunite argument list: every page PDF in page order, followed by
# the output file. An array keeps each path a separate argument.
args=()
for ((PAGE = 1; PAGE <= NUM_PAGES; PAGE++)); do
  args+=("/tmp/page-$PAGE.pdf")
done
args+=("/safezone/safe-output.pdf")

# Merge them; stop here on failure so the compression step never runs against
# a missing or stale merged file.
pdfunite "${args[@]}" || die "Merging pages into a single PDF failed"

# Compress
echo "Compressing PDF"
ps2pdf /safezone/safe-output.pdf /safezone/safe-output-compressed.pdf \
  || die "Compressing PDF failed"
|
Loading…
Reference in a new issue