mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00

PDFtk isn't actually needed. It was being used for breaking a PDF into pages, but this is something that can be done by the already present 'pdftoppm'. Furthermore, removing this dependency contributes to reproducible builds and overall supply chain security, because PDFtk was obtained from GitLab with no signature verification or version pinning. The replacement, 'pdftoppm', also enabled a shortcut:
- before: PDF -> PDF pages -> PNG images -> RGB images
- after: PDF -> PPM images -> RGB images
This last conversion step is trivial, since the RGB format we were using is just a PPM file without the metadata in its header.
93 lines
2.5 KiB
Docker
93 lines
2.5 KiB
Docker
# Base image. ALPINE_TAG defaults to "latest", so a plain `docker build` behaves
# exactly as before; pass `--build-arg ALPINE_TAG=<version>` to pin a specific
# Alpine release when a reproducible build is required.
ARG ALPINE_TAG=latest
FROM alpine:${ALPINE_TAG}
|
|
|
|
# Install dependencies.
#
# Both apk invocations use --no-cache: the package index is fetched for the
# command and not stored in the image, so this layer carries no leftover apk
# cache. Packages are listed one per line, in alphabetical order, to keep diffs
# small when the list changes.
RUN apk --no-cache upgrade && \
    apk --no-cache add \
    ghostscript \
    graphicsmagick \
    libreoffice \
    openjdk8 \
    poppler-utils \
    py3-magic \
    python3 \
    sudo \
    tesseract-ocr \
    tesseract-ocr-data-afr \
    tesseract-ocr-data-ara \
    tesseract-ocr-data-aze \
    tesseract-ocr-data-bel \
    tesseract-ocr-data-ben \
    tesseract-ocr-data-bul \
    tesseract-ocr-data-cat \
    tesseract-ocr-data-ces \
    tesseract-ocr-data-chi_sim \
    tesseract-ocr-data-chi_tra \
    tesseract-ocr-data-chr \
    tesseract-ocr-data-dan \
    tesseract-ocr-data-deu \
    tesseract-ocr-data-ell \
    tesseract-ocr-data-enm \
    tesseract-ocr-data-epo \
    tesseract-ocr-data-equ \
    tesseract-ocr-data-est \
    tesseract-ocr-data-eus \
    tesseract-ocr-data-fin \
    tesseract-ocr-data-fra \
    tesseract-ocr-data-frk \
    tesseract-ocr-data-frm \
    tesseract-ocr-data-glg \
    tesseract-ocr-data-grc \
    tesseract-ocr-data-heb \
    tesseract-ocr-data-hin \
    tesseract-ocr-data-hrv \
    tesseract-ocr-data-hun \
    tesseract-ocr-data-ind \
    tesseract-ocr-data-isl \
    tesseract-ocr-data-ita \
    tesseract-ocr-data-ita_old \
    tesseract-ocr-data-jpn \
    tesseract-ocr-data-kan \
    tesseract-ocr-data-kat \
    tesseract-ocr-data-kor \
    tesseract-ocr-data-lav \
    tesseract-ocr-data-lit \
    tesseract-ocr-data-mal \
    tesseract-ocr-data-mkd \
    tesseract-ocr-data-mlt \
    tesseract-ocr-data-msa \
    tesseract-ocr-data-nld \
    tesseract-ocr-data-nor \
    tesseract-ocr-data-pol \
    tesseract-ocr-data-por \
    tesseract-ocr-data-ron \
    tesseract-ocr-data-rus \
    tesseract-ocr-data-slk \
    tesseract-ocr-data-slv \
    tesseract-ocr-data-spa \
    tesseract-ocr-data-spa_old \
    tesseract-ocr-data-sqi \
    tesseract-ocr-data-srp \
    tesseract-ocr-data-swa \
    tesseract-ocr-data-swe \
    tesseract-ocr-data-tam \
    tesseract-ocr-data-tel \
    tesseract-ocr-data-tgl \
    tesseract-ocr-data-tha \
    tesseract-ocr-data-tur \
    tesseract-ocr-data-ukr \
    tesseract-ocr-data-vie
|
|
|
|
# Place dangerzone.py in /usr/local/bin and set its executable bit. This runs
# while the build is still root, before the switch to the unprivileged user.
COPY dangerzone.py /usr/local/bin/dangerzone.py
RUN chmod +x /usr/local/bin/dangerzone.py
|
|
|
|
# Create the unprivileged "dangerzone" account (-D: no password is assigned,
# -s: login shell) and make it the user for everything that follows.
RUN adduser -D -s /bin/sh dangerzone
USER dangerzone
|
|
|
|
# Declared volumes:
#
#   /tmp/input_file  where the first conversion step expects the input file;
#                    it writes the pixel files to /tmp
#   /dangerzone      where the second script expects the files produced by the
#                    first one
#   /safezone        where the wrapper eventually moves the sanitized files
VOLUME ["/dangerzone", "/tmp/input_file", "/safezone"]
|