mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00

Some documents reported the following error when processed with pdftoppm: "Syntax Error: Missing language pack for 'Adobe-Japan1' mapping". This did not necessarily make the conversion of the document fail, but some fonts may not have been rendered properly because of the missing package.
93 lines
2.5 KiB
Docker
93 lines
2.5 KiB
Docker
# Base image.
#
# The Alpine tag is a build argument so that a build can be pinned to a known
# release or digest without editing this file, e.g.
#   docker build --build-arg ALPINE_TAG=<version> .
# The default keeps the previous behaviour of following the newest release.
# NOTE(review): a floating "latest" makes builds non-reproducible and can break
# the package list below when Alpine renames or drops a package; consider
# changing the default to a tested release.
ARG ALPINE_TAG=latest
FROM alpine:${ALPINE_TAG}
|
|
|
|
# Install dependencies.
#
# The base system is upgraded first, then the tools are installed:
# Ghostscript, GraphicsMagick, LibreOffice (plus OpenJDK 8), the poppler
# utilities and their data files, Python 3 with the libmagic bindings, and
# Tesseract OCR with its per-language data packages.
#
# --no-cache makes apk fetch the package index on the fly and keep nothing in
# /var/cache/apk, so the index files do not end up stored in this image layer.
# Packages are listed one per line in alphabetical order to keep diffs small.
RUN apk --no-cache upgrade && \
    apk --no-cache add \
        ghostscript \
        graphicsmagick \
        libreoffice \
        openjdk8 \
        poppler-data \
        poppler-utils \
        py3-magic \
        python3 \
        tesseract-ocr \
        tesseract-ocr-data-afr \
        tesseract-ocr-data-ara \
        tesseract-ocr-data-aze \
        tesseract-ocr-data-bel \
        tesseract-ocr-data-ben \
        tesseract-ocr-data-bul \
        tesseract-ocr-data-cat \
        tesseract-ocr-data-ces \
        tesseract-ocr-data-chi_sim \
        tesseract-ocr-data-chi_tra \
        tesseract-ocr-data-chr \
        tesseract-ocr-data-dan \
        tesseract-ocr-data-deu \
        tesseract-ocr-data-ell \
        tesseract-ocr-data-enm \
        tesseract-ocr-data-epo \
        tesseract-ocr-data-equ \
        tesseract-ocr-data-est \
        tesseract-ocr-data-eus \
        tesseract-ocr-data-fin \
        tesseract-ocr-data-fra \
        tesseract-ocr-data-frk \
        tesseract-ocr-data-frm \
        tesseract-ocr-data-glg \
        tesseract-ocr-data-grc \
        tesseract-ocr-data-heb \
        tesseract-ocr-data-hin \
        tesseract-ocr-data-hrv \
        tesseract-ocr-data-hun \
        tesseract-ocr-data-ind \
        tesseract-ocr-data-isl \
        tesseract-ocr-data-ita \
        tesseract-ocr-data-ita_old \
        tesseract-ocr-data-jpn \
        tesseract-ocr-data-kan \
        tesseract-ocr-data-kat \
        tesseract-ocr-data-kor \
        tesseract-ocr-data-lav \
        tesseract-ocr-data-lit \
        tesseract-ocr-data-mal \
        tesseract-ocr-data-mkd \
        tesseract-ocr-data-mlt \
        tesseract-ocr-data-msa \
        tesseract-ocr-data-nld \
        tesseract-ocr-data-nor \
        tesseract-ocr-data-pol \
        tesseract-ocr-data-por \
        tesseract-ocr-data-ron \
        tesseract-ocr-data-rus \
        tesseract-ocr-data-slk \
        tesseract-ocr-data-slv \
        tesseract-ocr-data-spa \
        tesseract-ocr-data-spa_old \
        tesseract-ocr-data-sqi \
        tesseract-ocr-data-srp \
        tesseract-ocr-data-swa \
        tesseract-ocr-data-swe \
        tesseract-ocr-data-tam \
        tesseract-ocr-data-tel \
        tesseract-ocr-data-tgl \
        tesseract-ocr-data-tha \
        tesseract-ocr-data-tur \
        tesseract-ocr-data-ukr \
        tesseract-ocr-data-vie
|
|
|
|
# Place dangerzone.py in /usr/local/bin and set its executable bit.
COPY dangerzone.py /usr/local/bin/dangerzone.py
RUN chmod +x /usr/local/bin/dangerzone.py
|
|
|
|
# Create the unprivileged "dangerzone" account (-D: no password is assigned,
# -s: login shell /bin/sh) and make it the user for everything that follows.
RUN adduser -D -s /bin/sh dangerzone
USER dangerzone
|
|
|
|
# Mount points used during a conversion:
#
#   /tmp/input_file  where the first conversion step expects to find the input
#                    file; it writes the resulting pixel files to /tmp
#   /dangerzone      where the second script expects the first one to have put
#                    its files
#   /safezone        where the wrapper finally moves the sanitized files
VOLUME ["/dangerzone", "/tmp/input_file", "/safezone"]
|