FROM alpine:latest # Install dependencies RUN apk -U upgrade && \ apk add \ ghostscript \ graphicsmagick \ libreoffice \ openjdk8 \ poppler-utils \ python3 \ py3-magic \ py3-pillow \ sudo \ tesseract-ocr \ tesseract-ocr-data-afr \ tesseract-ocr-data-ara \ tesseract-ocr-data-aze \ tesseract-ocr-data-bel \ tesseract-ocr-data-ben \ tesseract-ocr-data-bul \ tesseract-ocr-data-cat \ tesseract-ocr-data-ces \ tesseract-ocr-data-chi_sim \ tesseract-ocr-data-chi_tra \ tesseract-ocr-data-chr \ tesseract-ocr-data-dan \ tesseract-ocr-data-deu \ tesseract-ocr-data-ell \ tesseract-ocr-data-enm \ tesseract-ocr-data-epo \ tesseract-ocr-data-equ \ tesseract-ocr-data-est \ tesseract-ocr-data-eus \ tesseract-ocr-data-fin \ tesseract-ocr-data-fra \ tesseract-ocr-data-frk \ tesseract-ocr-data-frm \ tesseract-ocr-data-glg \ tesseract-ocr-data-grc \ tesseract-ocr-data-heb \ tesseract-ocr-data-hin \ tesseract-ocr-data-hrv \ tesseract-ocr-data-hun \ tesseract-ocr-data-ind \ tesseract-ocr-data-isl \ tesseract-ocr-data-ita \ tesseract-ocr-data-ita_old \ tesseract-ocr-data-jpn \ tesseract-ocr-data-kan \ tesseract-ocr-data-kat \ tesseract-ocr-data-kor \ tesseract-ocr-data-lav \ tesseract-ocr-data-lit \ tesseract-ocr-data-mal \ tesseract-ocr-data-mkd \ tesseract-ocr-data-mlt \ tesseract-ocr-data-msa \ tesseract-ocr-data-nld \ tesseract-ocr-data-nor \ tesseract-ocr-data-pol \ tesseract-ocr-data-por \ tesseract-ocr-data-ron \ tesseract-ocr-data-rus \ tesseract-ocr-data-slk \ tesseract-ocr-data-slv \ tesseract-ocr-data-spa \ tesseract-ocr-data-spa_old \ tesseract-ocr-data-sqi \ tesseract-ocr-data-srp \ tesseract-ocr-data-swa \ tesseract-ocr-data-swe \ tesseract-ocr-data-tam \ tesseract-ocr-data-tel \ tesseract-ocr-data-tgl \ tesseract-ocr-data-tha \ tesseract-ocr-data-tur \ tesseract-ocr-data-ukr \ tesseract-ocr-data-vie # Install pdftk RUN \ wget https://gitlab.com/pdftk-java/pdftk/-/jobs/924565145/artifacts/raw/build/libs/pdftk-all.jar && \ mv pdftk-all.jar /usr/local/bin && \ chmod +x /usr/local/bin/pdftk-all.jar && \ echo '#!/bin/sh' > /usr/local/bin/pdftk && \ echo '/usr/bin/java -jar "/usr/local/bin/pdftk-all.jar" "$@"' >> /usr/local/bin/pdftk && \ chmod +x /usr/local/bin/pdftk COPY dangerzone.py /usr/local/bin/ COPY document-to-pixels /usr/local/bin/ COPY pixels-to-pdf /usr/local/bin/ # Add the unprivileged user RUN adduser -h /home/user -s /bin/sh -D user # /tmp/input_file is where the first convert expects the input file to be, and # /tmp where it will write the pixel files # # /dangerzone is where the second script expects files to be put by the first one # # /safezone is where the wrapper eventually moves the sanitized files. VOLUME /dangerzone /tmp/input_file /safezone