mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00

The minimum Python version when installing from source is now Python 3.9, as PySide6 6.7.1 dropped support for Python 3.8 (see #780 for more information).

On Debian-derivative distributions, the minimum Python version is now set to 3.8. Because PySide6 is not packaged for Debian [1], we use PySide2 there [0], which is why we can relax the Python version requirement.

In practice, when installing from source in an environment where Python 3.9 is not the default Python, Poetry will look for it and use it if available:

> For various reasons, this Python version might not be compatible with
> the python range supported by the project. In this case, Poetry will
> try to find one that is and use it.
>
> [Poetry docs](https://python-poetry.org/docs/managing-environments/)

On Ubuntu Focal (20.04), where Python 3.9 is not installed by default, it is possible to install it using the `python3.9` package.

Additionally, in version 1.24.3, PyMuPDF changed its package name from `fitz` to `pymupdf` [2], which broke the way it is installed in our container. This is now fixed.

[0] More information on how PySide6 packaging affects dangerzone in #221
[1] See [the current status of PySide6 packaging](https://repology.org/project/python:pyside6/packages)
[2] PyMuPDF changelog: https://pymupdf.readthedocs.io/en/latest/changes.html#change-log
82 lines
3.2 KiB
Docker
82 lines
3.2 KiB
Docker
###########################################
# Build PyMuPDF
#
# Throwaway build stage: installs a C/C++ toolchain plus the Python headers
# and lets pip install the packages listed in the requirements file. The
# toolchain stays in this stage; later stages are expected to copy only the
# installed Python packages out of it.

FROM alpine:latest AS pymupdf-build

# Path (inside the build context) of the hash-pinned requirements file.
# No default is set here, so the build is expected to pass it via --build-arg.
ARG REQUIREMENTS_TXT

# Install PyMuPDF via hash-checked requirements file
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
RUN apk --no-cache add \
        clang-dev \
        g++ \
        gcc \
        linux-headers \
        make \
        py3-pip \
        python3-dev
# --require-hashes makes pip reject any package whose hash is not listed in
# the requirements file. --break-system-packages lets pip install into the
# distro-managed Python environment.
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
|
|
|
|
|
|
###########################################
# Download Tesseract data
#
# Download-only stage: its sole output is /usr/share/tessdata.

FROM alpine:latest AS tessdata-dl

# Expected SHA-256 of the tessdata_fast release tarball. Because the release
# version is resolved at build time, a new upstream release makes the checksum
# step fail until this value is updated (or overridden with --build-arg).
ARG TESSDATA_CHECKSUM=d0e3bb6f3b4e75748680524a1d116f2bfb145618f8ceed55b279d15098a530f9

# Download the trained models from the latest GitHub release of Tesseract, and
# store them under /usr/share/tessdata. This is basically what distro packages
# do under the hood.
#
# Because the GitHub release contains more files than just the trained models,
# we use `find` to fetch only the '*.traineddata' files in the top directory.
#
# Before we untar the models, we also check if the checksum is the expected one.
#
# Notes:
#  * The shell has no pipefail here, so a failed API request would only yield
#    an empty TESSDATA_VERSION; the explicit `test -n` stops the build early
#    in that case.
#  * The checksum line uses TWO spaces between hash and file name, which is
#    the format `sha256sum -c` expects.
RUN mkdir /usr/share/tessdata/ && mkdir tessdata && cd tessdata \
    && TESSDATA_VERSION=$(wget -O- -nv https://api.github.com/repos/tesseract-ocr/tessdata_fast/releases/latest \
        | sed -n 's/^.*"tag_name": "\([0-9.]\+\)".*$/\1/p') \
    && test -n "$TESSDATA_VERSION" \
    && wget "https://github.com/tesseract-ocr/tessdata_fast/archive/$TESSDATA_VERSION/tessdata_fast-$TESSDATA_VERSION.tar.gz" \
    && echo "$TESSDATA_CHECKSUM  tessdata_fast-$TESSDATA_VERSION.tar.gz" | sha256sum -c \
    && tar -xzvf "tessdata_fast-$TESSDATA_VERSION.tar.gz" -C . \
    && find . -name '*.traineddata' -maxdepth 2 -exec cp {} /usr/share/tessdata/ \; \
    && cd .. && rm -r tessdata
|
|
|
|
|
|
###########################################
# Download H2ORestart
#
# Download-only stage: fetches the H2Orestart LibreOffice extension (.oxt)
# at a pinned version and verifies its SHA-256. The only output of this stage
# is the /libreoffice_ext directory; the LibreOffice extensions directory
# itself is created in the final image, not here.

FROM alpine:latest AS h2orestart-dl

# Expected SHA-256 of h2orestart.oxt for the version pinned below. Both must
# be updated together.
ARG H2ORESTART_CHECKSUM=5db816a1e57b510456633f55e693cb5ef3675ef8b35df4f31c90ab9d4c66071a

# The checksum line uses TWO spaces between hash and file name, which is the
# format `sha256sum -c` expects.
RUN mkdir /libreoffice_ext && cd /libreoffice_ext \
    && H2ORESTART_FILENAME=h2orestart.oxt \
    && H2ORESTART_VERSION="v0.5.7" \
    && wget "https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME" \
    && echo "$H2ORESTART_CHECKSUM  $H2ORESTART_FILENAME" | sha256sum -c
|
|
|
|
|
|
###########################################
# Dangerzone image
#
# Final runtime image. It installs the runtime packages, pulls the prepared
# artifacts out of the pymupdf-build, tessdata-dl and h2orestart-dl stages,
# adds the conversion code and then drops to an unprivileged user.

FROM alpine:latest

# Install dependencies
RUN apk --no-cache -U upgrade \
    && apk add --no-cache \
        font-noto-cjk \
        libreoffice \
        openjdk8 \
        py3-magic \
        python3

# Artifacts produced by the earlier stages.
# NOTE(review): the site-packages paths hard-code python3.12, so they only
# work while the `alpine:latest` images used here and in pymupdf-build ship
# Python 3.12 - confirm when the base image moves on.
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=tessdata-dl /usr/share/tessdata/ /usr/share/tessdata
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext

# Create the LibreOffice extensions directory with mode 777 (world-writable).
# Presumably this lets the unprivileged user install the extension at run
# time - verify against the conversion code.
RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"

# With /opt/dangerzone on PYTHONPATH and an empty __init__.py in place,
# /opt/dangerzone/dangerzone is an importable package that contains the
# `conversion` code copied below.
ENV PYTHONPATH=/opt/dangerzone
RUN mkdir -p /opt/dangerzone/dangerzone \
    && touch /opt/dangerzone/dangerzone/__init__.py
COPY conversion /opt/dangerzone/dangerzone/conversion

# Add the unprivileged user
RUN adduser -s /bin/sh -D dangerzone
USER dangerzone

# /safezone is a directory through which Pixels to PDF receives files
VOLUME /safezone
|