Switch the Dangerzone container images from Alpine to a date-pinned Debian
bookworm-slim base whose apt sources are rewritten by repro-sources-list.sh.

Review notes on the added ("+") lines:
  * DEBIAN_FRONTEND is declared with ARG instead of ENV so it is only set
    while building and is not persisted in the image's runtime environment.
  * adduser is called with --disabled-password --gecos "" (the Debian
    counterpart of the `-D` flag used by the replaced Alpine command) so it
    does not try to prompt for a password or user details during the build.
  * repro-sources-list.sh removes /etc/apt/apt.conf.d/docker-clean, so the
    package-install RUN steps that have no cache mount call `apt-get clean`
    to keep downloaded .deb archives out of the image layer.
  * Stage names use the upper-case AS keyword, matching FROM.
  * NOTE(review): leading indentation inside the hunks was reconstructed;
    confirm context/removed lines against the target file before applying
    (or apply with --ignore-whitespace).

diff --git a/Dockerfile b/Dockerfile
index c89c5a4..ecda416 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,58 +1,22 @@
 ###########################################
 # Build PyMuPDF
 
-FROM alpine:latest as pymupdf-build
-ARG ARCH
-ARG REQUIREMENTS_TXT
+FROM debian:bookworm-20230904-slim AS dangerzone-image
+ARG DEBIAN_FRONTEND=noninteractive
+RUN \
+    --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    --mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
+    repro-sources-list.sh && \
+    apt-get update && \
+    apt-get install -y gcc && \
+    : "Clean up for improving reproducibility (optional)" && \
+    rm -rf /var/log/* /var/cache/ldconfig/aux-cache
 
-# Install PyMuPDF via hash-checked requirements file
-COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
-
-# PyMuPDF provides non-arm musl wheels only.
-# Only install build-dependencies if we are actually building the wheel
-RUN case "$ARCH" in \
-    "arm64") \
-        # This is required for copying later, but is created only in the pre-built wheels
-        mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
-        && apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
-    *) \
-        apk --no-cache add py3-pip ;; \
-    esac
-RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
-
-
-###########################################
-# Download H2ORestart
-FROM alpine:latest as h2orestart-dl
-ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
-RUN mkdir /libreoffice_ext && cd libreoffice_ext \
-    && H2ORESTART_FILENAME=h2orestart.oxt \
-    && H2ORESTART_VERSION="v0.6.6" \
-    && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
-    && echo "$H2ORESTART_CHECKSUM  $H2ORESTART_FILENAME" | sha256sum -c \
-    && install -dm777 "/usr/lib/libreoffice/share/extensions/"
-
-
-###########################################
-# Dangerzone image
-
-FROM alpine:latest AS dangerzone-image
-
-# Install dependencies
-RUN apk --no-cache -U upgrade && \
-    apk --no-cache add \
-    libreoffice \
-    openjdk8 \
-    python3 \
-    py3-magic \
-    font-noto-cjk
-
-COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
-COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
-COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
-COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
-
-RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        python3-fitz libreoffice-core-nogui python3 python3-magic default-jdk-headless fonts-noto-cjk \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN mkdir -p /opt/dangerzone/dangerzone
 RUN touch /opt/dangerzone/dangerzone/__init__.py
@@ -63,16 +27,28 @@ COPY conversion /opt/dangerzone/dangerzone/conversion
 #
 # NOTE: A tmpfs will be mounted over /home/dangerzone directory,
 # so nothing within it from the image will be persisted.
-RUN addgroup -g 1000 dangerzone && \
-    adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
+RUN addgroup --gid 1000 dangerzone \
+    && adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone --disabled-password --gecos "" dangerzone
 
 ###########################################
 # gVisor wrapper image
 
-FROM alpine:latest
+FROM debian:bookworm-20230904-slim
+ARG DEBIAN_FRONTEND=noninteractive
+RUN \
+    --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    --mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
+    repro-sources-list.sh && \
+    apt-get update && \
+    apt-get install -y gcc && \
+    : "Clean up for improving reproducibility (optional)" && \
+    rm -rf /var/log/* /var/cache/ldconfig/aux-cache
 
-RUN apk --no-cache -U upgrade && \
-    apk --no-cache add python3
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends python3 wget ca-certificates \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
     wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
@@ -82,8 +58,8 @@ RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(
     mv runsc /usr/bin/
 
 # Add the unprivileged `dangerzone` user.
-RUN addgroup dangerzone && \
-    adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
+RUN addgroup --gid 1000 dangerzone \
+    && adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone --disabled-password --gecos "" dangerzone
 
 # Switch to the dangerzone user for the rest of the script.
 USER dangerzone
@@ -100,3 +76,114 @@ RUN mkdir /home/dangerzone/.containers
 COPY gvisor_wrapper/entrypoint.py /
 
 ENTRYPOINT ["/entrypoint.py"]
+
+
+
+
+
+
+
+
+
+############################################
+## Build PyMuPDF
+
+#FROM alpine:latest as pymupdf-build
+#ARG ARCH
+#ARG REQUIREMENTS_TXT
+
+## Install PyMuPDF via hash-checked requirements file
+#COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
+
+## PyMuPDF provides non-arm musl wheels only.
+## Only install build-dependencies if we are actually building the wheel
+#RUN case "$ARCH" in \
+#    "arm64") \
+#        # This is required for copying later, but is created only in the pre-built wheels
+#        mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
+#        && apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
+#    *) \
+#        apk --no-cache add py3-pip ;; \
+#    esac
+#RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
+
+
+############################################
+## Download H2ORestart
+#FROM alpine:latest as h2orestart-dl
+#ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
+#RUN mkdir /libreoffice_ext && cd libreoffice_ext \
+#    && H2ORESTART_FILENAME=h2orestart.oxt \
+#    && H2ORESTART_VERSION="v0.6.6" \
+#    && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
+#    && echo "$H2ORESTART_CHECKSUM  $H2ORESTART_FILENAME" | sha256sum -c \
+#    && install -dm777 "/usr/lib/libreoffice/share/extensions/"
+
+
+############################################
+## Dangerzone image
+
+#FROM alpine:latest AS dangerzone-image
+
+## Install dependencies
+#RUN apk --no-cache -U upgrade && \
+#    apk --no-cache add \
+#    libreoffice \
+#    openjdk8 \
+#    python3 \
+#    py3-magic \
+#    font-noto-cjk
+
+#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
+#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
+#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
+#COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
+
+#RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
+
+#RUN mkdir -p /opt/dangerzone/dangerzone
+#RUN touch /opt/dangerzone/dangerzone/__init__.py
+#COPY conversion /opt/dangerzone/dangerzone/conversion
+
+## Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
+## 1000, since we will point to it from the OCI config.
+##
+## NOTE: A tmpfs will be mounted over /home/dangerzone directory,
+## so nothing within it from the image will be persisted.
+#RUN addgroup -g 1000 dangerzone && \
+#    adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
+
+############################################
+## gVisor wrapper image
+
+#FROM alpine:latest
+
+#RUN apk --no-cache -U upgrade && \
+#    apk --no-cache add python3
+
+#RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
+#    wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
+#    sha512sum -c runsc.sha512 && \
+#    rm -f runsc.sha512 && \
+#    chmod 555 runsc && \
+#    mv runsc /usr/bin/
+
+## Add the unprivileged `dangerzone` user.
+#RUN addgroup dangerzone && \
+#    adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
+
+## Switch to the dangerzone user for the rest of the script.
+#USER dangerzone
+
+## Copy the Dangerzone image, as created by the previous steps, into the home
+## directory of the `dangerzone` user.
+#RUN mkdir /home/dangerzone/dangerzone-image
+#COPY --from=dangerzone-image / /home/dangerzone/dangerzone-image/rootfs
+
+## Create a directory that will be used by gVisor as the place where it will
+## store the state of its containers.
+#RUN mkdir /home/dangerzone/.containers
+
+#COPY gvisor_wrapper/entrypoint.py /
+
+#ENTRYPOINT ["/entrypoint.py"]
diff --git a/dangerzone/repro-sources-list.sh b/dangerzone/repro-sources-list.sh
new file mode 100755
index 0000000..ea97e47
--- /dev/null
+++ b/dangerzone/repro-sources-list.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+#
+#   Copyright The repro-sources-list.sh Authors.
+#
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# -----------------------------------------------------------------------------
+# repro-sources-list.sh:
+# configures /etc/apt/sources.list and similar files for installing packages from a snapshot.
+#
+# This script is expected to be executed inside Dockerfile.
+#
+# The following distributions are supported:
+# - debian:11    (/etc/apt/sources.list)
+# - debian:12    (/etc/apt/sources.list.d/debian.sources)
+# - ubuntu:22.04 (/etc/apt/sources.list)
+# - ubuntu:24.04 (/etc/apt/sources.list.d/ubuntu.sources)
+# - archlinux    (/etc/pacman.d/mirrorlist)
+#
+# For the further information, see https://github.com/reproducible-containers/repro-sources-list.sh
+# -----------------------------------------------------------------------------
+
+set -eux -o pipefail
+
+. /etc/os-release
+
+: "${KEEP_CACHE:=1}"
+
+keep_apt_cache() {
+  rm -f /etc/apt/apt.conf.d/docker-clean
+  echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache
+}
+
+case "${ID}" in
+"debian")
+  : "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.debian.org/archive/}"
+  : "${BACKPORTS:=}"
+  if [ -e /etc/apt/sources.list.d/debian.sources ]; then
+    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/debian.sources)}"
+    rm -f /etc/apt/sources.list.d/debian.sources
+  else
+    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
+  fi
+  snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
+  # TODO: use the new format for Debian >= 12
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME} main" >/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian-security/${snapshot} ${VERSION_CODENAME}-security main" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-updates main" >>/etc/apt/sources.list
+  if [ "${BACKPORTS}" = 1 ]; then echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-backports main" >>/etc/apt/sources.list; fi
+  if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
+  ;;
+"ubuntu")
+  : "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.ubuntu.com/}"
+  if [ -e /etc/apt/sources.list.d/ubuntu.sources ]; then
+    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/ubuntu.sources)}"
+    rm -f /etc/apt/sources.list.d/ubuntu.sources
+  else
+    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
+  fi
+  snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
+  # TODO: use the new format for Ubuntu >= 24.04
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} main restricted" >/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates main restricted" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} universe" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates universe" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} multiverse" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates multiverse" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-backports main restricted universe multiverse" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security main restricted" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security universe" >>/etc/apt/sources.list
+  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security multiverse" >>/etc/apt/sources.list
+  if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
+  # http://snapshot.ubuntu.com is redirected to https, so we have to install ca-certificates
+  export DEBIAN_FRONTEND=noninteractive
+  apt-get -o Acquire::https::Verify-Peer=false update >&2
+  apt-get -o Acquire::https::Verify-Peer=false install -y ca-certificates >&2
+  ;;
+"arch")
+  : "${SNAPSHOT_ARCHIVE_BASE:=http://archive.archlinux.org/}"
+  : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /var/log/pacman.log)}"
+  export SOURCE_DATE_EPOCH
+  # shellcheck disable=SC2016
+  date -d "@${SOURCE_DATE_EPOCH}" "+Server = ${SNAPSHOT_ARCHIVE_BASE}repos/%Y/%m/%d/\$repo/os/\$arch" >/etc/pacman.d/mirrorlist
+  ;;
+*)
+  echo >&2 "Unsupported distribution: ${ID}"
+  exit 1
+  ;;
+esac
+
+: "${WRITE_SOURCE_DATE_EPOCH:=/dev/null}"
+echo "${SOURCE_DATE_EPOCH}" >"${WRITE_SOURCE_DATE_EPOCH}"
+echo "SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH}"