Compare commits

...

2 commits

Author SHA1 Message Date
Alex Pyrgiotis
3d6a236099
WIP: Image works! 2024-12-12 22:47:19 +02:00
Alex Pyrgiotis
1efdd6fee9
WIP: Reproducibility 2024-12-09 10:46:22 +02:00
2 changed files with 138 additions and 68 deletions

View file

@ -1,78 +1,29 @@
###########################################
# Build PyMuPDF
FROM alpine:latest as pymupdf-build
ARG ARCH
ARG REQUIREMENTS_TXT
FROM debian:bookworm-20230904-slim as dangerzone-image
ENV DEBIAN_FRONTEND=noninteractive
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
repro-sources-list.sh && \
apt-get update && \
apt-get install -y --no-install-recommends python3-fitz libreoffice-nogui libreoffice-java-common python3 python3-magic default-jdk-headless fonts-noto-cjk && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache /var/lib/apt/lists/*
# Install PyMuPDF via hash-checked requirements file
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
RUN mkdir -p /opt/dangerzone/dangerzone && \
touch /opt/dangerzone/dangerzone/__init__.py && \
addgroup --gid 1000 dangerzone && \
adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone
# PyMuPDF provides non-arm musl wheels only.
# Only install build-dependencies if we are actually building the wheel
RUN case "$ARCH" in \
"arm64") \
# This is required for copying later, but is created only in the pre-built wheels
mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
&& apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
*) \
apk --no-cache add py3-pip ;; \
esac
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
###########################################
# Download H2ORestart
FROM alpine:latest as h2orestart-dl
ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
RUN mkdir /libreoffice_ext && cd libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& H2ORESTART_VERSION="v0.6.6" \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/"
###########################################
# Dangerzone image
FROM alpine:latest AS dangerzone-image
# Install dependencies
RUN apk --no-cache -U upgrade && \
apk --no-cache add \
libreoffice \
openjdk8 \
python3 \
py3-magic \
font-noto-cjk
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
COPY conversion /opt/dangerzone/dangerzone/conversion
# Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
# 1000, since we will point to it from the OCI config.
#
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN addgroup -g 1000 dangerzone && \
adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
###########################################
# gVisor wrapper image
FROM alpine:latest
RUN apk --no-cache -U upgrade && \
apk --no-cache add python3
FROM alpine:latest as gvisor-image
RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
@ -81,9 +32,25 @@ RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(
chmod 555 runsc && \
mv runsc /usr/bin/
# Add the unprivileged `dangerzone` user.
RUN addgroup dangerzone && \
adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
###########################################
# gVisor wrapper image
FROM debian:bookworm-20230904-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
repro-sources-list.sh && \
apt-get update && \
apt-get install -y --no-install-recommends python3 && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache /var/lib/apt/lists/*
RUN addgroup --gid 1000 dangerzone && \
adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone
COPY --from=gvisor-image /usr/bin/runsc /usr/bin/runsc
# Switch to the dangerzone user for the rest of the script.
USER dangerzone

103
dangerzone/repro-sources-list.sh Executable file
View file

@ -0,0 +1,103 @@
#!/bin/bash
#
# Copyright The repro-sources-list.sh Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -----------------------------------------------------------------------------
# repro-sources-list.sh:
# configures /etc/apt/sources.list and similar files for installing packages from a snapshot.
#
# This script is expected to be executed inside Dockerfile.
#
# The following distributions are supported:
# - debian:11 (/etc/apt/sources.list)
# - debian:12 (/etc/apt/sources.list.d/debian.sources)
# - ubuntu:22.04 (/etc/apt/sources.list)
# - ubuntu:24.04 (/etc/apt/sources.listd/ubuntu.sources)
# - archlinux (/etc/pacman.d/mirrorlist)
#
# For the further information, see https://github.com/reproducible-containers/repro-sources-list.sh
# -----------------------------------------------------------------------------
set -eux -o pipefail
. /etc/os-release
: "${KEEP_CACHE:=1}"
keep_apt_cache() {
rm -f /etc/apt/apt.conf.d/docker-clean
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache
}
case "${ID}" in
"debian")
: "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.debian.org/archive/}"
: "${BACKPORTS:=}"
if [ -e /etc/apt/sources.list.d/debian.sources ]; then
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/debian.sources)}"
rm -f /etc/apt/sources.list.d/debian.sources
else
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
fi
snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
# TODO: use the new format for Debian >= 12
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME} main" >/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian-security/${snapshot} ${VERSION_CODENAME}-security main" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-updates main" >>/etc/apt/sources.list
if [ "${BACKPORTS}" = 1 ]; then echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-backports main" >>/etc/apt/sources.list; fi
if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
;;
"ubuntu")
: "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.ubuntu.com/}"
if [ -e /etc/apt/sources.list.d/ubuntu.sources ]; then
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/ubuntu.sources)}"
rm -f /etc/apt/sources.list.d/ubuntu.sources
else
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
fi
snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
# TODO: use the new format for Ubuntu >= 24.04
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} main restricted" >/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates main restricted" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-backports main restricted universe multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security main restricted" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security multiverse" >>/etc/apt/sources.list
if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
# http://snapshot.ubuntu.com is redirected to https, so we have to install ca-certificates
export DEBIAN_FRONTEND=noninteractive
apt-get -o Acquire::https::Verify-Peer=false update >&2
apt-get -o Acquire::https::Verify-Peer=false install -y ca-certificates >&2
;;
"arch")
: "${SNAPSHOT_ARCHIVE_BASE:=http://archive.archlinux.org/}"
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /var/log/pacman.log)}"
export SOURCE_DATE_EPOCH
# shellcheck disable=SC2016
date -d "@${SOURCE_DATE_EPOCH}" "+Server = ${SNAPSHOT_ARCHIVE_BASE}repos/%Y/%m/%d/\$repo/os/\$arch" >/etc/pacman.d/mirrorlist
;;
*)
echo >&2 "Unsupported distribution: ${ID}"
exit 1
;;
esac
: "${WRITE_SOURCE_DATE_EPOCH:=/dev/null}"
echo "${SOURCE_DATE_EPOCH}" >"${WRITE_SOURCE_DATE_EPOCH}"
echo "SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH}"