WIP: Reproducibility

This commit is contained in:
Alex Pyrgiotis 2024-11-25 15:04:22 +02:00
parent 1298e9c398
commit a9e4f5f4fd
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
2 changed files with 248 additions and 58 deletions

View file

@ -1,58 +1,22 @@
###########################################
# Build PyMuPDF
FROM alpine:latest as pymupdf-build
ARG ARCH
ARG REQUIREMENTS_TXT
FROM debian:bookworm-20230904-slim as dangerzone-image
ENV DEBIAN_FRONTEND=noninteractive
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
repro-sources-list.sh && \
apt-get update && \
apt-get install -y gcc && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache
# Install PyMuPDF via hash-checked requirements file
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
# PyMuPDF provides non-arm musl wheels only.
# Only install build-dependencies if we are actually building the wheel
RUN case "$ARCH" in \
"arm64") \
# This is required for copying later, but is created only in the pre-built wheels
mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
&& apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
*) \
apk --no-cache add py3-pip ;; \
esac
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
###########################################
# Download H2ORestart
FROM alpine:latest as h2orestart-dl
ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
RUN mkdir /libreoffice_ext && cd libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& H2ORESTART_VERSION="v0.6.6" \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/"
###########################################
# Dangerzone image
FROM alpine:latest AS dangerzone-image
# Install dependencies
RUN apk --no-cache -U upgrade && \
apk --no-cache add \
libreoffice \
openjdk8 \
python3 \
py3-magic \
font-noto-cjk
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
python3-fitz libreoffice-core-nogui python3 python3-magic default-jdk-headless fonts-noto-cjk \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
@ -63,16 +27,28 @@ COPY conversion /opt/dangerzone/dangerzone/conversion
#
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN addgroup -g 1000 dangerzone && \
adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
RUN addgroup --gid 1000 dangerzone \
&& adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone
###########################################
# gVisor wrapper image
FROM alpine:latest
FROM debian:bookworm-20230904-slim
ENV DEBIAN_FRONTEND=noninteractive
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
repro-sources-list.sh && \
apt-get update && \
apt-get install -y gcc && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache
RUN apk --no-cache -U upgrade && \
apk --no-cache add python3
RUN apt-get update \
&& apt-get install -y --no-install-recommends python3 wget ca-certificates \
&& rm -rf /var/lib/apt/lists/*
RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
@ -82,8 +58,8 @@ RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(
mv runsc /usr/bin/
# Add the unprivileged `dangerzone` user.
RUN addgroup dangerzone && \
adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
RUN addgroup --gid 1000 dangerzone \
&& adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone
# Switch to the dangerzone user for the rest of the script.
USER dangerzone
@ -100,3 +76,114 @@ RUN mkdir /home/dangerzone/.containers
COPY gvisor_wrapper/entrypoint.py /
ENTRYPOINT ["/entrypoint.py"]
############################################
## Build PyMuPDF
#FROM alpine:latest as pymupdf-build
#ARG ARCH
#ARG REQUIREMENTS_TXT
## Install PyMuPDF via hash-checked requirements file
#COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
## PyMuPDF provides non-arm musl wheels only.
## Only install build-dependencies if we are actually building the wheel
#RUN case "$ARCH" in \
# "arm64") \
# # This is required for copying later, but is created only in the pre-built wheels
# mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
# && apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
# *) \
# apk --no-cache add py3-pip ;; \
# esac
#RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
############################################
## Download H2ORestart
#FROM alpine:latest as h2orestart-dl
#ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
#RUN mkdir /libreoffice_ext && cd libreoffice_ext \
# && H2ORESTART_FILENAME=h2orestart.oxt \
# && H2ORESTART_VERSION="v0.6.6" \
# && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
# && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
# && install -dm777 "/usr/lib/libreoffice/share/extensions/"
############################################
## Dangerzone image
#FROM alpine:latest AS dangerzone-image
## Install dependencies
#RUN apk --no-cache -U upgrade && \
# apk --no-cache add \
# libreoffice \
# openjdk8 \
# python3 \
# py3-magic \
# font-noto-cjk
#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
#COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
#COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
#RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
#RUN mkdir -p /opt/dangerzone/dangerzone
#RUN touch /opt/dangerzone/dangerzone/__init__.py
#COPY conversion /opt/dangerzone/dangerzone/conversion
## Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
## 1000, since we will point to it from the OCI config.
##
## NOTE: A tmpfs will be mounted over /home/dangerzone directory,
## so nothing within it from the image will be persisted.
#RUN addgroup -g 1000 dangerzone && \
# adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
############################################
## gVisor wrapper image
#FROM alpine:latest
#RUN apk --no-cache -U upgrade && \
# apk --no-cache add python3
#RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
# wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
# sha512sum -c runsc.sha512 && \
# rm -f runsc.sha512 && \
# chmod 555 runsc && \
# mv runsc /usr/bin/
## Add the unprivileged `dangerzone` user.
#RUN addgroup dangerzone && \
# adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
## Switch to the dangerzone user for the rest of the script.
#USER dangerzone
## Copy the Dangerzone image, as created by the previous steps, into the home
## directory of the `dangerzone` user.
#RUN mkdir /home/dangerzone/dangerzone-image
#COPY --from=dangerzone-image / /home/dangerzone/dangerzone-image/rootfs
## Create a directory that will be used by gVisor as the place where it will
## store the state of its containers.
#RUN mkdir /home/dangerzone/.containers
#COPY gvisor_wrapper/entrypoint.py /
#ENTRYPOINT ["/entrypoint.py"]

103
dangerzone/repro-sources-list.sh Executable file
View file

@ -0,0 +1,103 @@
#!/bin/bash
#
# Copyright The repro-sources-list.sh Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -----------------------------------------------------------------------------
# repro-sources-list.sh:
# configures /etc/apt/sources.list and similar files for installing packages from a snapshot.
#
# This script is expected to be executed inside Dockerfile.
#
# The following distributions are supported:
# - debian:11 (/etc/apt/sources.list)
# - debian:12 (/etc/apt/sources.list.d/debian.sources)
# - ubuntu:22.04 (/etc/apt/sources.list)
# - ubuntu:24.04 (/etc/apt/sources.listd/ubuntu.sources)
# - archlinux (/etc/pacman.d/mirrorlist)
#
# For the further information, see https://github.com/reproducible-containers/repro-sources-list.sh
# -----------------------------------------------------------------------------
set -eux -o pipefail
. /etc/os-release
: "${KEEP_CACHE:=1}"
keep_apt_cache() {
rm -f /etc/apt/apt.conf.d/docker-clean
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache
}
case "${ID}" in
"debian")
: "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.debian.org/archive/}"
: "${BACKPORTS:=}"
if [ -e /etc/apt/sources.list.d/debian.sources ]; then
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/debian.sources)}"
rm -f /etc/apt/sources.list.d/debian.sources
else
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
fi
snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
# TODO: use the new format for Debian >= 12
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME} main" >/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian-security/${snapshot} ${VERSION_CODENAME}-security main" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-updates main" >>/etc/apt/sources.list
if [ "${BACKPORTS}" = 1 ]; then echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-backports main" >>/etc/apt/sources.list; fi
if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
;;
"ubuntu")
: "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.ubuntu.com/}"
if [ -e /etc/apt/sources.list.d/ubuntu.sources ]; then
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/ubuntu.sources)}"
rm -f /etc/apt/sources.list.d/ubuntu.sources
else
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
fi
snapshot="$(printf "%(%Y%m%dT%H%M%SZ)T\n" "${SOURCE_DATE_EPOCH}")"
# TODO: use the new format for Ubuntu >= 24.04
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} main restricted" >/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates main restricted" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-backports main restricted universe multiverse" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security main restricted" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security universe" >>/etc/apt/sources.list
echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security multiverse" >>/etc/apt/sources.list
if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
# http://snapshot.ubuntu.com is redirected to https, so we have to install ca-certificates
export DEBIAN_FRONTEND=noninteractive
apt-get -o Acquire::https::Verify-Peer=false update >&2
apt-get -o Acquire::https::Verify-Peer=false install -y ca-certificates >&2
;;
"arch")
: "${SNAPSHOT_ARCHIVE_BASE:=http://archive.archlinux.org/}"
: "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /var/log/pacman.log)}"
export SOURCE_DATE_EPOCH
# shellcheck disable=SC2016
date -d "@${SOURCE_DATE_EPOCH}" "+Server = ${SNAPSHOT_ARCHIVE_BASE}repos/%Y/%m/%d/\$repo/os/\$arch" >/etc/pacman.d/mirrorlist
;;
*)
echo >&2 "Unsupported distribution: ${ID}"
exit 1
;;
esac
: "${WRITE_SOURCE_DATE_EPOCH:=/dev/null}"
echo "${SOURCE_DATE_EPOCH}" >"${WRITE_SOURCE_DATE_EPOCH}"
echo "SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH}"