Compare commits

...

3 commits

Author SHA1 Message Date
Alex Pyrgiotis
aad17a9150
WIP: Remove non-reproducible parts 2024-12-12 23:13:41 +02:00
Alex Pyrgiotis
60905ce222
WIP: Image works! 2024-12-12 22:48:25 +02:00
Alex Pyrgiotis
7c342ce899
WIP: Reproducibility 2024-12-12 22:48:25 +02:00
3 changed files with 155 additions and 72 deletions

View file

@ -1,78 +1,31 @@
########################################### ###########################################
# Build PyMuPDF # Build PyMuPDF
FROM alpine:latest as pymupdf-build FROM debian:bookworm-20230904-slim as dangerzone-image
ARG ARCH ENV DEBIAN_FRONTEND=noninteractive
ARG REQUIREMENTS_TXT RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
repro-sources-list.sh && \
apt-get update && \
apt-get install -y --no-install-recommends python3-fitz libreoffice-nogui libreoffice-java-common python3 python3-magic default-jdk-headless fonts-noto-cjk && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/cache/fontconfig/ && \
rm -rf /etc/ssl/certs/java/cacerts && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache /var/lib/apt/lists/*
# Install PyMuPDF via hash-checked requirements file RUN mkdir -p /opt/dangerzone/dangerzone && \
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt touch /opt/dangerzone/dangerzone/__init__.py && \
addgroup --gid 1000 dangerzone && \
adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone
# PyMuPDF provides non-arm musl wheels only. COPY conversion/doc_to_pixels.py conversion/common.py conversion/errors.py conversion/__init__.py /opt/dangerzone/dangerzone/conversion
# Only install build-dependencies if we are actually building the wheel
RUN case "$ARCH" in \
"arm64") \
# This is required for copying later, but is created only in the pre-built wheels
mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
&& apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
*) \
apk --no-cache add py3-pip ;; \
esac
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
###########################################
# Download H2ORestart
FROM alpine:latest as h2orestart-dl
ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
RUN mkdir /libreoffice_ext && cd libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& H2ORESTART_VERSION="v0.6.6" \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/"
###########################################
# Dangerzone image
FROM alpine:latest AS dangerzone-image
# Install dependencies
RUN apk --no-cache -U upgrade && \
apk --no-cache add \
libreoffice \
openjdk8 \
python3 \
py3-magic \
font-noto-cjk
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
COPY conversion /opt/dangerzone/dangerzone/conversion
# Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
# 1000, since we will point to it from the OCI config.
#
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN addgroup -g 1000 dangerzone && \
adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
########################################### ###########################################
# gVisor wrapper image # gVisor wrapper image
FROM alpine:latest FROM alpine:latest as gvisor-image
RUN apk --no-cache -U upgrade && \
apk --no-cache add python3
RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \ RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \ wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
@ -81,9 +34,25 @@ RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(
chmod 555 runsc && \ chmod 555 runsc && \
mv runsc /usr/bin/ mv runsc /usr/bin/
# Add the unprivileged `dangerzone` user. ###########################################
RUN addgroup dangerzone && \ # gVisor wrapper image
adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
# Final stage: Debian-based wrapper image that carries python3 and the gVisor
# `runsc` binary copied from the `gvisor-image` stage.
FROM debian:bookworm-20230904-slim

# Keep apt/debconf from prompting while packages are installed.
ENV DEBIAN_FRONTEND=noninteractive

# repro-sources-list.sh is bind-mounted for this step only; it rewrites the apt
# sources before `apt-get update` runs. The apt directories are cache mounts.
# The trailing `rm -rf` drops logs, the ldconfig cache and the apt lists.
RUN \
    --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    --mount=type=bind,source=./repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
    repro-sources-list.sh && \
    apt-get update && \
    apt-get install -y --no-install-recommends python3 && \
    : "Clean up for improving reproducibility (optional)" && \
    rm -rf /var/log/* /var/cache/ldconfig/aux-cache /var/lib/apt/lists/*

# Add the unprivileged `dangerzone` user with a fixed UID/GID of 1000.
# Debian's adduser is interactive by default: --disabled-password skips the
# password prompt (the equivalent of `-D` in the Alpine command this replaces)
# and --gecos "" skips the full-name/room/phone questions, so the build never
# waits on, or depends on, stdin.
RUN addgroup --gid 1000 dangerzone && \
    adduser --uid 1000 --ingroup dangerzone --shell /bin/true \
        --home /home/dangerzone --disabled-password --gecos "" dangerzone

# Bring in the runsc binary fetched in the gvisor-image stage.
COPY --from=gvisor-image /usr/bin/runsc /usr/bin/runsc
# Switch to the dangerzone user for the rest of the script. # Switch to the dangerzone user for the rest of the script.
USER dangerzone USER dangerzone

View file

@ -8,6 +8,15 @@ DEFAULT_DPI = 150 # Pixels per inch
INT_BYTES = 2 INT_BYTES = 2
class CommandError(RuntimeError):
    """Error raised for a failed command, carrying its captured output.

    The exception message is ``msg`` followed by the command's stdout and
    stderr. The raw values stay available, unmodified, as the ``stdout`` and
    ``stderr`` attributes.

    :param msg: Human-readable description of the failure.
    :param stdout: Captured standard output (bytes or str).
    :param stderr: Captured standard error (bytes or str).
    """

    def __init__(self, msg, stdout, stderr):
        # Keep the raw streams for callers that want to inspect them.
        self.stdout = stdout
        self.stderr = stderr
        msg += (
            f"\n====\nCommand output:\n{self._as_text(stdout)}"
            f"\n=====\nCommand stderr:\n{self._as_text(stderr)}\n======"
        )
        super().__init__(msg)

    @staticmethod
    def _as_text(stream):
        """Return ``stream`` in a form suitable for embedding in the message."""
        # Interpolating bytes into an f-string yields their repr (b'...') with
        # escaped newlines, which makes multi-line output unreadable. Decode
        # leniently so undecodable bytes can never mask the original failure.
        if isinstance(stream, (bytes, bytearray)):
            return stream.decode("utf-8", errors="replace")
        return stream
def running_on_qubes() -> bool: def running_on_qubes() -> bool:
# https://www.qubes-os.org/faq/#what-is-the-canonical-way-to-detect-qubes-vm # https://www.qubes-os.org/faq/#what-is-the-canonical-way-to-detect-qubes-vm
return os.path.exists("/usr/share/qubes/marker-vm") return os.path.exists("/usr/share/qubes/marker-vm")
@ -96,7 +105,7 @@ class DangerzoneConverter:
Run a command using asyncio.subprocess, consume its standard streams, and return its Run a command using asyncio.subprocess, consume its standard streams, and return its
output in bytes. output in bytes.
:raises RuntimeError: if the process returns a non-zero exit status :raises CommandError: if the process returns a non-zero exit status
""" """
# Start the provided command, and return a handle. The command will run in the # Start the provided command, and return a handle. The command will run in the
# background. # background.
@ -125,13 +134,15 @@ class DangerzoneConverter:
# Wait until the command has finished. Then, verify that the command # Wait until the command has finished. Then, verify that the command
# has completed successfully. In any other case, raise an exception. # has completed successfully. In any other case, raise an exception.
ret = await proc.wait() ret = await proc.wait()
if ret != 0:
raise RuntimeError(error_message)
# Wait until the tasks that consume the command's standard streams have exited as # Wait until the tasks that consume the command's standard streams have exited as
# well, and return their output. # well, and return their output.
stdout = await stdout_task stdout = await stdout_task
stderr = await stderr_task stderr = await stderr_task
if ret != 0:
raise CommandError(error_message, stdout, stderr)
return (stdout, stderr) return (stdout, stderr)
@abstractmethod @abstractmethod

103
dangerzone/repro-sources-list.sh Executable file
View file

@ -0,0 +1,103 @@
#!/bin/bash
#
# Copyright The repro-sources-list.sh Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -----------------------------------------------------------------------------
# repro-sources-list.sh:
# configures /etc/apt/sources.list and similar files for installing packages from a snapshot.
#
# This script is expected to be executed inside Dockerfile.
#
# The following distributions are supported:
# - debian:11 (/etc/apt/sources.list)
# - debian:12 (/etc/apt/sources.list.d/debian.sources)
# - ubuntu:22.04 (/etc/apt/sources.list)
# - ubuntu:24.04 (/etc/apt/sources.list.d/ubuntu.sources)
# - archlinux (/etc/pacman.d/mirrorlist)
#
# For further information, see https://github.com/reproducible-containers/repro-sources-list.sh
# -----------------------------------------------------------------------------
# Strict mode: stop on the first error, on any unset variable and on a failed
# pipeline stage; trace every command as it runs.
set -o errexit -o nounset -o xtrace -o pipefail

# Load the distribution identity variables (ID, VERSION_CODENAME, ...).
source /etc/os-release

# Default to keeping downloaded packages unless the caller set KEEP_CACHE.
KEEP_CACHE="${KEEP_CACHE:-1}"
# Configure apt to keep the .deb files it downloads: remove the `docker-clean`
# apt configuration snippet if present and write a `keep-cache` snippet that
# enables Keep-Downloaded-Packages.
keep_apt_cache() {
  local apt_conf_dir=/etc/apt/apt.conf.d
  rm -f "${apt_conf_dir}/docker-clean"
  cat >"${apt_conf_dir}/keep-cache" <<'EOF'
Binary::apt::APT::Keep-Downloaded-Packages "true";
EOF
}
# Point the package manager at a dated snapshot archive, selected by the
# distribution ID read from /etc/os-release.
#
# SOURCE_DATE_EPOCH picks the snapshot. When the caller has not set it, it
# defaults to the mtime of an existing package-manager file (see each branch).
# Snapshot timestamps are always formatted in UTC (`date -u`): the generated
# name ends in a literal "Z", so formatting in the local timezone would select
# a different snapshot whenever TZ is not UTC.
case "${ID}" in
"debian")
  : "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.debian.org/archive/}"
  : "${BACKPORTS:=}"
  if [ -e /etc/apt/sources.list.d/debian.sources ]; then
    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/debian.sources)}"
    # The deb822 file is removed so only the sources.list written below applies.
    rm -f /etc/apt/sources.list.d/debian.sources
  else
    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
  fi
  snapshot="$(date -u -d "@${SOURCE_DATE_EPOCH}" "+%Y%m%dT%H%M%SZ")"
  # TODO: use the new format for Debian >= 12
  # The first write truncates sources.list; the remaining lines are appended.
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME} main" >/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian-security/${snapshot} ${VERSION_CODENAME}-security main" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-updates main" >>/etc/apt/sources.list
  # Backports are opt-in via BACKPORTS=1.
  if [ "${BACKPORTS}" = 1 ]; then
    echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}debian/${snapshot} ${VERSION_CODENAME}-backports main" >>/etc/apt/sources.list
  fi
  if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
  ;;
"ubuntu")
  : "${SNAPSHOT_ARCHIVE_BASE:=http://snapshot.ubuntu.com/}"
  if [ -e /etc/apt/sources.list.d/ubuntu.sources ]; then
    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list.d/ubuntu.sources)}"
    # The deb822 file is removed so only the sources.list written below applies.
    rm -f /etc/apt/sources.list.d/ubuntu.sources
  else
    : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /etc/apt/sources.list)}"
  fi
  snapshot="$(date -u -d "@${SOURCE_DATE_EPOCH}" "+%Y%m%dT%H%M%SZ")"
  # TODO: use the new format for Ubuntu >= 24.04
  # The first write truncates sources.list; the remaining lines are appended.
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} main restricted" >/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates main restricted" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} universe" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates universe" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME} multiverse" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-updates multiverse" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-backports main restricted universe multiverse" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security main restricted" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security universe" >>/etc/apt/sources.list
  echo "deb [check-valid-until=no] ${SNAPSHOT_ARCHIVE_BASE}ubuntu/${snapshot} ${VERSION_CODENAME}-security multiverse" >>/etc/apt/sources.list
  if [ "${KEEP_CACHE}" = 1 ]; then keep_apt_cache; fi
  # http://snapshot.ubuntu.com is redirected to https, so we have to install ca-certificates
  # NOTE(review): TLS peer verification is switched off for this bootstrap
  # update/install only; apt output is sent to stderr.
  export DEBIAN_FRONTEND=noninteractive
  apt-get -o Acquire::https::Verify-Peer=false update >&2
  apt-get -o Acquire::https::Verify-Peer=false install -y ca-certificates >&2
  ;;
"arch")
  : "${SNAPSHOT_ARCHIVE_BASE:=http://archive.archlinux.org/}"
  : "${SOURCE_DATE_EPOCH:=$(stat --format=%Y /var/log/pacman.log)}"
  export SOURCE_DATE_EPOCH
  # \$repo and \$arch are written literally into the mirrorlist; only the
  # date fields are expanded here.
  # shellcheck disable=SC2016
  date -u -d "@${SOURCE_DATE_EPOCH}" "+Server = ${SNAPSHOT_ARCHIVE_BASE}repos/%Y/%m/%d/\$repo/os/\$arch" >/etc/pacman.d/mirrorlist
  ;;
*)
  echo >&2 "Unsupported distribution: ${ID}"
  exit 1
  ;;
esac
# Report the epoch that was used: write it to the file named by
# WRITE_SOURCE_DATE_EPOCH (default /dev/null, i.e. discarded) and print it
# to stdout as a KEY=VALUE line.
WRITE_SOURCE_DATE_EPOCH="${WRITE_SOURCE_DATE_EPOCH:-/dev/null}"
echo "${SOURCE_DATE_EPOCH}" >"${WRITE_SOURCE_DATE_EPOCH}"
echo "SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH}"