dangerzone/Dockerfile
Alex Pyrgiotis 66600f32dc
Remove sources of non-determinism from our image
Make our container image more reproducible, by changing the following in
our Dockerfile:
1. Touch `/etc/apt/sources.list` with a UTC timestamp. Else, builds on
   different countries (!?) may result to different Unix epochs for the
   same date, and therefore different modification time for the
   file.
2. Turn the third column of `/etc/shadow` (date of last password change)
   for the `dangerzone` user into a constant number.
3. Fix r-s file permissions in some copied files, due to inconsistent
   COPY behavior in containerized vs non-containerized Buildkit. This
   requires creating a full file hierarchy in a separate directory (see
   new_root/).
4. Set a specific modification time for the entrypoint script, because
   rewrite-timestamp=true does not overwrite it.
2025-03-20 17:15:15 +02:00

228 lines
10 KiB
Docker

# NOTE: Updating the packages to their latest versions requires bumping the
# Dockerfile args below. For more info about this file, read
# docs/developer/reproducibility.md.
ARG DEBIAN_IMAGE_DATE=20250224
FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim AS dangerzone-image
ARG GVISOR_ARCHIVE_DATE=20250217
ARG DEBIAN_ARCHIVE_DATE=20250226
ARG H2ORESTART_CHECKSUM=452331f8603ef456264bd72db6fa8a11ca72b392019a8135c0b2f3095037d7b1
ARG H2ORESTART_VERSION=v0.7.1
ENV DEBIAN_FRONTEND=noninteractive
# The following way of installing packages is taken from
# https://github.com/reproducible-containers/repro-sources-list.sh/blob/master/Dockerfile.debian-12,
# and adapted to allow installing gVisor from each own repo as well.
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./container_helpers/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
--mount=type=bind,source=./container_helpers/gvisor.key,target=/tmp/gvisor.key \
: "Hacky way to set a date for the Debian snapshot repos" && \
touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list.d/debian.sources && \
touch -d ${DEBIAN_ARCHIVE_DATE}Z /etc/apt/sources.list && \
repro-sources-list.sh && \
: "Setup APT to install gVisor from its separate APT repo" && \
apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg && \
gpg -o /usr/share/keyrings/gvisor-archive-keyring.gpg --dearmor /tmp/gvisor.key && \
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases ${GVISOR_ARCHIVE_DATE} main" > /etc/apt/sources.list.d/gvisor.list && \
: "Install the necessary gVisor and Dangerzone dependencies" && \
apt-get update && \
apt-get install -y --no-install-recommends \
python3 python3-fitz libreoffice-nogui libreoffice-java-common \
python3 python3-magic default-jre-headless fonts-noto-cjk fonts-dejavu \
runsc unzip wget && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/cache/fontconfig/ && \
rm -rf /etc/ssl/certs/java/cacerts && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache
# Download H2ORestart from GitHub using a pinned version and hash. Note that
# it's available in Debian repos, but not in Bookworm yet.
RUN mkdir /opt/libreoffice_ext && cd /opt/libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/" \
&& rm /root/.wget-hsts
# Create an unprivileged user both for gVisor and for running Dangerzone.
# XXX: Make the shadow field "date of last password change" a constant
# number.
RUN addgroup --gid 1000 dangerzone
RUN adduser --uid 1000 --ingroup dangerzone --shell /bin/true \
--disabled-password --home /home/dangerzone dangerzone \
&& chage -d 99999 dangerzone \
&& rm /etc/shadow-
# Copy Dangerzone's conversion logic under /opt/dangerzone, and allow Python to
# import it.
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
# Copy only the Python code, and not any produced .pyc files.
COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/
# Create a directory that will be used by gVisor as the place where it will
# store the state of its containers.
RUN mkdir /home/dangerzone/.containers
###############################################################################
#
# REUSING CONTAINER IMAGES:
# Anatomy of a hack
# ========================
#
# The rest of the Dockerfile aims to do one thing: allow the final container
# image to actually contain two container images; one for the outer container
# (spawned by Podman/Docker Desktop), and one for the inner container (spawned
# by gVisor).
#
# This has already been done in the past, and we explain why and how in the
# design document for gVisor integration (should be in
# `docs/developer/gvisor.md`). In this iteration, we want to also
# achieve the following:
#
# 1. Have a small final image, by sharing some system paths between the inner
# and outer container image using symlinks.
# 2. Allow our security scanning tool to see the contents of the inner
# container image.
# 3. Make the outer container image operational, in the sense that you can use
# `apt` commands and perform a conversion with Dangerzone, outside the
# gVisor sandbox. This is helpful for debugging purposes.
#
# Below we'll explain how our design choices are informed by the above
# sub-goals.
#
# First, to achieve a small container image, we basically need to copy `/etc`,
# `/usr` and `/opt` from the original Dangerzone image to the **inner**
# container image (under `/home/dangerzone/dangerzone-image/rootfs/`)
#
# That's all we need. The rest of the files play no role, and we can actually
# mask them in gVisor's OCI config.
#
# Second, in order to let our security scanner find the installed packages,
# we need to copy the following dirs to the root of the **outer** container
# image:
# * `/etc`, so that the security scanner can detect the image type and its
# sources
# * `/var`, so that the security scanner can have access to the APT database.
#
# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to
# the **inner** one, in order to avoid leaking files like
# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running
# the **outer** container image.
#
# Third, in order to have an operational Debian image, we are _mostly_ covered
# by the dirs we have copied. There's a _rare_ case where during debugging, we
# may want to install a system package that has components in `/etc` and
# `/var`, which will not be available in the **inner** container image. In that
# case, the developer can do the necessary symlinks in the live container.
#
# FILESYSTEM HIERARCHY
# ====================
#
# The above plan leads to the following filesystem hierarchy:
#
# Outer container image:
#
# # ls -l /
# lrwxrwxrwx 1 root root 7 Jan 27 10:46 bin -> usr/bin
# -rwxr-xr-x 1 root root 7764 Jan 24 08:14 entrypoint.py
# drwxr-xr-x 1 root root 4096 Jan 27 10:47 etc
# drwxr-xr-x 1 root root 4096 Jan 27 10:46 home
# lrwxrwxrwx 1 root root 7 Jan 27 10:46 lib -> usr/lib
# lrwxrwxrwx 1 root root 9 Jan 27 10:46 lib64 -> usr/lib64
# drwxr-xr-x 2 root root 4096 Jan 27 10:46 root
# drwxr-xr-x 1 root root 4096 Jan 27 10:47 run
# lrwxrwxrwx 1 root root 8 Jan 27 10:46 sbin -> usr/sbin
# drwxrwxrwx 2 root root 4096 Jan 27 10:46 tmp
# lrwxrwxrwx 1 root root 44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr
# drwxr-xr-x 11 root root 4096 Jan 27 10:47 var
#
# Inner container image:
#
# # ls -l /home/dangerzone/dangerzone-image/rootfs/
# total 12
# lrwxrwxrwx 1 root root 7 Jan 27 10:47 bin -> usr/bin
# drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc
# lrwxrwxrwx 1 root root 7 Jan 27 10:47 lib -> usr/lib
# lrwxrwxrwx 1 root root 9 Jan 27 10:47 lib64 -> usr/lib64
# drwxr-xr-x 4 root root 4096 Jan 27 10:47 opt
# drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr
#
# SYMLINKING /USR
# ===============
#
# It's surprisingly difficult (maybe even borderline impossible), to symlink
# `/usr` to a different path during image build. The problem is that /usr
# is very sensitive, and you can't manipulate it in a live system. That is, I
# haven't found a way to do the following, or something equivalent:
#
# rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr
#
# The `ln` binary, even if you specify it by its full path, cannot run
# (probably because `ld-linux.so` can't be found). For this reason, we have
# to create the symlinks beforehand, in a previous build stage. Then, in an
# empty container image (scratch images), we can copy these symlinks and the
# /usr, and stitch everything together.
###############################################################################
# Create the filesystem hierarchy that will be used to symlink /usr.
RUN mkdir -p \
/new_root \
/new_root/root \
/new_root/run \
/new_root/tmp \
/new_root/home/dangerzone/dangerzone-image/rootfs
# Copy the /etc and /var directories under the new root directory. Also,
# copy /etc/, /opt, and /usr to the Dangerzone image rootfs.
#
# NOTE: We also have to remove the resolv.conf file, in order to not leak any DNS
# servers added there during image build time.
RUN cp -r /etc /var /new_root/ \
&& rm /new_root/etc/resolv.conf
RUN cp -r /etc /opt /usr /new_root/home/dangerzone/dangerzone-image/rootfs \
&& rm /new_root/home/dangerzone/dangerzone-image/rootfs/etc/resolv.conf
RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr
RUN ln -s usr/bin /new_root/bin
RUN ln -s usr/lib /new_root/lib
RUN ln -s usr/lib64 /new_root/lib64
RUN ln -s usr/sbin /new_root/sbin
RUN ln -s usr/bin /new_root/home/dangerzone/dangerzone-image/rootfs/bin
RUN ln -s usr/lib /new_root/home/dangerzone/dangerzone-image/rootfs/lib
RUN ln -s usr/lib64 /new_root/home/dangerzone/dangerzone-image/rootfs/lib64
# Fix permissions in /home/dangerzone, so that our entrypoint script can make
# changes in the following folders.
RUN chown dangerzone:dangerzone \
/new_root/home/dangerzone \
/new_root/home/dangerzone/dangerzone-image/
# Fix permissions in /tmp, so that it can be used by unprivileged users.
RUN chmod 777 /new_root/tmp
COPY container_helpers/entrypoint.py /new_root
# HACK: For reasons that we are not sure yet, we need to explicitly specify the
# modification time of this file.
RUN touch -d ${DEBIAN_ARCHIVE_DATE}Z /new_root/entrypoint.py
## Final image
FROM scratch
# Copy the filesystem hierarchy that we created in the previous stage, so that
# /usr can be a symlink.
COPY --from=dangerzone-image /new_root/ /
# Switch to the dangerzone user for the rest of the script.
USER dangerzone
ENTRYPOINT ["/entrypoint.py"]