From 8b1e4c25e78e8e49bf4a81644d978519c54a655c Mon Sep 17 00:00:00 2001
From: Alex Pyrgiotis <alex.p@freedom.press>
Date: Mon, 27 Jan 2025 13:06:46 +0200
Subject: [PATCH] WIP: Allow security scanning

---
 Dockerfile     | 133 ++++++++++++++++++++++++++++++++++++++-----------
 Dockerfile.env |   4 +-
 Dockerfile.in  | 129 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 210 insertions(+), 56 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 2dd195f..62f56f8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,8 +6,8 @@ ARG DEBIAN_IMAGE_DATE=20250113
 
 FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as dangerzone-image
 
-ARG GVISOR_ARCHIVE_DATE=20250113
-ARG DEBIAN_ARCHIVE_DATE=20250120
+ARG GVISOR_ARCHIVE_DATE=20250120
+ARG DEBIAN_ARCHIVE_DATE=20250127
 ARG H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132
 ARG H2ORESTART_VERSION=v0.7.0
 
@@ -68,61 +68,138 @@ COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/
 # store the state of its containers.
 RUN mkdir /home/dangerzone/.containers
 
-# XXX: Create a new root hierarchy, that will be used in the final container
+###############################################################################
+#
+#                       REUSING CONTAINER IMAGES:
+#                          Anatomy of a hack
+#                       ========================
+#
+# The rest of the Dockerfile aims to do one thing: allow the final container
+# image to actually contain two container images; one for the outer container
+# (spawned by Podman/Docker Desktop), and one for the inner container (spawned
+# by gVisor).
+#
+# This has already been done in the past, and we explain why and how in the
+# design document for gVisor integration (should be in
+# `docs/developer/gvisor.md`). In this iteration, we want to also
+# achieve the following:
+#
+# 1. Have a small final image, by sharing some system paths between the inner
+#    and outer container image using symlinks.
+# 2. Allow our security scanning tool to see the contents of the inner
+#    container image.
+# 3. Make the outer container image operational, in the sense that you can use
+#    `apt` commands and perform a conversion with Dangerzone, outside the
+#    gVisor sandbox. This is helpful for debugging purposes.
+#
+# Below we'll explain how our design choices are informed by the above
+# sub-goals.
+#
+# First, to achieve a small container image, we basically need to copy `/etc`,
+# `/usr` and `/opt` from the original Dangerzone image to the **inner**
+# container image (under `/home/dangerzone/dangerzone-image/rootfs/`).
+#
+# That's all we need. The rest of the files play no role, and we can actually
+# mask them in gVisor's OCI config.
+#
+# Second, in order to let our security scanner find the installed packages,
+# we need to copy the following dirs to the root of the **outer** container
 # image:
+# * `/etc`, so that the security scanner can detect the image type and its
+#   sources
+# * `/var`, so that the security scanner can have access to the APT database.
 #
-# /bin -> usr/bin
-# /lib -> usr/lib
-# /lib64 -> usr/lib64
-# /root
-# /run
-# /tmp
-# /usr -> /home/dangerzone/dangerzone-image/rootfs/usr/
+# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to
+# the **inner** one, in order to avoid leaking files like
+# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running
+# the **outer** container image.
 #
-# We have to create this hierarchy beforehand because we want to use the same
-# /usr for both the inner and outer container. The problem though is that /usr
-# is very sensitive, and you can't manipulate in a live system. That is, I
+# Third, in order to have an operational Debian image, we are _mostly_ covered
+# by the dirs we have copied. There's a _rare_ case where during debugging, we
+# may want to install a system package that has components in `/etc` and
+# `/var`, which will not be available in the **inner** container image. In that
+# case, the developer can create the necessary symlinks in the live container.
+#
+#                           FILESYSTEM HIERARCHY
+#                           ====================
+#
+# The above plan leads to the following filesystem hierarchy:
+#
+# Outer container image:
+#
+#     # ls -l /
+#     lrwxrwxrwx   1 root   root       7 Jan 27 10:46 bin -> usr/bin
+#     -rwxr-xr-x   1 root   root    7764 Jan 24 08:14 entrypoint.py
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:47 etc
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:46 home
+#     lrwxrwxrwx   1 root   root       7 Jan 27 10:46 lib -> usr/lib
+#     lrwxrwxrwx   1 root   root       9 Jan 27 10:46 lib64 -> usr/lib64
+#     drwxr-xr-x   2 root   root    4096 Jan 27 10:46 root
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:47 run
+#     lrwxrwxrwx   1 root   root       8 Jan 27 10:46 sbin -> usr/sbin
+#     drwxrwxrwx   2 root   root    4096 Jan 27 10:46 tmp
+#     lrwxrwxrwx   1 root   root      44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr
+#     drwxr-xr-x  11 root   root    4096 Jan 27 10:47 var
+#
+# Inner container image:
+#
+#     # ls -l /home/dangerzone/dangerzone-image/rootfs/
+#     total 12
+#     lrwxrwxrwx  1 root root    7 Jan 27 10:47 bin -> usr/bin
+#     drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc
+#     lrwxrwxrwx  1 root root    7 Jan 27 10:47 lib -> usr/lib
+#     lrwxrwxrwx  1 root root    9 Jan 27 10:47 lib64 -> usr/lib64
+#     drwxr-xr-x  4 root root 4096 Jan 27 10:47 opt
+#     drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr
+#
+#                           SYMLINKING /USR
+#                           ===============
+#
+# It's surprisingly difficult (maybe even borderline impossible) to symlink
+# `/usr` to a different path during image build. The problem is that /usr
+# is very sensitive, and you can't manipulate it in a live system. That is, I
 # haven't found a way to do the following, or something equivalent:
 #
 #    rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr
 #
-# So, we prefer to create the symlinks here instead, and create the image
-# manually in the next steps.
+# The `ln` binary, even if you specify it by its full path, cannot run
+# (probably because `ld-linux.so` can't be found). For this reason, we have
+# to create the symlinks beforehand, in a previous build stage. Then, in an
+# empty container image (scratch images), we can copy these symlinks and the
+# /usr, and stitch everything together.
+###############################################################################
+
+# Create the filesystem hierarchy that will be used to symlink /usr.
+
 RUN mkdir /new_root
 RUN mkdir /new_root/root /new_root/run /new_root/tmp
 RUN chmod 777 /new_root/tmp
-RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /new_root/usr
+RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr
 RUN ln -s usr/bin /new_root/bin
 RUN ln -s usr/lib /new_root/lib
 RUN ln -s usr/lib64 /new_root/lib64
 RUN ln -s usr/sbin /new_root/sbin
 
-# Intermediate layer
-
-FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as debian-utils
-
 ## Final image
 
 FROM scratch
 
-# Copy the filesystem hierarchy that we created in the previous layer, so that
+# Copy the filesystem hierarchy that we created in the previous stage, so that
 # /usr can be a symlink.
 COPY --from=dangerzone-image /new_root/ /
 
-# Copy some files that are necessary to use the outer container image, e.g., in
-# order to run `apt`. We _could_ avoid doing this, but the space cost is very
-# small.
-COPY --from=dangerzone-image /etc/ /etc/
-COPY --from=debian-utils /var/ /var/
-
 # Copy the bare minimum to run Dangerzone in the inner container image.
 COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/
-COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/
 COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/
+COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/
 RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin
 RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib
 RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64
 
+# Copy the bare minimum to let the security scanner find vulnerabilities.
+COPY --from=dangerzone-image /etc/ /etc/
+COPY --from=dangerzone-image /var/ /var/
+
 # Allow our entrypoint script to make changes in the following folders.
 RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/
 
diff --git a/Dockerfile.env b/Dockerfile.env
index 25ff6ff..2ab94bd 100644
--- a/Dockerfile.env
+++ b/Dockerfile.env
@@ -1,9 +1,9 @@
 # Can be bumped to the latest date in https://hub.docker.com/_/debian/tags?name=bookworm-
 DEBIAN_IMAGE_DATE=20250113
 # Can be bumped to today's date
-DEBIAN_ARCHIVE_DATE=20250120
+DEBIAN_ARCHIVE_DATE=20250127
 # Can be bumped to the latest date in https://github.com/google/gvisor/tags
-GVISOR_ARCHIVE_DATE=20250113
+GVISOR_ARCHIVE_DATE=20250120
 # Can be bumped to the latest version and checksum from https://github.com/ebandal/H2Orestart/releases
 H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132
 H2ORESTART_VERSION=v0.7.0
diff --git a/Dockerfile.in b/Dockerfile.in
index eb75eed..af03c89 100644
--- a/Dockerfile.in
+++ b/Dockerfile.in
@@ -68,61 +68,138 @@ COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/
 # store the state of its containers.
 RUN mkdir /home/dangerzone/.containers
 
-# XXX: Create a new root hierarchy, that will be used in the final container
+###############################################################################
+#
+#                       REUSING CONTAINER IMAGES:
+#                          Anatomy of a hack
+#                       ========================
+#
+# The rest of the Dockerfile aims to do one thing: allow the final container
+# image to actually contain two container images; one for the outer container
+# (spawned by Podman/Docker Desktop), and one for the inner container (spawned
+# by gVisor).
+#
+# This has already been done in the past, and we explain why and how in the
+# design document for gVisor integration (should be in
+# `docs/developer/gvisor.md`). In this iteration, we want to also
+# achieve the following:
+#
+# 1. Have a small final image, by sharing some system paths between the inner
+#    and outer container image using symlinks.
+# 2. Allow our security scanning tool to see the contents of the inner
+#    container image.
+# 3. Make the outer container image operational, in the sense that you can use
+#    `apt` commands and perform a conversion with Dangerzone, outside the
+#    gVisor sandbox. This is helpful for debugging purposes.
+#
+# Below we'll explain how our design choices are informed by the above
+# sub-goals.
+#
+# First, to achieve a small container image, we basically need to copy `/etc`,
+# `/usr` and `/opt` from the original Dangerzone image to the **inner**
+# container image (under `/home/dangerzone/dangerzone-image/rootfs/`).
+#
+# That's all we need. The rest of the files play no role, and we can actually
+# mask them in gVisor's OCI config.
+#
+# Second, in order to let our security scanner find the installed packages,
+# we need to copy the following dirs to the root of the **outer** container
 # image:
+# * `/etc`, so that the security scanner can detect the image type and its
+#   sources
+# * `/var`, so that the security scanner can have access to the APT database.
 #
-# /bin -> usr/bin
-# /lib -> usr/lib
-# /lib64 -> usr/lib64
-# /root
-# /run
-# /tmp
-# /usr -> /home/dangerzone/dangerzone-image/rootfs/usr/
+# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to
+# the **inner** one, in order to avoid leaking files like
+# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running
+# the **outer** container image.
 #
-# We have to create this hierarchy beforehand because we want to use the same
-# /usr for both the inner and outer container. The problem though is that /usr
-# is very sensitive, and you can't manipulate in a live system. That is, I
+# Third, in order to have an operational Debian image, we are _mostly_ covered
+# by the dirs we have copied. There's a _rare_ case where during debugging, we
+# may want to install a system package that has components in `/etc` and
+# `/var`, which will not be available in the **inner** container image. In that
+# case, the developer can create the necessary symlinks in the live container.
+#
+#                           FILESYSTEM HIERARCHY
+#                           ====================
+#
+# The above plan leads to the following filesystem hierarchy:
+#
+# Outer container image:
+#
+#     # ls -l /
+#     lrwxrwxrwx   1 root   root       7 Jan 27 10:46 bin -> usr/bin
+#     -rwxr-xr-x   1 root   root    7764 Jan 24 08:14 entrypoint.py
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:47 etc
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:46 home
+#     lrwxrwxrwx   1 root   root       7 Jan 27 10:46 lib -> usr/lib
+#     lrwxrwxrwx   1 root   root       9 Jan 27 10:46 lib64 -> usr/lib64
+#     drwxr-xr-x   2 root   root    4096 Jan 27 10:46 root
+#     drwxr-xr-x   1 root   root    4096 Jan 27 10:47 run
+#     lrwxrwxrwx   1 root   root       8 Jan 27 10:46 sbin -> usr/sbin
+#     drwxrwxrwx   2 root   root    4096 Jan 27 10:46 tmp
+#     lrwxrwxrwx   1 root   root      44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr
+#     drwxr-xr-x  11 root   root    4096 Jan 27 10:47 var
+#
+# Inner container image:
+#
+#     # ls -l /home/dangerzone/dangerzone-image/rootfs/
+#     total 12
+#     lrwxrwxrwx  1 root root    7 Jan 27 10:47 bin -> usr/bin
+#     drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc
+#     lrwxrwxrwx  1 root root    7 Jan 27 10:47 lib -> usr/lib
+#     lrwxrwxrwx  1 root root    9 Jan 27 10:47 lib64 -> usr/lib64
+#     drwxr-xr-x  4 root root 4096 Jan 27 10:47 opt
+#     drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr
+#
+#                           SYMLINKING /USR
+#                           ===============
+#
+# It's surprisingly difficult (maybe even borderline impossible) to symlink
+# `/usr` to a different path during image build. The problem is that /usr
+# is very sensitive, and you can't manipulate it in a live system. That is, I
 # haven't found a way to do the following, or something equivalent:
 #
 #    rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr
 #
-# So, we prefer to create the symlinks here instead, and create the image
-# manually in the next steps.
+# The `ln` binary, even if you specify it by its full path, cannot run
+# (probably because `ld-linux.so` can't be found). For this reason, we have
+# to create the symlinks beforehand, in a previous build stage. Then, in an
+# empty container image (scratch images), we can copy these symlinks and the
+# /usr, and stitch everything together.
+###############################################################################
+
+# Create the filesystem hierarchy that will be used to symlink /usr.
+
 RUN mkdir /new_root
 RUN mkdir /new_root/root /new_root/run /new_root/tmp
 RUN chmod 777 /new_root/tmp
-RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /new_root/usr
+RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr
 RUN ln -s usr/bin /new_root/bin
 RUN ln -s usr/lib /new_root/lib
 RUN ln -s usr/lib64 /new_root/lib64
 RUN ln -s usr/sbin /new_root/sbin
 
-# Intermediate layer
-
-FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as debian-utils
-
 ## Final image
 
 FROM scratch
 
-# Copy the filesystem hierarchy that we created in the previous layer, so that
+# Copy the filesystem hierarchy that we created in the previous stage, so that
 # /usr can be a symlink.
 COPY --from=dangerzone-image /new_root/ /
 
-# Copy some files that are necessary to use the outer container image, e.g., in
-# order to run `apt`. We _could_ avoid doing this, but the space cost is very
-# small.
-COPY --from=dangerzone-image /etc/ /etc/
-COPY --from=debian-utils /var/ /var/
-
 # Copy the bare minimum to run Dangerzone in the inner container image.
 COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/
-COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/
 COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/
+COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/
 RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin
 RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib
 RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64
 
+# Copy the bare minimum to let the security scanner find vulnerabilities.
+COPY --from=dangerzone-image /etc/ /etc/
+COPY --from=dangerzone-image /var/ /var/
+
 # Allow our entrypoint script to make changes in the following folders.
 RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/