From 8b1e4c25e78e8e49bf4a81644d978519c54a655c Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Mon, 27 Jan 2025 13:06:46 +0200 Subject: [PATCH] WIP: Allow security scanning --- Dockerfile | 133 ++++++++++++++++++++++++++++++++++++++----------- Dockerfile.env | 4 +- Dockerfile.in | 129 +++++++++++++++++++++++++++++++++++++---------- 3 files changed, 210 insertions(+), 56 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2dd195f..62f56f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,8 +6,8 @@ ARG DEBIAN_IMAGE_DATE=20250113 FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as dangerzone-image -ARG GVISOR_ARCHIVE_DATE=20250113 -ARG DEBIAN_ARCHIVE_DATE=20250120 +ARG GVISOR_ARCHIVE_DATE=20250120 +ARG DEBIAN_ARCHIVE_DATE=20250127 ARG H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132 ARG H2ORESTART_VERSION=v0.7.0 @@ -68,61 +68,138 @@ COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/ # store the state of its containers. RUN mkdir /home/dangerzone/.containers -# XXX: Create a new root hierarchy, that will be used in the final container +############################################################################### +# +# REUSING CONTAINER IMAGES: +# Anatomy of a hack +# ======================== +# +# The rest of the Dockerfile aims to do one thing: allow the final container +# image to actually contain two container images; one for the outer container +# (spawned by Podman/Docker Desktop), and one for the inner container (spawned +# by gVisor). +# +# This has already been done in the past, and we explain why and how in the +# design document for gVisor integration (should be in +# `docs/developer/gvisor.md`). In this iteration, we want to also +# achieve the following: +# +# 1. Have a small final image, by sharing some system paths between the inner +# and outer container image using symlinks. +# 2. Allow our security scanning tool to see the contents of the inner +# container image. +# 3. 
Make the outer container image operational, in the sense that you can use +# `apt` commands and perform a conversion with Dangerzone, outside the +# gVisor sandbox. This is helpful for debugging purposes. +# +# Below we'll explain how our design choices are informed by the above +# sub-goals. +# +# First, to achieve a small container image, we basically need to copy `/etc`, +# `/usr` and `/opt` from the original Dangerzone image to the **inner** +# container image (under `/home/dangerzone/dangerzone-image/rootfs/`) +# +# That's all we need. The rest of the files play no role, and we can actually +# mask them in gVisor's OCI config. +# +# Second, in order to let our security scanner find the installed packages, +# we need to copy the following dirs to the root of the **outer** container # image: +# * `/etc`, so that the security scanner can detect the image type and its +# sources +# * `/var`, so that the security scanner can have access to the APT database. # -# /bin -> usr/bin -# /lib -> usr/lib -# /lib64 -> usr/lib64 -# /root -# /run -# /tmp -# /usr -> /home/dangerzone/dangerzone-image/rootfs/usr/ +# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to +# the **inner** one, in order to avoid leaking files like +# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running +# the **outer** container image. # -# We have to create this hierarchy beforehand because we want to use the same -# /usr for both the inner and outer container. The problem though is that /usr -# is very sensitive, and you can't manipulate in a live system. That is, I +# Third, in order to have an operational Debian image, we are _mostly_ covered +# by the dirs we have copied. There's a _rare_ case where during debugging, we +# may want to install a system package that has components in `/etc` and +# `/var`, which will not be available in the **inner** container image. In that +# case, the developer can do the necessary symlinks in the live container. 
+# +# FILESYSTEM HIERARCHY +# ==================== +# +# The above plan leads to the following filesystem hierarchy: +# +# Outer container image: +# +# # ls -l / +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 bin -> usr/bin +# -rwxr-xr-x 1 root root 7764 Jan 24 08:14 entrypoint.py +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 etc +# drwxr-xr-x 1 root root 4096 Jan 27 10:46 home +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:46 lib64 -> usr/lib64 +# drwxr-xr-x 2 root root 4096 Jan 27 10:46 root +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 run +# lrwxrwxrwx 1 root root 8 Jan 27 10:46 sbin -> usr/sbin +# drwxrwxrwx 2 root root 4096 Jan 27 10:46 tmp +# lrwxrwxrwx 1 root root 44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr +# drwxr-xr-x 11 root root 4096 Jan 27 10:47 var +# +# Inner container image: +# +# # ls -l /home/dangerzone/dangerzone-image/rootfs/ +# total 12 +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 bin -> usr/bin +# drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:47 lib64 -> usr/lib64 +# drwxr-xr-x 4 root root 4096 Jan 27 10:47 opt +# drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr +# +# SYMLINKING /USR +# =============== +# +# It's surprisingly difficult (maybe even borderline impossible), to symlink +# `/usr` to a different path during image build. The problem is that /usr +# is very sensitive, and you can't manipulate it in a live system. That is, I # haven't found a way to do the following, or something equivalent: # # rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr # -# So, we prefer to create the symlinks here instead, and create the image -# manually in the next steps. +# The `ln` binary, even if you specify it by its full path, cannot run +# (probably because `ld-linux.so` can't be found). For this reason, we have +# to create the symlinks beforehand, in a previous build stage. 
Then, in an +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. +############################################################################### + +# Create the filesystem hierarchy that will be used to symlink /usr. + RUN mkdir /new_root RUN mkdir /new_root/root /new_root/run /new_root/tmp RUN chmod 777 /new_root/tmp -RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /new_root/usr +RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr RUN ln -s usr/bin /new_root/bin RUN ln -s usr/lib /new_root/lib RUN ln -s usr/lib64 /new_root/lib64 RUN ln -s usr/sbin /new_root/sbin -# Intermediate layer - -FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as debian-utils - ## Final image FROM scratch -# Copy the filesystem hierarchy that we created in the previous layer, so that +# Copy the filesystem hierarchy that we created in the previous stage, so that # /usr can be a symlink. COPY --from=dangerzone-image /new_root/ / -# Copy some files that are necessary to use the outer container image, e.g., in -# order to run `apt`. We _could_ avoid doing this, but the space cost is very -# small. -COPY --from=dangerzone-image /etc/ /etc/ -COPY --from=debian-utils /var/ /var/ - # Copy the bare minimum to run Dangerzone in the inner container image. COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ -COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ +COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 +# Copy the bare minimum to let the security scanner find vulnerabilities. 
+COPY --from=dangerzone-image /etc/ /etc/ +COPY --from=dangerzone-image /var/ /var/ + # Allow our entrypoint script to make changes in the following folders. RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/ diff --git a/Dockerfile.env b/Dockerfile.env index 25ff6ff..2ab94bd 100644 --- a/Dockerfile.env +++ b/Dockerfile.env @@ -1,9 +1,9 @@ # Can be bumped to the latest date in https://hub.docker.com/_/debian/tags?name=bookworm- DEBIAN_IMAGE_DATE=20250113 # Can be bumped to today's date -DEBIAN_ARCHIVE_DATE=20250120 +DEBIAN_ARCHIVE_DATE=20250127 # Can be bumped to the latest date in https://github.com/google/gvisor/tags -GVISOR_ARCHIVE_DATE=20250113 +GVISOR_ARCHIVE_DATE=20250120 # Can be bumped to the latest version and checksum from https://github.com/ebandal/H2Orestart/releases H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132 H2ORESTART_VERSION=v0.7.0 diff --git a/Dockerfile.in b/Dockerfile.in index eb75eed..af03c89 100644 --- a/Dockerfile.in +++ b/Dockerfile.in @@ -68,61 +68,138 @@ COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/ # store the state of its containers. RUN mkdir /home/dangerzone/.containers -# XXX: Create a new root hierarchy, that will be used in the final container +############################################################################### +# +# REUSING CONTAINER IMAGES: +# Anatomy of a hack +# ======================== +# +# The rest of the Dockerfile aims to do one thing: allow the final container +# image to actually contain two container images; one for the outer container +# (spawned by Podman/Docker Desktop), and one for the inner container (spawned +# by gVisor). +# +# This has already been done in the past, and we explain why and how in the +# design document for gVisor integration (should be in +# `docs/developer/gvisor.md`). In this iteration, we want to also +# achieve the following: +# +# 1. 
Have a small final image, by sharing some system paths between the inner +# and outer container image using symlinks. +# 2. Allow our security scanning tool to see the contents of the inner +# container image. +# 3. Make the outer container image operational, in the sense that you can use +# `apt` commands and perform a conversion with Dangerzone, outside the +# gVisor sandbox. This is helpful for debugging purposes. +# +# Below we'll explain how our design choices are informed by the above +# sub-goals. +# +# First, to achieve a small container image, we basically need to copy `/etc`, +# `/usr` and `/opt` from the original Dangerzone image to the **inner** +# container image (under `/home/dangerzone/dangerzone-image/rootfs/`) +# +# That's all we need. The rest of the files play no role, and we can actually +# mask them in gVisor's OCI config. +# +# Second, in order to let our security scanner find the installed packages, +# we need to copy the following dirs to the root of the **outer** container # image: +# * `/etc`, so that the security scanner can detect the image type and its +# sources +# * `/var`, so that the security scanner can have access to the APT database. # -# /bin -> usr/bin -# /lib -> usr/lib -# /lib64 -> usr/lib64 -# /root -# /run -# /tmp -# /usr -> /home/dangerzone/dangerzone-image/rootfs/usr/ +# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to +# the **inner** one, in order to avoid leaking files like +# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running +# the **outer** container image. # -# We have to create this hierarchy beforehand because we want to use the same -# /usr for both the inner and outer container. The problem though is that /usr -# is very sensitive, and you can't manipulate in a live system. That is, I +# Third, in order to have an operational Debian image, we are _mostly_ covered +# by the dirs we have copied. 
There's a _rare_ case where during debugging, we +# may want to install a system package that has components in `/etc` and +# `/var`, which will not be available in the **inner** container image. In that +# case, the developer can do the necessary symlinks in the live container. +# +# FILESYSTEM HIERARCHY +# ==================== +# +# The above plan leads to the following filesystem hierarchy: +# +# Outer container image: +# +# # ls -l / +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 bin -> usr/bin +# -rwxr-xr-x 1 root root 7764 Jan 24 08:14 entrypoint.py +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 etc +# drwxr-xr-x 1 root root 4096 Jan 27 10:46 home +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:46 lib64 -> usr/lib64 +# drwxr-xr-x 2 root root 4096 Jan 27 10:46 root +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 run +# lrwxrwxrwx 1 root root 8 Jan 27 10:46 sbin -> usr/sbin +# drwxrwxrwx 2 root root 4096 Jan 27 10:46 tmp +# lrwxrwxrwx 1 root root 44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr +# drwxr-xr-x 11 root root 4096 Jan 27 10:47 var +# +# Inner container image: +# +# # ls -l /home/dangerzone/dangerzone-image/rootfs/ +# total 12 +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 bin -> usr/bin +# drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:47 lib64 -> usr/lib64 +# drwxr-xr-x 4 root root 4096 Jan 27 10:47 opt +# drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr +# +# SYMLINKING /USR +# =============== +# +# It's surprisingly difficult (maybe even borderline impossible), to symlink +# `/usr` to a different path during image build. The problem is that /usr +# is very sensitive, and you can't manipulate it in a live system. 
That is, I # haven't found a way to do the following, or something equivalent: # # rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr # -# So, we prefer to create the symlinks here instead, and create the image -# manually in the next steps. +# The `ln` binary, even if you specify it by its full path, cannot run +# (probably because `ld-linux.so` can't be found). For this reason, we have +# to create the symlinks beforehand, in a previous build stage. Then, in an +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. +############################################################################### + +# Create the filesystem hierarchy that will be used to symlink /usr. + RUN mkdir /new_root RUN mkdir /new_root/root /new_root/run /new_root/tmp RUN chmod 777 /new_root/tmp -RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /new_root/usr +RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr RUN ln -s usr/bin /new_root/bin RUN ln -s usr/lib /new_root/lib RUN ln -s usr/lib64 /new_root/lib64 RUN ln -s usr/sbin /new_root/sbin -# Intermediate layer - -FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as debian-utils - ## Final image FROM scratch -# Copy the filesystem hierarchy that we created in the previous layer, so that +# Copy the filesystem hierarchy that we created in the previous stage, so that # /usr can be a symlink. COPY --from=dangerzone-image /new_root/ / -# Copy some files that are necessary to use the outer container image, e.g., in -# order to run `apt`. We _could_ avoid doing this, but the space cost is very -# small. -COPY --from=dangerzone-image /etc/ /etc/ -COPY --from=debian-utils /var/ /var/ - # Copy the bare minimum to run Dangerzone in the inner container image. 
COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ -COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ +COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 +# Copy the bare minimum to let the security scanner find vulnerabilities. +COPY --from=dangerzone-image /etc/ /etc/ +COPY --from=dangerzone-image /var/ /var/ + # Allow our entrypoint script to make changes in the following folders. RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/