Completely overhaul the reproduce-image.py script

Make a major change to the `reproduce-image.py` script: drop `diffoci`,
build the container image, and ensure it has the exact same hash as the
source image.

We can drop the `diffoci` script when comparing the two images, because
we are now able to build bit-for-bit reproducible images.
This commit is contained in:
Alex Pyrgiotis 2025-02-26 18:32:07 +02:00
parent a1402d5b6b
commit e1dbdff1da
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
2 changed files with 73 additions and 138 deletions

View file

@ -4,6 +4,7 @@ import argparse
import hashlib import hashlib
import logging import logging
import pathlib import pathlib
import platform
import stat import stat
import subprocess import subprocess
import sys import sys
@ -11,131 +12,72 @@ import urllib.request
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DIFFOCI_URL = "https://github.com/reproducible-containers/diffoci/releases/download/v0.1.5/diffoci-v0.1.5.linux-amd64" if platform.system() in ["Darwin", "Windows"]:
DIFFOCI_CHECKSUM = "01d25fe690196945a6bd510d30559338aa489c034d3a1b895a0d82a4b860698f" CONTAINER_RUNTIME = "docker"
DIFFOCI_PATH = ( elif platform.system() == "Linux":
pathlib.Path.home() / ".local" / "share" / "dangerzone-dev" / "helpers" / "diffoci" CONTAINER_RUNTIME = "podman"
)
IMAGE_NAME = "dangerzone.rocks/dangerzone"
def run(*args): def run(*args):
"""Simple function that runs a command, validates it, and returns the output""" """Simple function that runs a command and checks the result."""
logger.debug(f"Running command: {' '.join(args)}") logger.debug(f"Running command: {' '.join(args)}")
return subprocess.run( return subprocess.run(args, check=True)
args,
check=True,
stdout=subprocess.PIPE,
).stdout
def git_commit_get(): def build_image(
return run("git", "rev-parse", "--short", "HEAD").decode().strip() platform=None,
runtime=None,
cache=True,
def git_determine_tag(): date=None,
return run("git", "describe", "--long", "--first-parent").decode().strip()[1:] ):
def git_verify(commit, source):
if not commit in source:
raise RuntimeError(
f"Image '{source}' does not seem to be built from commit '{commit}'"
)
def diffoci_hash_matches(diffoci):
"""Check if the hash of the downloaded diffoci bin matches the expected one."""
m = hashlib.sha256()
m.update(diffoci)
diffoci_checksum = m.hexdigest()
return diffoci_checksum == DIFFOCI_CHECKSUM
def diffoci_is_installed():
"""Determine if diffoci has been installed.
Determine if diffoci has been installed, by checking if the binary exists, and if
its hash is the expected one. If the binary exists but the hash is different, then
this is a sign that we need to update the local diffoci binary.
"""
if not DIFFOCI_PATH.exists():
return False
return diffoci_hash_matches(DIFFOCI_PATH.open("rb").read())
def diffoci_download():
"""Download the diffoci tool, based on a URL and its checksum."""
with urllib.request.urlopen(DIFFOCI_URL) as f:
diffoci_bin = f.read()
if not diffoci_hash_matches(diffoci_bin):
raise ValueError(
"Unexpected checksum for downloaded diffoci binary:"
f" {diffoci_checksum} !={DIFFOCI_CHECKSUM}"
)
DIFFOCI_PATH.parent.mkdir(parents=True, exist_ok=True)
DIFFOCI_PATH.open("wb+").write(diffoci_bin)
DIFFOCI_PATH.chmod(DIFFOCI_PATH.stat().st_mode | stat.S_IEXEC)
def diffoci_diff(source, local_target):
"""Diff the source image against the recently built target image using diffoci."""
target = f"podman://{local_target}"
try:
return run(
str(DIFFOCI_PATH),
"diff",
source,
target,
"--semantic",
"--verbose",
)
except subprocess.CalledProcessError as e:
error = e.stdout.decode()
raise RuntimeError(
f"Could not rebuild an identical image to {source}. Diffoci report:\n{error}"
)
def build_image(tag, use_cache=False):
"""Build the Dangerzone container image with a special tag.""" """Build the Dangerzone container image with a special tag."""
platform_args = [] if not platform else ["--platform", platform]
runtime_args = [] if not runtime else ["--runtime", runtime]
cache_args = [] if cache else ["--use-cache", "no"]
date_args = [] if not date else ["--debian-archive-date", date]
run( run(
"python3", "python3",
"./install/common/build-image.py", "./install/common/build-image.py",
"--no-save", *platform_args,
"--use-cache", *runtime_args,
str(use_cache), *cache_args,
"--tag", *date_args,
tag,
) )
def parse_args(): def parse_args():
image_tag = git_determine_tag()
# TODO: Remove the local "podman://" prefix once we have started pushing images to a
# remote.
default_image_name = f"podman://{IMAGE_NAME}:{image_tag}"
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog=sys.argv[0], prog=sys.argv[0],
description="Dev script for verifying container image reproducibility", description="Dev script for verifying container image reproducibility",
) )
parser.add_argument( parser.add_argument(
"--source", "--platform",
default=default_image_name, default=None,
help=f"The platform for building the image (default: current platform)",
)
parser.add_argument(
"--runtime",
choices=["docker", "podman"],
default=CONTAINER_RUNTIME,
help=f"The container runtime for building the image (default: {CONTAINER_RUNTIME})",
)
parser.add_argument(
"--no-cache",
default=False,
action="store_true",
help=( help=(
"The name of the image that you want to reproduce. If the image resides in" "Do not use existing cached images for the container build."
" the local Docker / Podman engine, you can prefix it with podman:// or" " Build from the start with a new set of cached layers."
f" docker:// accordingly (default: {default_image_name})"
), ),
) )
parser.add_argument( parser.add_argument(
"--use-cache", "--debian-archive-date",
default=False, default=None,
action="store_true", help="Use a specific Debian snapshot archive, by its date",
help="Whether to reuse the build cache (off by default for better reproducibility)", )
parser.add_argument(
"digest",
help="The digest of the image that you want to reproduce",
) )
return parser.parse_args() return parser.parse_args()
@ -148,32 +90,25 @@ def main():
) )
args = parse_args() args = parse_args()
logger.info(f"Ensuring that current Git commit matches image '{args.source}'") logger.info(f"Building container image")
commit = git_commit_get() build_image(
git_verify(commit, args.source) args.platform,
args.runtime,
if not diffoci_is_installed(): not args.no_cache,
logger.info(f"Downloading diffoci helper from {DIFFOCI_URL}") args.debian_archive_date,
diffoci_download() )
tag = f"reproduce-{commit}"
target = f"{IMAGE_NAME}:{tag}"
logger.info(f"Building container image and tagging it as '{target}'")
build_image(tag, args.use_cache)
logger.info( logger.info(
f"Ensuring that source image '{args.source}' is semantically identical with" f"Check that the reproduced image has the expected digest: {args.digest}"
f" built image '{target}'" )
run(
"./dev_scripts/repro-build.py",
"analyze",
"--show-contents",
"share/container.tar",
"--expected-image-digest",
args.digest,
) )
try:
diffoci_diff(args.source, target)
except subprocess.CalledProcessError as e:
raise RuntimeError(
f"Could not reproduce image {args.source} for commit {commit}"
)
breakpoint()
logger.info(f"Successfully reproduced image '{args.source}' from commit '{commit}'")
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -47,21 +47,21 @@ trigger a CI error.
For a simple way to reproduce a Dangerzone container image, you can checkout the For a simple way to reproduce a Dangerzone container image, you can checkout the
commit this image was built from (you can find it from the image tag in its commit this image was built from (you can find it from the image tag in its
`g<commit>` portion), and run the following command in a Linux environment: `g<commit>` portion), retrieve the date it was built (also included in the image
tag), and run the following command in any environment:
``` ```
./dev_scripts/reproduce-image.py --source <image> ./dev_scripts/reproduce-image.py \
--debian-archive-date <date> \
<digest>
``` ```
This command will download the `diffoci` helper, build a container image from where:
the current Git commit, and ensure that the built image matches the source one, * `<date>` should be given in YYYYMMDD format, e.g., 20250226
with the exception of image names and file timestamps. * `<digest>` is the SHA-256 hash of the image for the **current platform**, with
or without the `sha256:` prefix.
> [!TIP] This command will build a container image from the current Git commit and the
> If the source image is not pushed to a registry, and is local instead, you provided date for the Debian archives. Then, it will compare the digest of the
> can prefix it with `docker://` or `podman://` accordingly, so that `diffoci` manifest against the provided one. This is a simple way to ensure that the
> can load it from the local Docker / Podman container engine. For example: created image is bit-for-bit reproducible.
>
> ```
> ./dev_scripts/reproduce.py --source podman://dangerzone.rocks/dangerzone:0.8.0-125-g725ce3b
> ```