#!/usr/bin/env python3 ################### # Copied from: # https://github.com/freedomofpress/repro-build/blob/8f85db91a5595bf29d9ba22f6021aca77c1037a8/repro-build import argparse import datetime import hashlib import json import logging import os import pprint import shlex import shutil import subprocess import sys import tarfile from pathlib import Path logger = logging.getLogger(__name__) MEDIA_TYPE_INDEX_V1_JSON = "application/vnd.oci.image.index.v1+json" MEDIA_TYPE_MANIFEST_V1_JSON = "application/vnd.oci.image.manifest.v1+json" ENV_RUNTIME = "REPRO_RUNTIME" ENV_DATETIME = "REPRO_DATETIME" ENV_SDE = "REPRO_SOURCE_DATE_EPOCH" ENV_CACHE = "REPRO_CACHE" ENV_BUILDKIT = "REPRO_BUILDKIT_IMAGE" ENV_ROOTLESS = "REPRO_ROOTLESS" DEFAULT_BUILDKIT_IMAGE = "moby/buildkit:v0.19.0@sha256:14aa1b4dd92ea0a4cd03a54d0c6079046ea98cd0c0ae6176bdd7036ba370cbbe" DEFAULT_BUILDKIT_IMAGE_ROOTLESS = "moby/buildkit:v0.19.0-rootless@sha256:e901cffdad753892a7c3afb8b9972549fca02c73888cf340c91ed801fdd96d71" MSG_BUILD_CTX = """Build environment: - Container runtime: {runtime} - Buildkit image: {buildkit_image} - Rootless support: {rootless} - Caching enabled: {use_cache} - Build context: {context} - Dockerfile: {dockerfile} - Output: {output} Build parameters: - SOURCE_DATE_EPOCH: {sde} - Build args: {build_args} - Tag: {tag} - Platform: {platform} Podman-only arguments: - Buildkit arguments: {buildkit_args} Docker-only arguments: - Docker Buildx arguments: {buildx_args} """ def pretty_error(obj: dict, msg: str): raise Exception(f"{msg}\n{pprint.pprint(obj)}") def get_key(obj: dict, key: str) -> object: if key not in obj: pretty_error(f"Could not find key '{key}' in the dictionary:", obj) return obj[key] def run(cmd, dry=False, check=True): action = "Would have run" if dry else "Running" logger.debug(f"{action}: {shlex.join(cmd)}") if not dry: subprocess.run(cmd, check=check) def snip_contents(contents: str, num: int) -> str: contents = contents.replace("\n", "") if len(contents) > num: return ( contents[:num] + f" [... {len(contents) - num} characters omitted." + " Pass --show-contents to print them in their entirety]" ) return contents def detect_container_runtime() -> str: """Auto-detect the installed container runtime in the system.""" if shutil.which("docker"): return "docker" elif shutil.which("podman"): return "podman" else: return None def parse_runtime(args) -> str: if args.runtime is not None: return args.runtime runtime = os.environ.get(ENV_RUNTIME) if runtime is None: raise RuntimeError("No container runtime detected in your system") if runtime not in ("docker", "podman"): raise RuntimeError( "Only 'docker' or 'podman' container runtimes" " are currently supported by this script" ) def parse_use_cache(args) -> bool: if args.no_cache: return False return bool(int(os.environ.get(ENV_CACHE, "1"))) def parse_rootless(args, runtime: str) -> bool: rootless = args.rootless or bool(int(os.environ.get(ENV_ROOTLESS, "0"))) if runtime != "podman" and rootless: raise RuntimeError("Rootless mode is only supported with Podman runtime") return rootless def parse_sde(args) -> str: sde = os.environ.get(ENV_SDE, args.source_date_epoch) dt = os.environ.get(ENV_DATETIME, args.datetime) if (sde is not None and dt is not None) or (sde is None and dt is None): raise RuntimeError("You need to pass either a source date epoch or a datetime") if sde is not None: return str(sde) if dt is not None: d = datetime.datetime.fromisoformat(dt) # If the datetime is naive, assume its timezone is UTC. The check is taken from: # https://docs.python.org/3/library/datetime.html#determining-if-an-object-is-aware-or-naive if d.tzinfo is None or d.tzinfo.utcoffset(d) is None: d = d.replace(tzinfo=datetime.timezone.utc) return int(d.timestamp()) def parse_buildkit_image(args, rootless: bool, runtime: str) -> str: default = DEFAULT_BUILDKIT_IMAGE_ROOTLESS if rootless else DEFAULT_BUILDKIT_IMAGE img = args.buildkit_image or os.environ.get(ENV_BUILDKIT, default) if runtime == "podman" and not img.startswith("docker.io/"): img = "docker.io/" + img return img def parse_build_args(args) -> str: return args.build_arg or [] def parse_buildkit_args(args, runtime: str) -> str: if not args.buildkit_args: return [] if runtime != "podman": raise RuntimeError("Cannot specify Buildkit arguments using the Podman runtime") return shlex.split(args.buildkit_args) def parse_buildx_args(args, runtime: str) -> str: if not args.buildx_args: return [] if runtime != "docker": raise RuntimeError( "Cannot specify Docker Buildx arguments using the Podman runtime" ) return shlex.split(args.buildx_args) def parse_image_digest(args) -> str | None: if not args.expected_image_digest: return None parsed = args.expected_image_digest.split(":", 1) if len(parsed) == 1: return parsed[0] else: return parsed[1] def parse_path(path: str | None) -> str | None: return path and str(Path(path).absolute()) ########################## # OCI parsing logic # # Compatible with: # * https://github.com/opencontainers/image-spec/blob/main/image-layout.md def oci_print_info(parsed: dict, full: bool) -> None: print(f"The OCI tarball contains an index and {len(parsed) - 1} manifest(s):") print() print(f"Image digest: {parsed[1]['digest']}") for i, info in enumerate(parsed): print() if i == 0: print(f"Index ({info['path']}):") else: print(f"Manifest {i} ({info['path']}):") print(f" Digest: {info['digest']}") print(f" Media type: {info['media_type']}") print(f" Platform: {info['platform'] or '-'}") contents = info["contents"] if full else snip_contents(info["contents"], 600) print(f" Contents: {contents}") print() def oci_normalize_path(path): if path.startswith("sha256:"): hash_algo, checksum = path.split(":") path = f"blobs/{hash_algo}/{checksum}" return path def oci_get_file_from_tarball(tar: tarfile.TarFile, path: str) -> dict: return def oci_parse_manifest(tar: tarfile.TarFile, path: str, platform: dict | None) -> dict: path = oci_normalize_path(path) contents = tar.extractfile(path).read().decode() digest = "sha256:" + hashlib.sha256(contents.encode()).hexdigest() contents_dict = json.loads(contents) media_type = get_key(contents_dict, "mediaType") manifests = contents_dict.get("manifests", []) if platform: os = get_key(platform, "os") arch = get_key(platform, "architecture") platform = f"{os}/{arch}" return { "path": path, "contents": contents, "digest": digest, "media_type": media_type, "platform": platform, "manifests": manifests, } def oci_parse_manifests_dfs( tar: tarfile.TarFile, path: str, parsed: list, platform: dict | None = None ) -> None: info = oci_parse_manifest(tar, path, platform) parsed.append(info) for m in info["manifests"]: oci_parse_manifests_dfs(tar, m["digest"], parsed, m.get("platform")) def oci_parse_tarball(path: Path) -> dict: parsed = [] with tarfile.TarFile.open(path) as tar: oci_parse_manifests_dfs(tar, "index.json", parsed) return parsed ########################## # Image building logic def podman_build( context: str, dockerfile: str | None, tag: str | None, buildkit_image: str, sde: int, rootless: bool, use_cache: bool, output: Path, build_args: list, platform: str, buildkit_args: list, dry: bool, ): rootless_args = [] rootful_args = [] if rootless: rootless_args = [ "--userns", "keep-id:uid=1000,gid=1000", "--security-opt", "seccomp=unconfined", "--security-opt", "apparmor=unconfined", "-e", "BUILDKITD_FLAGS=--oci-worker-no-process-sandbox", ] else: rootful_args = ["--privileged"] dockerfile_args_podman = [] dockerfile_args_buildkit = [] if dockerfile: dockerfile_args_podman = ["-v", f"{dockerfile}:/tmp/Dockerfile"] dockerfile_args_buildkit = ["--local", "dockerfile=/tmp"] tag_args = f",name={tag}" if tag else "" cache_args = [] if use_cache: cache_args = [ "--export-cache", "type=local,mode=max,dest=/tmp/cache", "--import-cache", "type=local,src=/tmp/cache", ] _build_args = [] for arg in build_args: _build_args.append("--opt") _build_args.append(f"build-arg:{arg}") platform_args = ["--opt", f"platform={platform}"] if platform else [] cmd = [ "podman", "run", "-it", "--rm", "-v", "buildkit_cache:/tmp/cache", "-v", f"{output.parent}:/tmp/image", "-v", f"{context}:/tmp/work", "--entrypoint", "buildctl-daemonless.sh", *rootless_args, *rootful_args, *dockerfile_args_podman, buildkit_image, "build", "--frontend", "dockerfile.v0", "--local", "context=/tmp/work", "--opt", f"build-arg:SOURCE_DATE_EPOCH={sde}", *_build_args, "--output", f"type=oci,dest=/tmp/image/{output.name},rewrite-timestamp=true{tag_args}", *cache_args, *dockerfile_args_buildkit, *platform_args, *buildkit_args, ] run(cmd, dry) def docker_build( context: str, dockerfile: str | None, tag: str | None, buildkit_image: str, sde: int, use_cache: bool, output: Path, build_args: list, platform: str, buildx_args: list, dry: bool, ): builder_id = hashlib.sha256(buildkit_image.encode()).hexdigest() builder_name = f"repro-build-{builder_id}" tag_args = ["-t", tag] if tag else [] cache_args = [] if use_cache else ["--no-cache", "--pull"] cmd = [ "docker", "buildx", "create", "--name", builder_name, "--driver-opt", f"image={buildkit_image}", ] run(cmd, dry, check=False) dockerfile_args = ["-f", dockerfile] if dockerfile else [] _build_args = [] for arg in build_args: _build_args.append("--build-arg") _build_args.append(arg) platform_args = ["--platform", platform] if platform else [] cmd = [ "docker", "buildx", "--builder", builder_name, "build", "--build-arg", f"SOURCE_DATE_EPOCH={sde}", *_build_args, "--provenance", "false", "--output", f"type=oci,dest={output},rewrite-timestamp=true", *cache_args, *tag_args, *dockerfile_args, *platform_args, *buildx_args, context, ] run(cmd, dry) ########################## # Command logic def build(args): runtime = parse_runtime(args) use_cache = parse_use_cache(args) sde = parse_sde(args) rootless = parse_rootless(args, runtime) buildkit_image = parse_buildkit_image(args, rootless, runtime) build_args = parse_build_args(args) platform = args.platform buildkit_args = parse_buildkit_args(args, runtime) buildx_args = parse_buildx_args(args, runtime) tag = args.tag dockerfile = parse_path(args.file) output = Path(parse_path(args.output)) dry = args.dry context = parse_path(args.context) logger.info( MSG_BUILD_CTX.format( runtime=runtime, buildkit_image=buildkit_image, sde=sde, rootless=rootless, use_cache=use_cache, context=context, dockerfile=dockerfile or "(not provided)", tag=tag or "(not provided)", output=output, build_args=",".join(build_args) or "(not provided)", platform=platform or "(default)", buildkit_args=" ".join(buildkit_args) or "(not provided)", buildx_args=" ".join(buildx_args) or "(not provided)", ) ) try: if runtime == "docker": docker_build( context, dockerfile, tag, buildkit_image, sde, use_cache, output, build_args, platform, buildx_args, dry, ) else: podman_build( context, dockerfile, tag, buildkit_image, sde, rootless, use_cache, output, build_args, platform, buildkit_args, dry, ) except subprocess.CalledProcessError as e: logger.error(f"Failed with {e.returncode}") sys.exit(e.returncode) def analyze(args) -> None: expected_image_digest = parse_image_digest(args) tarball_path = Path(args.tarball) parsed = oci_parse_tarball(tarball_path) oci_print_info(parsed, args.show_contents) if expected_image_digest: cur_digest = parsed[1]["digest"].split(":")[1] if cur_digest != expected_image_digest: raise Exception( f"The image does not have the expected digest: {cur_digest} != {expected_image_digest}" ) print(f"✅ Image digest matches {expected_image_digest}") def define_build_cmd_args(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--runtime", choices=["docker", "podman"], default=detect_container_runtime(), help="The container runtime for building the image (default: %(default)s)", ) parser.add_argument( "--datetime", metavar="YYYY-MM-DD", default=None, help=( "Provide a date and (optionally) a time in ISO format, which will" " be used as the timestamp of the image layers" ), ) parser.add_argument( "--buildkit-image", metavar="NAME:TAG@DIGEST", default=None, help=( "The Buildkit container image which will be used for building the" " reproducible container image. Make sure to pass the '-rootless'" " variant if you are using rootless Podman" " (default: docker.io/moby/buildkit:v0.19.0)" ), ) parser.add_argument( "--source-date-epoch", "--sde", metavar="SECONDS", type=int, default=None, help="Provide a Unix timestamp for the image layers", ) parser.add_argument( "--no-cache", default=False, action="store_true", help="Do not use existing cached images for the container build. Build from the start with a new set of cached layers.", ) parser.add_argument( "--rootless", default=False, action="store_true", help="Run Buildkit in rootless mode (Podman only)", ) parser.add_argument( "-f", "--file", metavar="FILE", default=None, help="Pathname of a Dockerfile", ) parser.add_argument( "-o", "--output", metavar="FILE", default=Path.cwd() / "image.tar", help="Path to save OCI tarball (default: %(default)s)", ) parser.add_argument( "-t", "--tag", metavar="TAG", default=None, help="Tag the built image with the name %(metavar)s", ) parser.add_argument( "--build-arg", metavar="ARG=VALUE", action="append", default=None, help="Set build-time variables", ) parser.add_argument( "--platform", metavar="PLAT1,PLAT2", default=None, help="Set platform if server is multi-platform capable", ) parser.add_argument( "--buildkit-args", metavar="'ARG1 ARG2'", default=None, help="Extra arguments for Buildkit (Podman only)", ) parser.add_argument( "--buildx-args", metavar="'ARG1 ARG2'", default=None, help="Extra arguments for Docker Buildx (Docker only)", ) parser.add_argument( "--dry", default=False, action="store_true", help="Do not run any commands, just print what would happen", ) parser.add_argument( "context", metavar="CONTEXT", help="Path to the build context", ) def parse_args() -> dict: parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest="command", help="Available commands") build_parser = subparsers.add_parser("build", help="Perform a build operation") build_parser.set_defaults(func=build) define_build_cmd_args(build_parser) analyze_parser = subparsers.add_parser("analyze", help="Analyze an OCI tarball") analyze_parser.set_defaults(func=analyze) analyze_parser.add_argument( "tarball", metavar="FILE", help="Path to OCI image in .tar format", ) analyze_parser.add_argument( "--expected-image-digest", metavar="DIGEST", default=None, help="The expected digest for the provided image", ) analyze_parser.add_argument( "--show-contents", default=False, action="store_true", help="Show full file contents", ) return parser.parse_args() def main() -> None: logging.basicConfig( level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) args = parse_args() if not hasattr(args, "func"): args.func = build args.func(args) if __name__ == "__main__": sys.exit(main())