diff --git a/Dockerfile b/Dockerfile index c89c5a4..c35f1b5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,102 +1 @@ -########################################### -# Build PyMuPDF - -FROM alpine:latest as pymupdf-build -ARG ARCH -ARG REQUIREMENTS_TXT - -# Install PyMuPDF via hash-checked requirements file -COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt - -# PyMuPDF provides non-arm musl wheels only. -# Only install build-dependencies if we are actually building the wheel -RUN case "$ARCH" in \ - "arm64") \ - # This is required for copying later, but is created only in the pre-built wheels - mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \ - && apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \ - *) \ - apk --no-cache add py3-pip ;; \ - esac -RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt - - -########################################### -# Download H2ORestart -FROM alpine:latest as h2orestart-dl -ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2 -RUN mkdir /libreoffice_ext && cd libreoffice_ext \ - && H2ORESTART_FILENAME=h2orestart.oxt \ - && H2ORESTART_VERSION="v0.6.6" \ - && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \ - && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \ - && install -dm777 "/usr/lib/libreoffice/share/extensions/" - - -########################################### -# Dangerzone image - -FROM alpine:latest AS dangerzone-image - -# Install dependencies -RUN apk --no-cache -U upgrade && \ - apk --no-cache add \ - libreoffice \ - openjdk8 \ - python3 \ - py3-magic \ - font-noto-cjk - -COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz -COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf -COPY --from=pymupdf-build 
/usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs -COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext - -RUN install -dm777 "/usr/lib/libreoffice/share/extensions/" - -RUN mkdir -p /opt/dangerzone/dangerzone -RUN touch /opt/dangerzone/dangerzone/__init__.py -COPY conversion /opt/dangerzone/dangerzone/conversion - -# Add the unprivileged user. Set the UID/GID of the dangerzone user/group to -# 1000, since we will point to it from the OCI config. -# -# NOTE: A tmpfs will be mounted over /home/dangerzone directory, -# so nothing within it from the image will be persisted. -RUN addgroup -g 1000 dangerzone && \ - adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone - -########################################### -# gVisor wrapper image - -FROM alpine:latest - -RUN apk --no-cache -U upgrade && \ - apk --no-cache add python3 - -RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \ - wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \ - sha512sum -c runsc.sha512 && \ - rm -f runsc.sha512 && \ - chmod 555 runsc && \ - mv runsc /usr/bin/ - -# Add the unprivileged `dangerzone` user. -RUN addgroup dangerzone && \ - adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone - -# Switch to the dangerzone user for the rest of the script. -USER dangerzone - -# Copy the Dangerzone image, as created by the previous steps, into the home -# directory of the `dangerzone` user. -RUN mkdir /home/dangerzone/dangerzone-image -COPY --from=dangerzone-image / /home/dangerzone/dangerzone-image/rootfs - -# Create a directory that will be used by gVisor as the place where it will -# store the state of its containers. 
-RUN mkdir /home/dangerzone/.containers - -COPY gvisor_wrapper/entrypoint.py / - -ENTRYPOINT ["/entrypoint.py"] +FROM scratch diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 94f894d..64f080a 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -6,9 +6,10 @@ import shlex import shutil import subprocess from typing import List, Tuple +from pathlib import Path from ..document import Document -from ..util import get_resource_path, get_subprocess_startupinfo +from ..util import get_resource_path, get_resource_dir, get_subprocess_startupinfo from .base import IsolationProvider, terminate_process_group TIMEOUT_KILL = 5 # Timeout in seconds until the kill command returns. @@ -47,7 +48,8 @@ class ImageInstallationException(Exception): class Container(IsolationProvider): # Name of the dangerzone container - CONTAINER_NAME = "dangerzone.rocks/dangerzone" + IMAGE_NAME = "dangerzone.rocks/dangerzone" + TARBALL_NAME = "container-%(tag)s.tar.gz" @staticmethod def get_runtime_name() -> str: @@ -155,16 +157,59 @@ class Container(IsolationProvider): return security_args @staticmethod - def install() -> bool: - """ - Make sure the podman container is installed. Linux only. - """ - if Container.is_container_installed(): - return True + def get_image_tags() -> [str]: + """Get the tag of the loaded Dangerzone container image. - # Load the container into podman + If there's no such image, return None. 
+ """ + names = subprocess.check_output( + [ + Container.get_runtime(), + "image", + "list", + "--format", + "{{index .Names 0 }}", + Container.IMAGE_NAME, + ], + text=True, + startupinfo=get_subprocess_startupinfo(), + ).strip().split() + + tags = [name.split(":")[1] for name in names] + return tags.pop("latest") + + @staticmethod + def delete_image_tag(tag: str) -> None: + name = Container.IMAGE_NAME + ":" + tag + log.warning(f"Deleting old container image: {name}") + try: + subprocess.check_output( + [Container.get_runtime(), "rmi", "--force", name], + startupinfo=get_subprocess_startupinfo(), + ) + except Exception: + log.warning(f"Couldn't delete old container image '{name}', so leaving it there") + + @staticmethod + def get_image_tarball() -> None | str: + """Get the name of the Dangerzone image tarball in the resource directory. + + If there's no such tarball, raise an exception. + """ + resource_dir = get_resource_dir() + tarball_glob = Container.TARBALL_NAME.format(tag="*") + tarballs = [f for f in resource_dir.glob(tarball_glob) if f.is_file()] + + if not tarballs: + raise FileNotFoundError(f"Did not find a Dangerzone image tarball in '{resource_dir}'") + elif len(tarballs) > 1: + log.warning(f"Found more than one Dangerzone image tarballs: {tarballs}. 
Picking the first one.") + + return tarballs[0] + + @staticmethod + def load_image_tarball(tarball: Path) -> None: log.info("Installing Dangerzone container image...") - p = subprocess.Popen( [Container.get_runtime(), "load"], stdin=subprocess.PIPE, @@ -172,8 +217,7 @@ class Container(IsolationProvider): ) chunk_size = 4 << 20 - compressed_container_path = get_resource_path("container.tar.gz") - with gzip.open(compressed_container_path) as f: + with gzip.open(tarball) as f: while True: chunk = f.read(chunk_size) if len(chunk) > 0: @@ -181,7 +225,7 @@ class Container(IsolationProvider): p.stdin.write(chunk) else: break - _, err = p.communicate() + out, err = p.communicate() if p.returncode < 0: if err: error = err.decode() @@ -191,10 +235,66 @@ class Container(IsolationProvider): f"Could not install container image: {error}" ) - if not Container.is_container_installed(raise_on_error=True): - return False + image_id = out.decode().strip() + log.info(f"Successfully installed container image with ID '{image_id}'") + return image_id + + @staticmethod + def tag_image(image_id: str, tag: str) -> None: + image_name = Container.IMAGE_NAME + ":" + tag + subprocess.check_output( + [ + Container.get_runtime(), + "tag", + image_id, + image_name, + ], + startupinfo=get_subprocess_startupinfo(), + ) + + log.info(f"Successfully tagged container image with ID '{image_id}' as {image_name}") + + @staticmethod + def is_tarball_loaded(tarball: Path, tags: [str]) -> None: + # Check if the image tarball has been loaded. + for tag in tags: + if tarball.name == Container.TARBALL_NAME.format(tag=tag): + return True + return False + + @staticmethod + def install() -> bool: + """Install the container image tarball, or verify that it's already installed. + + Perform the following actions: + 1. Get the images named `dangerzone.rocks/dangerzone`, and their tags, if any. + 2. Get the name of the container tarball in Dangerzone's `share/` directory. + 3. 
If there's no previous Dangerzone image, install the container tarball. + 4. Else, check if the image tag matches the name in the container tarball. If + yes, skip the installation. Else, load the container image tarball and delete + the previous ones. + """ + old_tags = Container.get_image_tags() + tarball = Container.get_image_tarball() + + if Container.is_tarball_loaded(tarball, old_tags): + return + + # Load the image tarball into the container runtime. + image_id = Container.load_image_tarball(tarball) + Container.tag_image(image_id, "latest") + + # Check if the image tarball has been loaded. + new_tags = Container.get_image_tags() + if not Container.is_tarball_loaded(tarball, new_tags): + raise ImageNotPresentException( + "Image is not listed after installation. Bailing out." + ) + + # Prune older container images. + for tag in old_tags: + Container.delete_image_tag(tag) - log.info("Container image installed") return True @staticmethod @@ -213,58 +313,6 @@ class Container(IsolationProvider): raise NotAvailableContainerTechException(runtime_name, stderr.decode()) return True - @staticmethod - def is_container_installed(raise_on_error: bool = False) -> bool: - """ - See if the container is installed. - """ - # Get the image id - with open(get_resource_path("image-id.txt")) as f: - expected_image_ids = f.read().strip().split() - - # See if this image is already installed - installed = False - found_image_id = subprocess.check_output( - [ - Container.get_runtime(), - "image", - "list", - "--format", - "{{.ID}}", - Container.CONTAINER_NAME, - ], - text=True, - startupinfo=get_subprocess_startupinfo(), - ) - found_image_id = found_image_id.strip() - - if found_image_id in expected_image_ids: - installed = True - elif found_image_id == "": - if raise_on_error: - raise ImageNotPresentException( - "Image is not listed after installation. Bailing out." - ) - else: - msg = ( - f"{Container.CONTAINER_NAME} images found, but IDs do not match." 
- f" Found: {found_image_id}, Expected: {','.join(expected_image_ids)}" - ) - if raise_on_error: - raise ImageNotPresentException(msg) - log.info(msg) - log.info("Deleting old dangerzone container image") - - try: - subprocess.check_output( - [Container.get_runtime(), "rmi", "--force", found_image_id], - startupinfo=get_subprocess_startupinfo(), - ) - except Exception: - log.warning("Couldn't delete old container image, so leaving it there") - - return installed - def doc_to_pixels_container_name(self, document: Document) -> str: """Unique container name for the doc-to-pixels phase.""" return f"dangerzone-doc-to-pixels-{document.id}" diff --git a/dangerzone/util.py b/dangerzone/util.py index a5063a0..4bbde7e 100644 --- a/dangerzone/util.py +++ b/dangerzone/util.py @@ -12,7 +12,7 @@ def get_config_dir() -> str: return appdirs.user_config_dir("dangerzone") -def get_resource_path(filename: str) -> str: +def get_resource_dir() -> pathlib.Path: if getattr(sys, "dangerzone_dev", False): # Look for resources directory relative to python file project_root = pathlib.Path(__file__).parent.parent @@ -30,8 +30,11 @@ def get_resource_path(filename: str) -> str: prefix = dz_install_path / "share" else: raise NotImplementedError(f"Unsupported system {platform.system()}") - resource_path = prefix / filename - return str(resource_path) + return prefix + + +def get_resource_path(filename: str) -> str: + return str(get_resource_dir()/ filename) def get_tessdata_dir() -> pathlib.Path: diff --git a/dodo.py b/dodo.py index 3470f8e..aa3bacf 100644 --- a/dodo.py +++ b/dodo.py @@ -14,8 +14,10 @@ from doit.action import CmdAction # CONTAINER_RUNTIME = "podman" CONTAINER_RUNTIME = "podman" +ARCH = "i686" # FIXME VERSION = open("share/version.txt").read().strip() -RELEASE_DIR = Path.home() / "release" / VERSION +# FIXME: Make this user-selectable with `get_var()` +RELEASE_DIR = Path.home() / "dz_release_area" / VERSION FEDORA_VERSIONS = ["39", "40", "41"] DEBIAN_VERSIONS = ["bullseye", 
"focal", "jammy", "mantic", "noble", "trixie"] @@ -63,6 +65,16 @@ def cmd_build_linux_pkg(distro, version, cwd, qubes=False): return CmdAction(cmd, cwd=cwd) +def task_clean_container_runtime(): + """Clean the storage space of the container runtime.""" + return { + "actions": None, + "clean": [ + [CONTAINER_RUNTIME, "system", "prune", "-f"], + ], + } + + def task_check_python(): """Check that the latest supported Python version is installed (WIP). @@ -78,7 +90,7 @@ def task_check_python(): } -def task_container_runtime(): +def task_check_container_runtime(): """Test that the container runtime is ready.""" return { "actions": [ @@ -88,13 +100,13 @@ def task_container_runtime(): } -def task_system_checks(): +def task_check_system(): """Common status checks for a system.""" return { "actions": None, "task_dep": [ "check_python", - "container_runtime", + "check_container_runtime", ], } @@ -113,7 +125,7 @@ def task_macos_check_docker_containerd(): """Test that Docker uses the containard image store.""" def check_containerd_store(): cmd = ["docker", "info", "-f", "{{ .DriverStatus }}"] - driver = subprocess.check_output(cmd).strip() + driver = subprocess.check_output(cmd, text=True).strip() if driver != "[[driver-type io.containerd.snapshotter.v1]]": raise RuntimeError( f"Probing the Docker image store with {cmd} returned {driver}." 
@@ -131,20 +143,34 @@ def task_macos_check_docker_containerd(): } -def task_macos_system_checks(): +def task_macos_check_system(): """Run macOS specific system checks, as well as the generic ones.""" return { "actions": None, "task_dep": [ - "system_checks", + "check_system", "macos_check_cert", "macos_check_docker_containerd", ], } -def task_tessdata(): - """Download Tesseract data""" +def task_init_release_dir(): + """Create a directory for release artifacts.""" + def create_release_dir(): + RELEASE_DIR.mkdir(parents=True, exist_ok=True) + (RELEASE_DIR / "assets").mkdir(exist_ok=True) + (RELEASE_DIR / "tmp").mkdir(exist_ok=True) + + return { + "actions": [create_release_dir], + "targets": [RELEASE_DIR, RELEASE_DIR / "github", RELEASE_DIR / "tmp"], + "clean": True, + } + + +def task_download_tessdata(): + """Download the Tesseract data using ./install/common/download-tessdata.py""" tessdata_dir = Path("share") / "tessdata" langs = json.loads(open(tessdata_dir.parent / "ocr-languages.json").read()).values() targets = [tessdata_dir / f"{lang}.traineddata" for lang in langs] @@ -160,77 +186,73 @@ def task_tessdata(): } -def task_build_container(): +def task_build_image(): + """Build the container image using ./install/common/build-image.py""" return { - "actions": ["python install/common/build-image.py --use-cache=%(use_cache)s"], + "actions": [ + "python install/common/build-image.py --use-cache=%(use_cache)s", + ], "params": [ { "name": "use_cache", "long": "use-cache", + "help": ( + "Whether to use cached results or not. 
For reproducibility reasons," + " it's best to leave it to false" + ), "default": False, - } + }, ], "file_dep": [ "Dockerfile", "poetry.lock", *list_files("dangerzone/conversion"), "dangerzone/gvisor_wrapper/entrypoint.py", + "install/common/build-image.py", ], "targets": ["share/container.tar.gz", "share/image-id.txt"], - "task_dep": ["container_runtime"], + "task_dep": ["check_container_runtime"], "clean": True, } def task_poetry_install(): + """Setup the Poetry environment""" return { "actions": ["poetry install --sync"], } -def task_app(): +def task_macos_build_app(): + """Build the macOS app bundle for Dangerzone.""" + return { "actions": [["poetry", "run", "install/macos/build-app.py"]], "file_dep": [ *list_files("share"), *list_files("dangerzone"), + "share/container.tar.gz", + "share/image-id.txt", ], "task_dep": ["poetry_install"], "targets": ["dist/Dangerzone.app"], - "clean": True, + "clean": ["rm -rf dist/Dangerzone.app"], } -def task_codesign(): +def task_macos_codesign(): return { "actions": [ - ["poetry", "run", "install/macos/build-app.py"], + ["poetry", "run", "install/macos/build-app.py", "--only-codesign"], [ - "xcrun", - "notarytool", - "submit", - "--wait", - "--apple-id", - "", - "--keychain-profile", - "dz-notarytool-release-key", - "dist/Dangerzone.dmg", + "xcrun notarytool submit --wait --apple-id %(apple_id)s" + " --keychain-profile dz-notarytool-release-key dist/Dangerzone.dmg", ], ], + "params": [PARAM_APPLE_ID], "file_dep": ["dist/Dangerzone.app"], - "targets": ["dist/Dangerzone.dmg"] - } - - -def task_init_release_dir(): - def create_release_dir(): - RELEASE_DIR.mkdir(parents=True, exist_ok=True) - (RELEASE_DIR / "github").mkdir(exist_ok=True) - (RELEASE_DIR / "tmp").mkdir(exist_ok=True) - - return { - "actions": [create_release_dir], - "targets": [RELEASE_DIR, RELEASE_DIR / "github", RELEASE_DIR / "tmp"], + "targets": ["dist/Dangerzone.dmg"], + "clean": True, } @@ -271,7 +293,8 @@ def task_debian_deb(): "task_dep": [ "debian_env", ], 
- "targets": [deb_dst] + "targets": [deb_dst], + "clean": True, } @@ -322,7 +345,8 @@ def task_fedora_rpm(): "task_dep": [ f"fedora_env:{version}", ], - "targets": rpm_dst + "targets": rpm_dst, + "clean": True, } @@ -360,5 +384,6 @@ def task_apt_tools_prod_prep(apt_tools_prod_dir): return { "actions": [copy_files], "file_dep": [src], - "targets": [bookworm_deb, *other_debs] + "targets": [bookworm_deb, *other_debs], + "clean": True, } diff --git a/pyproject.toml b/pyproject.toml index a87108e..c373931 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,8 +67,19 @@ skip_gitignore = true # This is necessary due to https://github.com/PyCQA/isort/issues/1835 follow_links = false +[tool.doit.commands.clean] +# XXX: Change this to false if you REALLY want to clean a task's output. Else, +# the `doit clean` command will print the commands that would run instead. +dryrun = true + +[tool.doit.tasks.macos_check_cert] +apple_id = "fpf@example.com" + +[tool.doit.tasks.macos_codesign] +apple_id = "fpf@example.com" + +[tool.doit.tasks.build_container] -use_cache = true +use_cache = false [build-system] requires = ["poetry-core>=1.2.0"]