WIP: Fix image installation

This commit is contained in:
Alex Pyrgiotis 2024-11-27 12:08:53 +02:00
parent f4c2c87072
commit 275c446316
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
5 changed files with 202 additions and 216 deletions

View file

@ -1,102 +1 @@
###########################################
# Build PyMuPDF
FROM alpine:latest as pymupdf-build
ARG ARCH
ARG REQUIREMENTS_TXT
# Install PyMuPDF via hash-checked requirements file
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt
# PyMuPDF provides non-arm musl wheels only.
# Only install build-dependencies if we are actually building the wheel
RUN case "$ARCH" in \
"arm64") \
# This is required for copying later, but is created only in the pre-built wheels
mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
&& apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
*) \
apk --no-cache add py3-pip ;; \
esac
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt
###########################################
# Download H2ORestart
FROM alpine:latest as h2orestart-dl
ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
RUN mkdir /libreoffice_ext && cd libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& H2ORESTART_VERSION="v0.6.6" \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/"
###########################################
# Dangerzone image
FROM alpine:latest AS dangerzone-image
# Install dependencies
RUN apk --no-cache -U upgrade && \
apk --no-cache add \
libreoffice \
openjdk8 \
python3 \
py3-magic \
font-noto-cjk
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext
RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
COPY conversion /opt/dangerzone/dangerzone/conversion
# Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
# 1000, since we will point to it from the OCI config.
#
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN addgroup -g 1000 dangerzone && \
adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
###########################################
# gVisor wrapper image
FROM alpine:latest
RUN apk --no-cache -U upgrade && \
apk --no-cache add python3
RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
sha512sum -c runsc.sha512 && \
rm -f runsc.sha512 && \
chmod 555 runsc && \
mv runsc /usr/bin/
# Add the unprivileged `dangerzone` user.
RUN addgroup dangerzone && \
adduser -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone
# Switch to the dangerzone user for the rest of the script.
USER dangerzone
# Copy the Dangerzone image, as created by the previous steps, into the home
# directory of the `dangerzone` user.
RUN mkdir /home/dangerzone/dangerzone-image
COPY --from=dangerzone-image / /home/dangerzone/dangerzone-image/rootfs
# Create a directory that will be used by gVisor as the place where it will
# store the state of its containers.
RUN mkdir /home/dangerzone/.containers
COPY gvisor_wrapper/entrypoint.py /
ENTRYPOINT ["/entrypoint.py"]
FROM scratch

View file

@ -6,9 +6,10 @@ import shlex
import shutil
import subprocess
from typing import List, Tuple
from pathlib import Path
from ..document import Document
from ..util import get_resource_path, get_subprocess_startupinfo
from ..util import get_resource_path, get_resource_dir, get_subprocess_startupinfo
from .base import IsolationProvider, terminate_process_group
TIMEOUT_KILL = 5 # Timeout in seconds until the kill command returns.
@ -47,7 +48,8 @@ class ImageInstallationException(Exception):
class Container(IsolationProvider):
# Name of the dangerzone container
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
IMAGE_NAME = "dangerzone.rocks/dangerzone"
TARBALL_NAME = "container-%(tag)s.tar.gz"
@staticmethod
def get_runtime_name() -> str:
@ -155,16 +157,59 @@ class Container(IsolationProvider):
return security_args
@staticmethod
def install() -> bool:
"""
Make sure the podman container is installed. Linux only.
"""
if Container.is_container_installed():
return True
def get_image_tags() -> List[str]:
    """Get the tags of the loaded Dangerzone container images.

    Ask the container runtime for the images named `Container.IMAGE_NAME`
    and extract the tag (the text after the last ":") from every name it
    reports. The floating "latest" tag is left out of the result, so that
    only the remaining tags are returned.

    Returns:
        The list of tags, which is empty if there's no such image.

    Raises:
        subprocess.CalledProcessError: if the container runtime command
            exits with a non-zero status.
    """
    # NOTE(review): "{{index .Names 0 }}" prints only the first name of each
    # listed image. Presumably an image that carries several tags reports
    # just one of them here - TODO confirm against the container runtime.
    names = (
        subprocess.check_output(
            [
                Container.get_runtime(),
                "image",
                "list",
                "--format",
                "{{index .Names 0 }}",
                Container.IMAGE_NAME,
            ],
            text=True,
            startupinfo=get_subprocess_startupinfo(),
        )
        .strip()
        .split()
    )
    # Split on the last ":" only, and skip names that have no tag at all
    # instead of failing with an IndexError.
    tags = [name.rpartition(":")[2] for name in names if ":" in name]
    # `list.pop()` takes an index, not a value, so "latest" must be filtered
    # out explicitly.
    return [tag for tag in tags if tag != "latest"]
@staticmethod
def delete_image_tag(tag: str) -> None:
    """Remove the `IMAGE_NAME:<tag>` image from the container runtime.

    This is a best-effort operation: any failure of the removal command is
    logged as a warning and the image is left in place.
    """
    name = f"{Container.IMAGE_NAME}:{tag}"
    log.warning(f"Deleting old container image: {name}")
    try:
        rmi_cmd = [Container.get_runtime(), "rmi", "--force", name]
        subprocess.check_output(
            rmi_cmd,
            startupinfo=get_subprocess_startupinfo(),
        )
    except Exception:
        # Do not propagate the error; only report it.
        log.warning(f"Couldn't delete old container image '{name}', so leaving it there")
@staticmethod
def get_image_tarball() -> Path:
    """Get the path of the Dangerzone image tarball in the resource directory.

    The tarball is looked up by expanding `Container.TARBALL_NAME` with a
    wildcard in place of the tag.

    Returns:
        The path of the tarball. If more than one tarball matches, a warning
        is logged and the first one in sorted order is returned.

    Raises:
        FileNotFoundError: if there's no such tarball.
    """
    resource_dir = get_resource_dir()
    # TARBALL_NAME is a %-style template ("%(tag)s"), so it must be expanded
    # with the % operator. `str.format()` would leave it untouched and the
    # glob would never match.
    tarball_glob = Container.TARBALL_NAME % {"tag": "*"}
    # Sort the matches so that "the first one" is deterministic.
    tarballs = sorted(f for f in resource_dir.glob(tarball_glob) if f.is_file())
    if not tarballs:
        raise FileNotFoundError(f"Did not find a Dangerzone image tarball in '{resource_dir}'")
    if len(tarballs) > 1:
        log.warning(f"Found more than one Dangerzone image tarballs: {tarballs}. Picking the first one.")
    return tarballs[0]
@staticmethod
def load_image_tarball(tarball: Path) -> None:
log.info("Installing Dangerzone container image...")
p = subprocess.Popen(
[Container.get_runtime(), "load"],
stdin=subprocess.PIPE,
@ -172,8 +217,7 @@ class Container(IsolationProvider):
)
chunk_size = 4 << 20
compressed_container_path = get_resource_path("container.tar.gz")
with gzip.open(compressed_container_path) as f:
with gzip.open(tarball) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) > 0:
@ -181,7 +225,7 @@ class Container(IsolationProvider):
p.stdin.write(chunk)
else:
break
_, err = p.communicate()
out, err = p.communicate()
if p.returncode < 0:
if err:
error = err.decode()
@ -191,10 +235,66 @@ class Container(IsolationProvider):
f"Could not install container image: {error}"
)
if not Container.is_container_installed(raise_on_error=True):
return False
image_id = out.decode().strip()
log.info(f"Successfully installed container image with ID '{image_id}'")
return image_id
@staticmethod
def tag_image(image_id: str, tag: str) -> None:
    """Give the image with ID `image_id` the name `IMAGE_NAME:<tag>`.

    Runs the container runtime's `tag` command. A failing command surfaces
    as the exception raised by `subprocess.check_output()`.
    """
    image_name = f"{Container.IMAGE_NAME}:{tag}"
    tag_cmd = [Container.get_runtime(), "tag", image_id, image_name]
    subprocess.check_output(tag_cmd, startupinfo=get_subprocess_startupinfo())
    log.info(f"Successfully tagged container image with ID '{image_id}' as {image_name}")
@staticmethod
def is_tarball_loaded(tarball: Path, tags: List[str]) -> bool:
    """Check if the image tarball has been loaded.

    The tarball counts as loaded when its file name equals
    `Container.TARBALL_NAME` expanded with one of the given tags.

    Args:
        tarball: Path of the image tarball.
        tags: Tags of the images known to the container runtime.

    Returns:
        True if one of the tags matches the tarball's name, else False
        (including when `tags` is empty).
    """
    # TARBALL_NAME is a %-style template ("%(tag)s"). Expanding it with
    # `str.format()` would return the template unchanged and never match.
    return any(
        tarball.name == Container.TARBALL_NAME % {"tag": tag} for tag in tags
    )
@staticmethod
def install() -> bool:
    """Install the container image tarball, or verify that it's already installed.

    Perform the following actions:
    1. Get the tags of the images named `dangerzone.rocks/dangerzone`, if any.
    2. Get the container tarball in Dangerzone's resource directory.
    3. If one of the existing tags matches the name of the tarball, skip the
       installation.
    4. Else, load the tarball into the container runtime, tag the loaded
       image as "latest", verify that the tarball is now reported as loaded,
       and delete the images with the previous tags.

    Exceptions raised by the helper methods are not caught here.

    Returns:
        True, both when the image was already installed and when it has just
        been installed.

    Raises:
        ImageNotPresentException: if the image is not listed after loading
            the tarball.
    """
    old_tags = Container.get_image_tags()
    tarball = Container.get_image_tarball()

    if Container.is_tarball_loaded(tarball, old_tags):
        # Already installed. Return True explicitly, since a bare `return`
        # would hand a falsy None to callers that check the result.
        return True

    # Load the image tarball into the container runtime.
    image_id = Container.load_image_tarball(tarball)
    Container.tag_image(image_id, "latest")

    # Check if the image tarball has been loaded.
    new_tags = Container.get_image_tags()
    if not Container.is_tarball_loaded(tarball, new_tags):
        raise ImageNotPresentException(
            "Image is not listed after installation. Bailing out."
        )

    # Prune older container images.
    for tag in old_tags:
        Container.delete_image_tag(tag)

    log.info("Container image installed")
    return True
@staticmethod
@ -213,58 +313,6 @@ class Container(IsolationProvider):
raise NotAvailableContainerTechException(runtime_name, stderr.decode())
return True
@staticmethod
def is_container_installed(raise_on_error: bool = False) -> bool:
    """
    Check whether the expected container image is installed.

    The expected image IDs are read from the `image-id.txt` resource and
    compared against the ID that the container runtime reports for
    `Container.CONTAINER_NAME`.

    If an image is found but its ID is not an expected one, and
    `raise_on_error` is false, the image is deleted on a best-effort basis
    and False is returned.

    Raises ImageNotPresentException when `raise_on_error` is true and the
    image is either missing or has an unexpected ID.
    """
    # Read the IDs that count as "installed".
    with open(get_resource_path("image-id.txt")) as id_file:
        expected_image_ids = id_file.read().strip().split()

    # Ask the container runtime which image ID it currently has.
    list_output = subprocess.check_output(
        [
            Container.get_runtime(),
            "image",
            "list",
            "--format",
            "{{.ID}}",
            Container.CONTAINER_NAME,
        ],
        text=True,
        startupinfo=get_subprocess_startupinfo(),
    )
    found_image_id = list_output.strip()

    if found_image_id in expected_image_ids:
        return True

    if found_image_id == "":
        # No image at all.
        if raise_on_error:
            raise ImageNotPresentException(
                "Image is not listed after installation. Bailing out."
            )
        return False

    # An image exists, but it's not one of the expected ones.
    msg = (
        f"{Container.CONTAINER_NAME} images found, but IDs do not match."
        f" Found: {found_image_id}, Expected: {','.join(expected_image_ids)}"
    )
    if raise_on_error:
        raise ImageNotPresentException(msg)

    log.info(msg)
    log.info("Deleting old dangerzone container image")
    try:
        subprocess.check_output(
            [Container.get_runtime(), "rmi", "--force", found_image_id],
            startupinfo=get_subprocess_startupinfo(),
        )
    except Exception:
        log.warning("Couldn't delete old container image, so leaving it there")
    return False
def doc_to_pixels_container_name(self, document: Document) -> str:
    """Build the container name for the doc-to-pixels phase from the document's ID."""
    doc_id = document.id
    return f"dangerzone-doc-to-pixels-{doc_id}"

View file

@ -12,7 +12,7 @@ def get_config_dir() -> str:
return appdirs.user_config_dir("dangerzone")
def get_resource_path(filename: str) -> str:
def get_resource_dir() -> pathlib.Path:
if getattr(sys, "dangerzone_dev", False):
# Look for resources directory relative to python file
project_root = pathlib.Path(__file__).parent.parent
@ -30,8 +30,11 @@ def get_resource_path(filename: str) -> str:
prefix = dz_install_path / "share"
else:
raise NotImplementedError(f"Unsupported system {platform.system()}")
resource_path = prefix / filename
return str(resource_path)
return prefix
def get_resource_path(filename: str) -> str:
    """Return the path of `filename` within the resource directory, as a string."""
    resource_path = get_resource_dir() / filename
    return str(resource_path)
def get_tessdata_dir() -> pathlib.Path:

109
dodo.py
View file

@ -14,8 +14,10 @@ from doit.action import CmdAction
# CONTAINER_RUNTIME = "podman"
CONTAINER_RUNTIME = "podman"
ARCH = "i686" # FIXME
VERSION = open("share/version.txt").read().strip()
RELEASE_DIR = Path.home() / "release" / VERSION
# FIXME: Make this user-selectable with `get_var()`
RELEASE_DIR = Path.home() / "dz_release_area" / VERSION
FEDORA_VERSIONS = ["39", "40", "41"]
DEBIAN_VERSIONS = ["bullseye", "focal", "jammy", "mantic", "noble", "trixie"]
@ -63,6 +65,16 @@ def cmd_build_linux_pkg(distro, version, cwd, qubes=False):
return CmdAction(cmd, cwd=cwd)
def task_clean_container_runtime():
    """Clean the storage space of the container runtime.

    The task has no actions of its own. Its only effect is the `clean` step,
    which runs `system prune -f` with the configured container runtime.
    """
    prune_cmd = [CONTAINER_RUNTIME, "system", "prune", "-f"]
    return {"actions": None, "clean": [prune_cmd]}
def task_check_python():
"""Check that the latest supported Python version is installed (WIP).
@ -78,7 +90,7 @@ def task_check_python():
}
def task_container_runtime():
def task_check_container_runtime():
"""Test that the container runtime is ready."""
return {
"actions": [
@ -88,13 +100,13 @@ def task_container_runtime():
}
def task_system_checks():
def task_check_system():
"""Common status checks for a system."""
return {
"actions": None,
"task_dep": [
"check_python",
"container_runtime",
"check_container_runtime",
],
}
@ -113,7 +125,7 @@ def task_macos_check_docker_containerd():
"""Test that Docker uses the containard image store."""
def check_containerd_store():
cmd = ["docker", "info", "-f", "{{ .DriverStatus }}"]
driver = subprocess.check_output(cmd).strip()
driver = subprocess.check_output(cmd, text=True).strip()
if driver != "[[driver-type io.containerd.snapshotter.v1]]":
raise RuntimeError(
f"Probing the Docker image store with {cmd} returned {driver}."
@ -131,20 +143,34 @@ def task_macos_check_docker_containerd():
}
def task_macos_system_checks():
def task_macos_check_system():
"""Run macOS specific system checks, as well as the generic ones."""
return {
"actions": None,
"task_dep": [
"system_checks",
"check_system",
"macos_check_cert",
"macos_check_docker_containerd",
],
}
def task_tessdata():
"""Download Tesseract data"""
def task_init_release_dir():
    """Create a directory for release artifacts.

    The task creates `RELEASE_DIR` along with its `assets` and `tmp`
    subdirectories. The same directories are declared as the task's targets,
    so that the targets always match what the action actually creates.
    """
    # Single source of truth for both the action and the declared targets.
    subdirs = [RELEASE_DIR / "assets", RELEASE_DIR / "tmp"]

    def create_release_dir():
        RELEASE_DIR.mkdir(parents=True, exist_ok=True)
        for subdir in subdirs:
            subdir.mkdir(exist_ok=True)

    return {
        "actions": [create_release_dir],
        "targets": [RELEASE_DIR, *subdirs],
        "clean": True,
    }
def task_download_tessdata():
"""Download the Tesseract data using ./install/common/download-tessdata.py"""
tessdata_dir = Path("share") / "tessdata"
langs = json.loads(open(tessdata_dir.parent / "ocr-languages.json").read()).values()
targets = [tessdata_dir / f"{lang}.traineddata" for lang in langs]
@ -160,77 +186,73 @@ def task_tessdata():
}
def task_build_container():
def task_build_image():
"""Build the container image using ./install/common/build-image.py"""
return {
"actions": ["python install/common/build-image.py --use-cache=%(use_cache)s"],
"actions": [
"python install/common/build-image.py --use-cache=%(use_cache)s",
],
"params": [
{
"name": "use_cache",
"long": "use-cache",
"help": (
"Whether to use cached results or not. For reproducibility reasons,"
" it's best to leave it to false"
),
"default": False,
}
},
],
"file_dep": [
"Dockerfile",
"poetry.lock",
*list_files("dangerzone/conversion"),
"dangerzone/gvisor_wrapper/entrypoint.py",
"install/common/build-image.py",
],
"targets": ["share/container.tar.gz", "share/image-id.txt"],
"task_dep": ["container_runtime"],
"task_dep": ["check_container_runtime"],
"clean": True,
}
def task_poetry_install():
    """Setup the Poetry environment

    The task consists of a single shell action: `poetry install --sync`.
    """
    install_cmd = "poetry install --sync"
    return dict(actions=[install_cmd])
def task_app():
def task_macos_build_app():
"""Build the macOS app bundle for Dangerzone."""
return {
"actions": [["poetry", "run", "install/macos/build-app.py"]],
"file_dep": [
*list_files("share"),
*list_files("dangerzone"),
"share/container.tar.gz",
"share/image-id.txt",
],
"task_dep": ["poetry_install"],
"targets": ["dist/Dangerzone.app"],
"clean": True,
"clean": ["rm -rf dist/Dangerzone.app"],
}
def task_codesign():
def task_macos_codesign():
return {
"actions": [
["poetry", "run", "install/macos/build-app.py"],
["poetry", "run", "install/macos/build-app.py", "--only-codesign"],
[
"xcrun",
"notarytool",
"submit",
"--wait",
"--apple-id",
"<email>",
"--keychain-profile",
"dz-notarytool-release-key",
"dist/Dangerzone.dmg",
"xcrun notarytool submit --wait --apple-id %(apple_id)s"
" --keychain-profile dz-notarytool-release-key dist/Dangerzone.dmg",
],
],
"params": [PARAM_APPLE_ID],
"file_dep": ["dist/Dangerzone.app"],
"targets": ["dist/Dangerzone.dmg"]
}
def task_init_release_dir():
def create_release_dir():
RELEASE_DIR.mkdir(parents=True, exist_ok=True)
(RELEASE_DIR / "github").mkdir(exist_ok=True)
(RELEASE_DIR / "tmp").mkdir(exist_ok=True)
return {
"actions": [create_release_dir],
"targets": [RELEASE_DIR, RELEASE_DIR / "github", RELEASE_DIR / "tmp"],
"targets": ["dist/Dangerzone.dmg"],
"clean": True,
}
@ -271,7 +293,8 @@ def task_debian_deb():
"task_dep": [
"debian_env",
],
"targets": [deb_dst]
"targets": [deb_dst],
"clean": True,
}
@ -322,7 +345,8 @@ def task_fedora_rpm():
"task_dep": [
f"fedora_env:{version}",
],
"targets": rpm_dst
"targets": rpm_dst,
"clean": True,
}
@ -360,5 +384,6 @@ def task_apt_tools_prod_prep(apt_tools_prod_dir):
return {
"actions": [copy_files],
"file_dep": [src],
"targets": [bookworm_deb, *other_debs]
"targets": [bookworm_deb, *other_debs],
"clean": True,
}

View file

@ -67,8 +67,19 @@ skip_gitignore = true
# This is necessary due to https://github.com/PyCQA/isort/issues/1835
follow_links = false
[tool.doit.commands.clean]
# XXX: Change this to false if you REALLY want to clean a task's output. Else,
# the `doit clean` command will print the commands that would run instead.
dryrun = true
[tool.doit.tasks.macos_check_cert]
apple_id = "fpf@example.com"
[tool.doit.tasks.macos_codesign]
apple_id = "fpf@example.com"
[tool.doit.tasks.build_container]
use_cache = true
use_cache = false
[build-system]
requires = ["poetry-core>=1.2.0"]