From ece342bd73ea21580be82951a0a53d5dc217325f Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Tue, 17 Dec 2024 20:17:01 +0200 Subject: [PATCH] WIP: Fixes for H2ORestart --- .github/workflows/build.yml | 2 +- .github/workflows/ci.yml | 8 +-- Dockerfile | 33 ++++++---- dodo.py | 3 +- install/common/build-image.py | 121 +++++++++++++--------------------- 5 files changed, 69 insertions(+), 98 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 35f9597..60fd8c6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -85,7 +85,7 @@ jobs: id: cache-container-image uses: actions/cache@v4 with: - key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/common.py', 'dangerzone/conversion/doc_to_pixels.py', 'dangerzone/conversion/pixels_to_pdf.py', 'poetry.lock', 'gvisor_wrapper/entrypoint.py') }} + key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'oci/*', 'install/common/build-image.py') }} path: | share/container.tar.gz share/image-id.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc2bb27..d97308e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,7 @@ jobs: id: cache-container-image uses: actions/cache@v4 with: - key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/common.py', 'dangerzone/conversion/doc_to_pixels.py', 'dangerzone/conversion/pixels_to_pdf.py', 'poetry.lock', 'gvisor_wrapper/entrypoint.py') }} + key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'oci/*', 'install/common/build-image.py') }} path: |- share/container.tar.gz share/image-id.txt @@ -227,7 +227,7 @@ jobs: - name: Restore container cache uses: actions/cache/restore@v4 with: - key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/common.py', 'dangerzone/conversion/doc_to_pixels.py', 'dangerzone/conversion/pixels_to_pdf.py', 'poetry.lock', 'gvisor_wrapper/entrypoint.py') }} + key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'oci/*', 'install/common/build-image.py') }} path: |- share/container.tar.gz share/image-id.txt @@ -334,7 +334,7 @@ jobs: - name: Restore container image uses: actions/cache/restore@v4 with: - key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/common.py', 'dangerzone/conversion/doc_to_pixels.py', 'dangerzone/conversion/pixels_to_pdf.py', 'poetry.lock', 'gvisor_wrapper/entrypoint.py') }} + key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'oci/*', 'install/common/build-image.py') }} path: |- share/container.tar.gz share/image-id.txt @@ -429,7 +429,7 @@ jobs: - name: Restore container image uses: actions/cache/restore@v4 with: - key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/common.py', 'dangerzone/conversion/doc_to_pixels.py', 'dangerzone/conversion/pixels_to_pdf.py', 'poetry.lock', 'gvisor_wrapper/entrypoint.py') }} + key: v3-${{ steps.date.outputs.date }}-${{ hashFiles('Dockerfile', 'dangerzone/conversion/*.py', 'oci/*', 'install/common/build-image.py') }} path: |- share/container.tar.gz share/image-id.txt diff --git a/Dockerfile b/Dockerfile index 0b1ca9d..5875c0f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,12 +11,26 @@ RUN \ --mount=type=bind,source=./oci/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \ repro-sources-list.sh && \ apt-get update && \ - apt-get install -y --no-install-recommends python3-fitz libreoffice-nogui libreoffice-java-common python3 python3-magic default-jdk-headless fonts-noto-cjk && \ + apt-get install -y --no-install-recommends \ + python3-fitz libreoffice-nogui libreoffice-java-common python3 \ + python3-magic default-jdk-headless fonts-noto-cjk unzip wget && \ : "Clean up for improving reproducibility (optional)" && \ + apt-get autoremove -y && \ rm -rf /var/cache/fontconfig/ && \ rm -rf /etc/ssl/certs/java/cacerts && \ rm -rf /var/log/* /var/cache/ldconfig/aux-cache +# Download H2ORestart from GitHub using a pinned version and hash. Note that +# it's available in Debian repos, but not Bookworm just yet. +ARG H2ORESTART_CHECKSUM=8a5be77359695c14faaf33891d3eca6c9d73c1224599aab50a9d2ccc04640580 +ARG H2ORESTART_VERSION=v0.6.8 + +RUN mkdir /libreoffice_ext && cd libreoffice_ext \ + && H2ORESTART_FILENAME=h2orestart.oxt \ + && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \ + && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \ + && install -dm777 "/usr/lib/libreoffice/share/extensions/" + RUN mkdir -p /opt/dangerzone/dangerzone && \ touch /opt/dangerzone/dangerzone/__init__.py && \ addgroup --gid 1000 dangerzone && \ @@ -36,30 +50,21 @@ RUN \ --mount=type=cache,target=/var/cache/apt,sharing=locked \ --mount=type=cache,target=/var/lib/apt,sharing=locked \ --mount=type=bind,source=./oci/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \ - --mount=type=bind,source=./oci/gvisor.key,target=/tmp/gvisor.key + --mount=type=bind,source=./oci/gvisor.key,target=/tmp/gvisor.key \ repro-sources-list.sh && \ : "Setup APT to install gVisor from its separate APT repo" && \ apt-get update && \ apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg && \ gpg -o /usr/share/keyrings/gvisor-archive-keyring.gpg --dearmor /tmp/gvisor.key && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases ${GVISOR_DATE} main" > /etc/apt/sources.list.d/gvisor.list + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases ${GVISOR_DATE} main" > /etc/apt/sources.list.d/gvisor.list && \ : "Install Pthon3 and gVisor" && \ apt-get update && \ apt-get install -y --no-install-recommends python3 runsc && \ : "Clean up for improving reproducibility (optional)" && \ + apt-get remove -y apt-transport-https ca-certificates gnupg && \ + apt-get autoremove -y && \ rm -rf /var/log/* /var/cache/ldconfig/aux-cache -# Download H2ORestart from GitHub using a pinned version and hash. Note that -# it's available in Debian repos, but not Bookworm just yet. -ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2 -ARG H2ORESTART_VERSION=v0.6.7 - -RUN mkdir /libreoffice_ext && cd libreoffice_ext \ - && H2ORESTART_FILENAME=h2orestart.oxt \ - && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \ - && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \ - && install -dm777 "/usr/lib/libreoffice/share/extensions/" - RUN addgroup --gid 1000 dangerzone && \ adduser --uid 1000 --ingroup dangerzone --shell /bin/true --home /home/dangerzone dangerzone diff --git a/dodo.py b/dodo.py index 2dd3d21..a05117a 100644 --- a/dodo.py +++ b/dodo.py @@ -63,9 +63,8 @@ TESSDATA_TARGETS = list_language_data() IMAGE_DEPS = [ "Dockerfile", - "poetry.lock", *list_files("dangerzone/conversion"), - "dangerzone/gvisor_wrapper/entrypoint.py", + *list_files("dangerzone/oci"), "install/common/build-image.py", ] IMAGE_TARGETS = ["share/container.tar.gz", "share/image-id.txt"] diff --git a/install/common/build-image.py b/install/common/build-image.py index 6d99877..7c836e7 100644 --- a/install/common/build-image.py +++ b/install/common/build-image.py @@ -1,6 +1,5 @@ import argparse import gzip -import os import platform import secrets import subprocess @@ -9,7 +8,6 @@ from pathlib import Path BUILD_CONTEXT = "dangerzone/" IMAGE_NAME = "dangerzone.rocks/dangerzone" -REQUIREMENTS_TXT = "container-pip-requirements.txt" if platform.system() in ["Darwin", "Windows"]: CONTAINER_RUNTIME = "docker" elif platform.system() == "Linux": @@ -85,90 +83,59 @@ def main(): f.write(tag) print("Exporting container pip dependencies") - with ContainerPipDependencies(): - if not args.use_cache: - print("Pulling base image") - subprocess.run( - [ - args.runtime, - "pull", - "alpine:latest", - ], - check=True, - ) - - # Build the container image, and tag it with the calculated tag - print("Building container image") - cache_args = [] if args.use_cache else ["--no-cache"] + if not args.use_cache: + print("Pulling base image") subprocess.run( [ args.runtime, - "build", - BUILD_CONTEXT, - *cache_args, - "--build-arg", - f"REQUIREMENTS_TXT={REQUIREMENTS_TXT}", - "--build-arg", - f"ARCH={ARCH}", - "-f", - "Dockerfile", - "--tag", - image_name_tagged, + "pull", + "alpine:latest", ], check=True, ) - if not args.no_save: - print("Saving container image") - cmd = subprocess.Popen( - [ - CONTAINER_RUNTIME, - "save", - image_name_tagged, - ], - stdout=subprocess.PIPE, - ) + # Build the container image, and tag it with the calculated tag + print("Building container image") + cache_args = [] if args.use_cache else ["--no-cache"] + subprocess.run( + [ + args.runtime, + "build", + BUILD_CONTEXT, + *cache_args, + "-f", + "Dockerfile", + "--tag", + image_name_tagged, + ], + check=True, + ) - print("Compressing container image") - chunk_size = 4 << 20 - with gzip.open( - tarball_path, - "wb", - compresslevel=args.compress_level, - ) as gzip_f: - while True: - chunk = cmd.stdout.read(chunk_size) - if len(chunk) > 0: - gzip_f.write(chunk) - else: - break - cmd.wait(5) + if not args.no_save: + print("Saving container image") + cmd = subprocess.Popen( + [ + CONTAINER_RUNTIME, + "save", + image_name_tagged, + ], + stdout=subprocess.PIPE, + ) - -class ContainerPipDependencies: - """Generates PIP dependencies within container""" - - def __enter__(self): - try: - container_requirements_txt = subprocess.check_output( - ["poetry", "export", "--only", "container"], universal_newlines=True - ) - except subprocess.CalledProcessError as e: - print("FAILURE", e.returncode, e.output) - print(f"REQUIREMENTS: {container_requirements_txt}") - # XXX Export container dependencies and exclude pymupdfb since it is not needed in container - req_txt_pymupdfb_stripped = container_requirements_txt.split("pymupdfb")[0] - with open(Path(BUILD_CONTEXT) / REQUIREMENTS_TXT, "w") as f: - if ARCH == "arm64": - # PyMuPDF needs to be built on ARM64 machines - # But is already provided as a prebuilt-wheel on other architectures - f.write(req_txt_pymupdfb_stripped) - else: - f.write(container_requirements_txt) - - def __exit__(self, exc_type, exc_value, exc_tb): - print("Leaving the context...") - os.remove(Path(BUILD_CONTEXT) / REQUIREMENTS_TXT) + print("Compressing container image") + chunk_size = 4 << 20 + with gzip.open( + tarball_path, + "wb", + compresslevel=args.compress_level, + ) as gzip_f: + while True: + chunk = cmd.stdout.read(chunk_size) + if len(chunk) > 0: + gzip_f.write(chunk) + else: + break + cmd.wait(5) if __name__ == "__main__":