diff --git a/.circleci/config.yml b/.circleci/config.yml index 208e49d..0215cfc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,8 +42,16 @@ aliases: ./install/linux/build-rpm.py ls -lh dist/ + - &calculate-cache-key + name: Calculating container cache key + command: | + mkdir -p /caches/ + cd dangerzone/conversion/ + cat common.py doc_to_pixels.py pixels_to_pdf.py | sha1sum | cut -d' ' -f1 > /caches/cache-id.txt + cd ../../ + - &restore-cache - key: v1-{{ checksum "container/Dockerfile" }}-{{ checksum "container/dangerzone.py" }} + key: v1-{{ checksum "Dockerfile" }}-{{ checksum "/caches/cache-id.txt" }} paths: - /caches/container.tar.gz - /caches/image-id.txt @@ -85,9 +93,8 @@ jobs: - image: docker:dind steps: - checkout - - restore_cache: - keys: - - v1-{{ checksum "container/Dockerfile" }}-{{ checksum "container/dangerzone.py" }} + - run: *calculate-cache-key + - restore_cache: *restore-cache - setup_remote_docker - run: name: Build Dangerzone image @@ -95,7 +102,9 @@ jobs: if [ -f "/caches/container.tar.gz" ]; then echo "Already cached, skipping" else - docker build --cache-from=dangerzone.rocks/dangerzone --tag dangerzone.rocks/dangerzone container + docker build dangerzone/ -f Dockerfile \ + --cache-from=dangerzone.rocks/dangerzone \ + --tag dangerzone.rocks/dangerzone fi - run: name: Save Dangerzone image and image-id.txt to cache @@ -108,8 +117,9 @@ jobs: gzip -f /caches/container.tar docker image ls dangerzone.rocks/dangerzone | grep "dangerzone.rocks/dangerzone" | tr -s ' ' | cut -d' ' -f3 > /caches/image-id.txt fi + - run: *calculate-cache-key - save_cache: - key: v1-{{ checksum "container/Dockerfile" }}-{{ checksum "container/dangerzone.py" }} + key: v1-{{ checksum "Dockerfile" }}-{{ checksum "/caches/cache-id.txt" }} paths: - /caches/container.tar.gz - /caches/image-id.txt @@ -136,6 +146,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: 
*copy-image - run: @@ -155,6 +166,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -181,6 +193,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -207,6 +220,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -233,6 +247,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -259,6 +274,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -285,6 +301,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -328,6 +345,7 @@ jobs: command: | sudo mkdir -p /caches sudo chown -R $USER:$USER /caches + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image @@ -365,6 +383,7 @@ jobs: steps: - run: *install-dependencies-deb - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-deb @@ -376,6 +395,7 @@ jobs: steps: - run: *install-dependencies-deb - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-deb @@ -388,6 +408,7 @@ jobs: - run: *install-dependencies-deb - run: *install-python-all - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-deb @@ -399,6 +420,7 @@ jobs: steps: - run: *install-dependencies-deb - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-deb @@ -410,6 +432,7 @@ jobs: steps: - 
run: *install-dependencies-deb - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-deb @@ -421,6 +444,7 @@ jobs: steps: - run: *install-dependencies-rpm - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-rpm @@ -432,6 +456,7 @@ jobs: steps: - run: *install-dependencies-rpm - checkout + - run: *calculate-cache-key - restore_cache: *restore-cache - run: *copy-image - run: *build-rpm diff --git a/.github/workflows/scan.yml b/.github/workflows/scan.yml index 9922921..4c885bf 100644 --- a/.github/workflows/scan.yml +++ b/.github/workflows/scan.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Build container image - run: docker build container --tag dangerzone.rocks/dangerzone:latest + run: docker build dangerzone/ -f Dockerfile --tag dangerzone.rocks/dangerzone:latest # NOTE: Scan first without failing, else we won't be able to read the scan # report. - name: Scan container image (no fail) diff --git a/container/Dockerfile b/Dockerfile similarity index 90% rename from container/Dockerfile rename to Dockerfile index 14e05c6..77bcbce 100644 --- a/container/Dockerfile +++ b/Dockerfile @@ -33,8 +33,11 @@ RUN mkdir tessdata && cd tessdata \ && find . -name '*.traineddata' -maxdepth 2 -exec cp {} /usr/share/tessdata \; \ && cd .. 
&& rm -r tessdata -COPY dangerzone.py /usr/local/bin/ -RUN chmod +x /usr/local/bin/dangerzone.py +ENV PYTHONPATH=/opt/dangerzone + +RUN mkdir -p /opt/dangerzone/dangerzone +RUN touch /opt/dangerzone/dangerzone/__init__.py +COPY conversion /opt/dangerzone/dangerzone/conversion # Add the unprivileged user RUN adduser -s /bin/sh -D dangerzone diff --git a/Makefile b/Makefile index 05c64e5..900d5a8 100644 --- a/Makefile +++ b/Makefile @@ -24,13 +24,10 @@ MYPY_ARGS := --ignore-missing-imports \ mypy-host: mypy $(MYPY_ARGS) dangerzone -mypy-container: - mypy $(MYPY_ARGS) container - mypy-tests: mypy $(MYPY_ARGS) tests -mypy: mypy-host mypy-container mypy-tests ## check type hints with mypy +mypy: mypy-host mypy-tests ## check type hints with mypy .PHONY: lint lint: lint-black lint-isort mypy ## check the code with various linters diff --git a/dangerzone/conversion/__init__.py b/dangerzone/conversion/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dangerzone/conversion/common.py b/dangerzone/conversion/common.py new file mode 100644 index 0000000..45629e1 --- /dev/null +++ b/dangerzone/conversion/common.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 + +import asyncio +import glob +import json +import os +import re +import shutil +import subprocess +import sys +import time +from abc import abstractmethod +from typing import Callable, Dict, List, Optional, Tuple, Union + +TIMEOUT_PER_PAGE: float = 30 # (seconds) +TIMEOUT_PER_MB: float = 30 # (seconds) +TIMEOUT_MIN: float = 60 # (seconds) + + +async def read_stream( + sr: asyncio.StreamReader, callback: Optional[Callable] = None +) -> bytes: + """Consume a byte stream line-by-line. + + Read all lines in a stream until EOF. If a user has passed a callback, call it for + each line. + + Note that the lines are in bytes, since we can't assume that all command output will + be UTF-8 encoded. Higher level commands are advised to decode the output to Unicode, + if they know its encoding. 
+ """ + buf = b"" + while True: + line = await sr.readline() + if sr.at_eof(): + break + if callback is not None: + callback(line) + # TODO: This would be a good place to log the received line, mostly for debug + # logging. + buf += line + return buf + + +async def run_command( + args: List[str], + *, + error_message: str, + timeout_message: str, + timeout: Optional[float], + stdout_callback: Optional[Callable] = None, + stderr_callback: Optional[Callable] = None, +) -> Tuple[bytes, bytes]: + """Run a command and get its output. + + Run a command using asyncio.subprocess, consume its standard streams, and return its + output in bytes. + + :raises RuntimeError: if the process returns a non-zero exit status + :raises TimeoutError: if the process times out + """ + # Start the provided command, and return a handle. The command will run in the + # background. + proc = await asyncio.subprocess.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + assert proc.stdout is not None + assert proc.stderr is not None + + # Create asynchronous tasks that will consume the standard streams of the command, + # and call callbacks if necessary. + stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback)) + stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback)) + + # Wait until the command has finished, for a specific timeout. Then, verify that the + # command has completed successfully. In any other case, raise an exception. + try: + ret = await asyncio.wait_for(proc.wait(), timeout=timeout) + except asyncio.exceptions.TimeoutError: + raise TimeoutError(timeout_message) + if ret != 0: + raise RuntimeError(error_message) + + # Wait until the tasks that consume the command's standard streams have exited as + # well, and return their output. 
+ stdout = await stdout_task + stderr = await stderr_task + return (stdout, stderr) + + +class DangerzoneConverter: + def __init__(self) -> None: + self.percentage: float = 0.0 + + def calculate_timeout( + self, size: float, pages: Optional[float] = None + ) -> Optional[float]: + """Calculate the timeout for a command. + + The timeout calculation takes two factors in mind: + + 1. The size (in MiBs) of the dataset (document, multiple pages). + 2. The number of pages in the dataset. + + It then calculates proportional timeout values based on the above, and keeps the + large one. This way, we can handle several corner cases: + + * Documents with lots of pages, but small file size. + * Single images with large file size. + """ + if not int(os.environ.get("ENABLE_TIMEOUTS", 1)): + return None + + # Do not have timeouts lower than 10 seconds, if the file size is small, since + # we need to take into account the program's startup time as well. + timeout = max(TIMEOUT_PER_MB * size, TIMEOUT_MIN) + if pages: + timeout = max(timeout, TIMEOUT_PER_PAGE * pages) + return timeout + + @abstractmethod + async def convert(self) -> None: + pass + + def update_progress(self, text: str, *, error: bool = False) -> None: + print( + json.dumps( + {"error": error, "text": text, "percentage": int(self.percentage)} + ) + ) + sys.stdout.flush() diff --git a/container/dangerzone.py b/dangerzone/conversion/doc_to_pixels.py similarity index 50% rename from container/dangerzone.py rename to dangerzone/conversion/doc_to_pixels.py index 360552e..23db83a 100644 --- a/container/dangerzone.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -1,140 +1,27 @@ #!/usr/bin/env python3 """ -Here are the steps, with progress bar percentages for each step: +Here are the steps, with progress bar percentages: -document_to_pixels - 0%-3%: Convert document into a PDF (skipped if the input file is a PDF) - 3%-5%: Split PDF into individual pages, and count those pages - 5%-50%: Convert each page into pixels (each 
page takes 45/n%, where n is the number of pages) - -pixels_to_pdf: -- 50%-95%: Convert each page of pixels into a PDF (each page takes 45/n%, where n is the number of pages) -- 95%-100%: Compress the final PDF """ import asyncio import glob -import json import os import re import shutil -import subprocess import sys -import time -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Dict, Optional import magic -TIMEOUT_PER_PAGE: float = 30 # (seconds) -TIMEOUT_PER_MB: float = 30 # (seconds) -TIMEOUT_MIN: float = 60 # (seconds) +from .common import DangerzoneConverter, run_command -async def read_stream( - sr: asyncio.StreamReader, callback: Optional[Callable] = None -) -> bytes: - """Consume a byte stream line-by-line. - - Read all lines in a stream until EOF. If a user has passed a callback, call it for - each line. - - Note that the lines are in bytes, since we can't assume that all command output will - be UTF-8 encoded. Higher level commands are advised to decode the output to Unicode, - if they know its encoding. - """ - buf = b"" - while True: - line = await sr.readline() - if sr.at_eof(): - break - if callback is not None: - callback(line) - # TODO: This would be a good place to log the received line, mostly for debug - # logging. - buf += line - return buf - - -async def run_command( - args: List[str], - *, - error_message: str, - timeout_message: str, - timeout: Optional[float], - stdout_callback: Optional[Callable] = None, - stderr_callback: Optional[Callable] = None, -) -> Tuple[bytes, bytes]: - """Run a command and get its output. - - Run a command using asyncio.subprocess, consume its standard streams, and return its - output in bytes. - - :raises RuntimeError: if the process returns a non-zero exit status - :raises TimeoutError: if the process times out - """ - # Start the provided command, and return a handle. The command will run in the - # background. 
- proc = await asyncio.subprocess.create_subprocess_exec( - *args, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - - assert proc.stdout is not None - assert proc.stderr is not None - - # Create asynchronous tasks that will consume the standard streams of the command, - # and call callbacks if necessary. - stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback)) - stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback)) - - # Wait until the command has finished, for a specific timeout. Then, verify that the - # command has completed successfully. In any other case, raise an exception. - try: - ret = await asyncio.wait_for(proc.wait(), timeout=timeout) - except asyncio.exceptions.TimeoutError: - raise TimeoutError(timeout_message) - if ret != 0: - raise RuntimeError(error_message) - - # Wait until the tasks that consume the command's standard streams have exited as - # well, and return their output. - stdout = await stdout_task - stderr = await stderr_task - return (stdout, stderr) - - -class DangerzoneConverter: - def __init__(self) -> None: - self.percentage: float = 0.0 - - def calculate_timeout( - self, size: float, pages: Optional[float] = None - ) -> Optional[float]: - """Calculate the timeout for a command. - - The timeout calculation takes two factors in mind: - - 1. The size (in MiBs) of the dataset (document, multiple pages). - 2. The number of pages in the dataset. - - It then calculates proportional timeout values based on the above, and keeps the - large one. This way, we can handle several corner cases: - - * Documents with lots of pages, but small file size. - * Single images with large file size. - """ - if not int(os.environ.get("ENABLE_TIMEOUTS", 1)): - return None - - # Do not have timeouts lower than 10 seconds, if the file size is small, since - # we need to take into account the program's startup time as well. 
- timeout = max(TIMEOUT_PER_MB * size, TIMEOUT_MIN) - if pages: - timeout = max(timeout, TIMEOUT_PER_PAGE * pages) - return timeout - - async def document_to_pixels(self) -> None: +class DocumentToPixels(DangerzoneConverter): + async def convert(self) -> None: conversions: Dict[str, Dict[str, Optional[str]]] = { # .pdf "application/pdf": {"type": None}, @@ -393,160 +280,12 @@ class DangerzoneConverter: ): shutil.move(filename, "/dangerzone") - async def pixels_to_pdf(self) -> None: - self.percentage = 50.0 - - num_pages = len(glob.glob("/dangerzone/page-*.rgb")) - total_size = 0.0 - - # Convert RGB files to PDF files - percentage_per_page = 45.0 / num_pages - for page in range(1, num_pages + 1): - filename_base = f"/dangerzone/page-{page}" - rgb_filename = f"{filename_base}.rgb" - width_filename = f"{filename_base}.width" - height_filename = f"{filename_base}.height" - png_filename = f"/tmp/page-{page}.png" - ocr_filename = f"/tmp/page-{page}" - pdf_filename = f"/tmp/page-{page}.pdf" - - with open(width_filename) as f: - width = f.read().strip() - with open(height_filename) as f: - height = f.read().strip() - - # The first few operations happen on a per-page basis. 
- page_size = os.path.getsize(filename_base + ".rgb") / 1024**2 - total_size += page_size - timeout = self.calculate_timeout(page_size, 1) - - if os.environ.get("OCR") == "1": # OCR the document - self.update_progress( - f"Converting page {page}/{num_pages} from pixels to searchable PDF" - ) - await run_command( - [ - "gm", - "convert", - "-size", - f"{width}x{height}", - "-depth", - "8", - f"rgb:{rgb_filename}", - f"png:{png_filename}", - ], - error_message=f"Page {page}/{num_pages} conversion to PNG failed", - timeout_message=( - "Error converting pixels to PNG, convert timed out after" - f" {timeout} seconds" - ), - timeout=timeout, - ) - await run_command( - [ - "tesseract", - png_filename, - ocr_filename, - "-l", - os.environ.get("OCR_LANGUAGE"), # type: ignore - "--dpi", - "70", - "pdf", - ], - error_message=f"Page {page}/{num_pages} OCR failed", - timeout_message=( - "Error converting PNG to searchable PDF, tesseract timed out" - f" after {timeout} seconds" - ), - timeout=timeout, - ) - - else: # Don't OCR - self.update_progress( - f"Converting page {page}/{num_pages} from pixels to PDF" - ) - await run_command( - [ - "gm", - "convert", - "-size", - f"{width}x{height}", - "-depth", - "8", - f"rgb:{rgb_filename}", - f"pdf:{pdf_filename}", - ], - error_message=f"Page {page}/{num_pages} conversion to PDF failed", - timeout_message=( - "Error converting RGB to PDF, convert timed out after" - f" {timeout} seconds" - ), - timeout=timeout, - ) - - self.percentage += percentage_per_page - - # Next operations apply to the all the pages, so we need to recalculate the - # timeout. 
- timeout = self.calculate_timeout(total_size, num_pages) - - # Merge pages into a single PDF - self.update_progress(f"Merging {num_pages} pages into a single PDF") - args = ["pdfunite"] - for page in range(1, num_pages + 1): - args.append(f"/tmp/page-{page}.pdf") - args.append(f"/tmp/safe-output.pdf") - await run_command( - args, - error_message="Merging pages into a single PDF failed", - timeout_message=( - "Error merging pages into a single PDF, pdfunite timed out after" - f" {timeout} seconds" - ), - timeout=timeout, - ) - - self.percentage += 2 - - # Compress - self.update_progress("Compressing PDF") - await run_command( - ["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"], - error_message="Compressing PDF failed", - timeout_message=( - f"Error compressing PDF, ps2pdf timed out after {timeout} seconds" - ), - timeout=timeout, - ) - - self.percentage = 100.0 - self.update_progress("Safe PDF created") - - # Move converted files into /safezone - shutil.move("/tmp/safe-output.pdf", "/safezone") - shutil.move("/tmp/safe-output-compressed.pdf", "/safezone") - - def update_progress(self, text: str, *, error: bool = False) -> None: - print( - json.dumps( - {"error": error, "text": text, "percentage": int(self.percentage)} - ) - ) - sys.stdout.flush() - async def main() -> int: - if len(sys.argv) != 2: - print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]") - return -1 - - converter = DangerzoneConverter() + converter = DocumentToPixels() try: - if sys.argv[1] == "document-to-pixels": - await converter.document_to_pixels() - elif sys.argv[1] == "pixels-to-pdf": - await converter.pixels_to_pdf() + await converter.convert() except (RuntimeError, TimeoutError, ValueError) as e: converter.update_progress(str(e), error=True) return 1 diff --git a/dangerzone/conversion/pixels_to_pdf.py b/dangerzone/conversion/pixels_to_pdf.py new file mode 100644 index 0000000..2e97de5 --- /dev/null +++ b/dangerzone/conversion/pixels_to_pdf.py @@ -0,0 +1,166 
@@ +#!/usr/bin/env python3 +""" +Here are the steps, with progress bar percentages: + +- 50%-95%: Convert each page of pixels into a PDF (each page takes 45/n%, where n is the number of pages) +- 95%-100%: Compress the final PDF +""" +import asyncio +import glob +import json +import os +import shutil +import sys + +from .common import DangerzoneConverter, run_command + + +class PixelsToPDF(DangerzoneConverter): + async def convert(self) -> None: + self.percentage = 50.0 + + num_pages = len(glob.glob("/tmp/dangerzone/page-*.rgb")) + total_size = 0.0 + + # Convert RGB files to PDF files + percentage_per_page = 45.0 / num_pages + for page in range(1, num_pages + 1): + filename_base = f"/tmp/dangerzone/page-{page}" + rgb_filename = f"{filename_base}.rgb" + width_filename = f"{filename_base}.width" + height_filename = f"{filename_base}.height" + png_filename = f"/tmp/page-{page}.png" + ocr_filename = f"/tmp/page-{page}" + pdf_filename = f"/tmp/page-{page}.pdf" + + with open(width_filename) as f: + width = f.read().strip() + with open(height_filename) as f: + height = f.read().strip() + + # The first few operations happen on a per-page basis. 
+ page_size = os.path.getsize(filename_base + ".rgb") / 1024**2 + total_size += page_size + timeout = self.calculate_timeout(page_size, 1) + + if os.environ.get("OCR") == "1": # OCR the document + self.update_progress( + f"Converting page {page}/{num_pages} from pixels to searchable PDF" + ) + await run_command( + [ + "gm", + "convert", + "-size", + f"{width}x{height}", + "-depth", + "8", + f"rgb:{rgb_filename}", + f"png:{png_filename}", + ], + error_message=f"Page {page}/{num_pages} conversion to PNG failed", + timeout_message=( + "Error converting pixels to PNG, convert timed out after" + f" {timeout} seconds" + ), + timeout=timeout, + ) + await run_command( + [ + "tesseract", + png_filename, + ocr_filename, + "-l", + os.environ.get("OCR_LANGUAGE"), # type: ignore + "--dpi", + "70", + "pdf", + ], + error_message=f"Page {page}/{num_pages} OCR failed", + timeout_message=( + "Error converting PNG to searchable PDF, tesseract timed out" + f" after {timeout} seconds" + ), + timeout=timeout, + ) + + else: # Don't OCR + self.update_progress( + f"Converting page {page}/{num_pages} from pixels to PDF" + ) + await run_command( + [ + "gm", + "convert", + "-size", + f"{width}x{height}", + "-depth", + "8", + f"rgb:{rgb_filename}", + f"pdf:{pdf_filename}", + ], + error_message=f"Page {page}/{num_pages} conversion to PDF failed", + timeout_message=( + "Error converting RGB to PDF, convert timed out after" + f" {timeout} seconds" + ), + timeout=timeout, + ) + + self.percentage += percentage_per_page + + # Next operations apply to the all the pages, so we need to recalculate the + # timeout. 
+ timeout = self.calculate_timeout(total_size, num_pages) + + # Merge pages into a single PDF + self.update_progress(f"Merging {num_pages} pages into a single PDF") + args = ["pdfunite"] + for page in range(1, num_pages + 1): + args.append(f"/tmp/page-{page}.pdf") + args.append(f"/tmp/safe-output.pdf") + await run_command( + args, + error_message="Merging pages into a single PDF failed", + timeout_message=( + "Error merging pages into a single PDF, pdfunite timed out after" + f" {timeout} seconds" + ), + timeout=timeout, + ) + + self.percentage += 2 + + # Compress + self.update_progress("Compressing PDF") + await run_command( + ["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"], + error_message="Compressing PDF failed", + timeout_message=( + f"Error compressing PDF, ps2pdf timed out after {timeout} seconds" + ), + timeout=timeout, + ) + + self.percentage = 100.0 + self.update_progress("Safe PDF created") + + # Move converted files into /safezone + shutil.move("/tmp/safe-output.pdf", "/safezone") + shutil.move("/tmp/safe-output-compressed.pdf", "/safezone") + + +async def main() -> int: + converter = PixelsToPDF() + + try: + await converter.convert() + except (RuntimeError, TimeoutError, ValueError) as e: + converter.update_progress(str(e), error=True) + return 1 + else: + return 0 # Success! 
+ + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 4113317..6b71b63 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -262,8 +262,8 @@ class Container(IsolationProvider): # Convert document to pixels command = [ "/usr/bin/python3", - "/usr/local/bin/dangerzone.py", - "document-to-pixels", + "-m", + "dangerzone.conversion.doc_to_pixels", ] extra_args = [ "-v", @@ -282,8 +282,8 @@ class Container(IsolationProvider): # Convert pixels to safe PDF command = [ "/usr/bin/python3", - "/usr/local/bin/dangerzone.py", - "pixels-to-pdf", + "-m", + "dangerzone.conversion.pixels_to_pdf", ] extra_args = [ "-v", diff --git a/install/linux/build-image.sh b/install/linux/build-image.sh index fc662ec..ad573c7 100755 --- a/install/linux/build-image.sh +++ b/install/linux/build-image.sh @@ -5,7 +5,7 @@ set -e TAG=dangerzone.rocks/dangerzone:latest echo "Building container image" -podman build container --tag $TAG +podman build dangerzone/ -f Dockerfile --tag $TAG echo "Saving and compressing container image" podman save $TAG | gzip > share/container.tar.gz diff --git a/install/macos/build-image.sh b/install/macos/build-image.sh index ab4fd97..eafb1c6 100755 --- a/install/macos/build-image.sh +++ b/install/macos/build-image.sh @@ -5,7 +5,7 @@ set -e TAG=dangerzone.rocks/dangerzone:latest echo "Building container image" -docker build container --tag $TAG +docker build dangerzone/ -f Dockerfile --tag $TAG echo "Saving and compressing container image" docker save $TAG | gzip > share/container.tar.gz diff --git a/install/windows/build-image.py b/install/windows/build-image.py index cf1d578..23fd2d5 100644 --- a/install/windows/build-image.py +++ b/install/windows/build-image.py @@ -9,7 +9,9 @@ def main(): [ "docker", "build", - "container", + "dangerzone/", + "-f", + "Dockerfile", "--tag", 
"dangerzone.rocks/dangerzone:latest", ] diff --git a/setup.py b/setup.py index 33f5e07..98656fd 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,12 @@ dangerous PDFs, office documents, or images and converts them to safe PDFs. \ It uses container technology to convert the documents within a secure sandbox.\ """, url="https://github.com/freedomofpress/dangerzone", - packages=["dangerzone", "dangerzone.gui", "dangerzone.isolation_provider"], + packages=[ + "dangerzone", + "dangerzone.conversion", + "dangerzone.gui", + "dangerzone.isolation_provider", + ], data_files=[ ( "share/applications",