Abstract container into an IsolationProvider

Encapsulate the container logic in a Container class that implements AbstractIsolationProvider. This abstraction will allow for other types of isolation providers, such as a Dummy one.
2025-04-28 18:02:38 +02:00 · 2022-12-27 13:57:19 +00:00 · 2022-12-27 13:57:19 +00:00 · a4f27afdc6
commit a4f27afdc6
parent 1114a0dfa1
4 changed files with 331 additions and 290 deletions
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@ -73,7 +73,7 @@ def cli_main(
            exit(1)

    # Ensure container is installed
-    isolation_provider.install()
+    dangerzone.isolation_provider.install()

    # Convert the document
    print_header("Converting document to safe PDF")
--- a/dangerzone/gui/main_window.py
+++ b/dangerzone/gui/main_window.py
@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
 class InstallContainerThread(QtCore.QThread):
    finished = QtCore.Signal()

-    def __init__(self) -> None:
+    def __init__(self, dangerzone: DangerzoneGui) -> None:
        super(InstallContainerThread, self).__init__()
+        self.dangerzone = dangerzone

    def run(self) -> None:
-        isolation_provider.install()
+        self.dangerzone.isolation_provider.install()
        self.finished.emit()


@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
        state: Optional[str] = None

        try:
-            container_runtime = isolation_provider.get_runtime()
+            container_runtime = self.dangerzone.isolation_provider.get_runtime()
        except isolation_provider.NoContainerTechException as e:
            log.error(str(e))
            state = "not_installed"
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
                "Installing the Dangerzone container image.<br><br>This might take a few minutes..."
            )
            self.buttons.hide()
-            self.install_container_t = InstallContainerThread()
+            self.install_container_t = InstallContainerThread(self.dangerzone)
            self.install_container_t.finished.connect(self.finished)
            self.install_container_t.start()

@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
    finished = QtCore.Signal(bool)
    update = QtCore.Signal(bool, str, int)

-    def __init__(self, document: Document, ocr_lang: str = None) -> None:
+    def __init__(
+        self,
+        dangerzone: DangerzoneGui,
+        document: Document,
+        ocr_lang: str = None,
+    ) -> None:
        super(ConvertTask, self).__init__()
        self.document = document
        self.ocr_lang = ocr_lang
        self.error = False
+        self.dangerzone = dangerzone

    def convert_document(self) -> None:
-        isolation_provider.convert(
+        self.dangerzone.isolation_provider.convert(
            self.document,
            self.ocr_lang,
            self.stdout_callback,
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):

    def start_conversion(self) -> None:
        if not self.thread_pool_initized:
-            max_jobs = isolation_provider.get_max_parallel_conversions()
+            max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
            self.thread_pool = ThreadPool(max_jobs)

        for doc_widget in self.document_widgets:
-            task = ConvertTask(doc_widget.document, self.get_ocr_lang())
+            task = ConvertTask(
+                self.dangerzone, doc_widget.document, self.get_ocr_lang()
+            )
            task.update.connect(doc_widget.update_progress)
            task.finished.connect(doc_widget.all_done)
            self.thread_pool.apply_async(task.convert_document)
--- a/dangerzone/isolation_provider.py
+++ b/dangerzone/isolation_provider.py
@ -7,6 +7,7 @@ import platform
 import shutil
 import subprocess
 import tempfile
+from abc import ABC, abstractmethod
 from typing import Callable, List, Optional, Tuple

 import appdirs
@ -15,8 +16,6 @@ from colorama import Fore, Style
 from .document import Document
 from .util import get_resource_path, get_subprocess_startupinfo

-container_name = "dangerzone.rocks/dangerzone"
-
 # Define startupinfo for subprocesses
 if platform.system() == "Windows":
    startupinfo = subprocess.STARTUPINFO()  # type: ignore [attr-defined]
@ -26,298 +25,329 @@ else:

 log = logging.getLogger(__name__)

-# Name of the dangerzone container
-container_name = "dangerzone.rocks/dangerzone"
-

 class NoContainerTechException(Exception):
    def __init__(self, container_tech: str) -> None:
        super().__init__(f"{container_tech} is not installed")


-def get_runtime_name() -> str:
-    if platform.system() == "Linux":
-        runtime_name = "podman"
-    else:
-        # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
-        runtime_name = "docker"
-    return runtime_name
-
-
-def get_runtime() -> str:
-    container_tech = get_runtime_name()
-    runtime = shutil.which(container_tech)
-    if runtime is None:
-        raise NoContainerTechException(container_tech)
-    return runtime
-
-
-def install() -> bool:
+class AbstractIsolationProvider(ABC):
    """
-    Make sure the podman container is installed. Linux only.
+    Abstracts an isolation provider
    """
-    if is_container_installed():
+
+    @abstractmethod
+    def install(self) -> bool:
+        pass
+
+    @abstractmethod
+    def convert(
+        self,
+        document: Document,
+        ocr_lang: Optional[str],
+        stdout_callback: Optional[Callable] = None,
+    ) -> bool:
+        pass
+
+    @abstractmethod
+    def get_max_parallel_conversions(self) -> int:
+        pass
+
+
+class Container(AbstractIsolationProvider):
+
+    # Name of the dangerzone container
+    CONTAINER_NAME = "dangerzone.rocks/dangerzone"
+
+    def __init__(self) -> None:
+        pass
+
+    def get_runtime_name(self) -> str:
+        if platform.system() == "Linux":
+            runtime_name = "podman"
+        else:
+            # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
+            runtime_name = "docker"
+        return runtime_name
+
+    def get_runtime(self) -> str:
+        container_tech = self.get_runtime_name()
+        runtime = shutil.which(container_tech)
+        if runtime is None:
+            raise NoContainerTechException(container_tech)
+        return runtime
+
+    def install(self) -> bool:
+        """
+        Make sure the podman container is installed. Linux only.
+        """
+        if self.is_container_installed():
+            return True
+
+        # Load the container into podman
+        log.info("Installing Dangerzone container image...")
+
+        p = subprocess.Popen(
+            [self.get_runtime(), "load"],
+            stdin=subprocess.PIPE,
+            startupinfo=get_subprocess_startupinfo(),
+        )
+
+        chunk_size = 10240
+        compressed_container_path = get_resource_path("container.tar.gz")
+        with gzip.open(compressed_container_path) as f:
+            while True:
+                chunk = f.read(chunk_size)
+                if len(chunk) > 0:
+                    if p.stdin:
+                        p.stdin.write(chunk)
+                else:
+                    break
+        p.communicate()
+
+        if not self.is_container_installed():
+            log.error("Failed to install the container image")
+            return False
+
+        log.info("Container image installed")
        return True

-    # Load the container into podman
-    log.info("Installing Dangerzone container image...")
+    def is_container_installed(self) -> bool:
+        """
+        See if the podman container is installed. Linux only.
+        """
+        # Get the image id
+        with open(get_resource_path("image-id.txt")) as f:
+            expected_image_id = f.read().strip()

-    p = subprocess.Popen(
-        [get_runtime(), "load"],
-        stdin=subprocess.PIPE,
-        startupinfo=get_subprocess_startupinfo(),
-    )
-
-    chunk_size = 10240
-    compressed_container_path = get_resource_path("container.tar.gz")
-    with gzip.open(compressed_container_path) as f:
-        while True:
-            chunk = f.read(chunk_size)
-            if len(chunk) > 0:
-                if p.stdin:
-                    p.stdin.write(chunk)
-            else:
-                break
-    p.communicate()
-
-    if not is_container_installed():
-        log.error("Failed to install the container image")
-        return False
-
-    log.info("Container image installed")
-    return True
-
-
-def is_container_installed() -> bool:
-    """
-    See if the podman container is installed. Linux only.
-    """
-    # Get the image id
-    with open(get_resource_path("image-id.txt")) as f:
-        expected_image_id = f.read().strip()
-
-    # See if this image is already installed
-    installed = False
-    found_image_id = subprocess.check_output(
-        [
-            get_runtime(),
-            "image",
-            "list",
-            "--format",
-            "{{.ID}}",
-            container_name,
-        ],
-        text=True,
-        startupinfo=get_subprocess_startupinfo(),
-    )
-    found_image_id = found_image_id.strip()
-
-    if found_image_id == expected_image_id:
-        installed = True
-    elif found_image_id == "":
-        pass
-    else:
-        log.info("Deleting old dangerzone container image")
-
-        try:
-            subprocess.check_output(
-                [get_runtime(), "rmi", "--force", found_image_id],
-                startupinfo=get_subprocess_startupinfo(),
-            )
-        except:
-            log.warning("Couldn't delete old container image, so leaving it there")
-
-    return installed
-
-
-def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
-    """
-    Parses a line returned by the container.
-    """
-    try:
-        status = json.loads(line)
-    except:
-        error_message = f"Invalid JSON returned from container:\n\n\t {line}"
-        log.error(error_message)
-        return (True, error_message, -1)
-
-    s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
-    s += Fore.CYAN + f"{status['percentage']}% "
-    if status["error"]:
-        s += Style.RESET_ALL + Fore.RED + status["text"]
-        log.error(s)
-    else:
-        s += Style.RESET_ALL + status["text"]
-        log.info(s)
-
-    return (status["error"], status["text"], status["percentage"])
-
-
-def exec(
-    document: Document,
-    args: List[str],
-    stdout_callback: Optional[Callable] = None,
-) -> int:
-    args_str = " ".join(pipes.quote(s) for s in args)
-    log.info("> " + args_str)
-
-    with subprocess.Popen(
-        args,
-        stdin=None,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        bufsize=1,
-        universal_newlines=True,
-        startupinfo=startupinfo,
-    ) as p:
-        if p.stdout is not None:
-            for line in p.stdout:
-                (error, text, percentage) = parse_progress(document, line)
-                if error:
-                    document.mark_as_failed()
-                if percentage == 100.0:
-                    document.mark_as_safe()
-                if stdout_callback:
-                    stdout_callback(error, text, percentage)
-
-        p.communicate()
-        return p.returncode
-
-
-def exec_container(
-    document: Document,
-    command: List[str],
-    extra_args: List[str] = [],
-    stdout_callback: Optional[Callable] = None,
-) -> int:
-    container_runtime = get_runtime()
-
-    if get_runtime_name() == "podman":
-        platform_args = []
-        security_args = ["--security-opt", "no-new-privileges"]
-        security_args += ["--userns", "keep-id"]
-    else:
-        platform_args = ["--platform", "linux/amd64"]
-        security_args = ["--security-opt=no-new-privileges:true"]
-
-    # drop all linux kernel capabilities
-    security_args += ["--cap-drop", "all"]
-    user_args = ["-u", "dangerzone"]
-
-    prevent_leakage_args = ["--rm"]
-
-    args = (
-        ["run", "--network", "none"]
-        + platform_args
-        + user_args
-        + security_args
-        + prevent_leakage_args
-        + extra_args
-        + [container_name]
-        + command
-    )
-
-    args = [container_runtime] + args
-    return exec(document, args, stdout_callback)
-
-
-def convert(
-    document: Document,
-    ocr_lang: Optional[str],
-    stdout_callback: Optional[Callable] = None,
-) -> bool:
-    success = False
-    document.mark_as_converting()
-
-    if ocr_lang:
-        ocr = "1"
-    else:
-        ocr = "0"
-
-    dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
-    os.makedirs(dz_tmp, exist_ok=True)
-
-    tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
-    pixel_dir = os.path.join(tmpdir.name, "pixels")
-    safe_dir = os.path.join(tmpdir.name, "safe")
-    os.makedirs(pixel_dir, exist_ok=True)
-    os.makedirs(safe_dir, exist_ok=True)
-
-    # Convert document to pixels
-    command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
-    extra_args = [
-        "-v",
-        f"{document.input_filename}:/tmp/input_file",
-        "-v",
-        f"{pixel_dir}:/dangerzone",
-    ]
-    ret = exec_container(document, command, extra_args, stdout_callback)
-    if ret != 0:
-        log.error("documents-to-pixels failed")
-    else:
-        # TODO: validate convert to pixels output
-
-        # Convert pixels to safe PDF
-        command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
-        extra_args = [
-            "-v",
-            f"{pixel_dir}:/dangerzone",
-            "-v",
-            f"{safe_dir}:/safezone",
-            "-e",
-            f"OCR={ocr}",
-            "-e",
-            f"OCR_LANGUAGE={ocr_lang}",
-        ]
-        ret = exec_container(document, command, extra_args, stdout_callback)
-        if ret != 0:
-            log.error("pixels-to-pdf failed")
-        else:
-            # Move the final file to the right place
-            if os.path.exists(document.output_filename):
-                os.remove(document.output_filename)
-
-            container_output_filename = os.path.join(
-                safe_dir, "safe-output-compressed.pdf"
-            )
-            shutil.move(container_output_filename, document.output_filename)
-
-            if document.archive_after_conversion:
-                document.archive()
-
-            # We did it
-            success = True
-
-    # Clean up
-    tmpdir.cleanup()
-
-    return success
-
-
-def get_max_parallel_conversions() -> int:
-
-    # FIXME hardcoded 1 until timeouts are more limited and better handled
-    # https://github.com/freedomofpress/dangerzone/issues/257
-    return 1
-
-    n_cpu = 1  # type: ignore [unreachable]
-    if platform.system() == "Linux":
-        # if on linux containers run natively
-        cpu_count = os.cpu_count()
-        if cpu_count is not None:
-            n_cpu = cpu_count
-
-    elif get_runtime_name() == "docker":
-        # For Windows and MacOS containers run in VM
-        # So we obtain the CPU count for the VM
-        n_cpu_str = subprocess.check_output(
-            [get_runtime(), "info", "--format", "{{.NCPU}}"],
+        # See if this image is already installed
+        installed = False
+        found_image_id = subprocess.check_output(
+            [
+                self.get_runtime(),
+                "image",
+                "list",
+                "--format",
+                "{{.ID}}",
+                self.CONTAINER_NAME,
+            ],
            text=True,
            startupinfo=get_subprocess_startupinfo(),
        )
-        n_cpu = int(n_cpu_str.strip())
+        found_image_id = found_image_id.strip()

-    return 2 * n_cpu + 1
+        if found_image_id == expected_image_id:
+            installed = True
+        elif found_image_id == "":
+            pass
+        else:
+            log.info("Deleting old dangerzone container image")
+
+            try:
+                subprocess.check_output(
+                    [self.get_runtime(), "rmi", "--force", found_image_id],
+                    startupinfo=get_subprocess_startupinfo(),
+                )
+            except:
+                log.warning("Couldn't delete old container image, so leaving it there")
+
+        return installed
+
+    def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
+        """
+        Parses a line returned by the container.
+        """
+        try:
+            status = json.loads(line)
+        except:
+            error_message = f"Invalid JSON returned from container:\n\n\t {line}"
+            log.error(error_message)
+            return (True, error_message, -1)
+
+        s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
+        s += Fore.CYAN + f"{status['percentage']}% "
+        if status["error"]:
+            s += Style.RESET_ALL + Fore.RED + status["text"]
+            log.error(s)
+        else:
+            s += Style.RESET_ALL + status["text"]
+            log.info(s)
+
+        return (status["error"], status["text"], status["percentage"])
+
+    def exec(
+        self,
+        document: Document,
+        args: List[str],
+        stdout_callback: Optional[Callable] = None,
+    ) -> int:
+        args_str = " ".join(pipes.quote(s) for s in args)
+        log.info("> " + args_str)
+
+        with subprocess.Popen(
+            args,
+            stdin=None,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            bufsize=1,
+            universal_newlines=True,
+            startupinfo=startupinfo,
+        ) as p:
+            if p.stdout is not None:
+                for line in p.stdout:
+                    (error, text, percentage) = self.parse_progress(document, line)
+                    if error:
+                        document.mark_as_failed()
+                    if percentage == 100.0:
+                        document.mark_as_safe()
+                    if stdout_callback:
+                        stdout_callback(error, text, percentage)
+
+            p.communicate()
+            return p.returncode
+
+    def exec_container(
+        self,
+        document: Document,
+        command: List[str],
+        extra_args: List[str] = [],
+        stdout_callback: Optional[Callable] = None,
+    ) -> int:
+        container_runtime = self.get_runtime()
+
+        if self.get_runtime_name() == "podman":
+            platform_args = []
+            security_args = ["--security-opt", "no-new-privileges"]
+            security_args += ["--userns", "keep-id"]
+        else:
+            platform_args = ["--platform", "linux/amd64"]
+            security_args = ["--security-opt=no-new-privileges:true"]
+
+        # drop all linux kernel capabilities
+        security_args += ["--cap-drop", "all"]
+        user_args = ["-u", "dangerzone"]
+
+        prevent_leakage_args = ["--rm"]
+
+        args = (
+            ["run", "--network", "none"]
+            + platform_args
+            + user_args
+            + security_args
+            + prevent_leakage_args
+            + extra_args
+            + [self.CONTAINER_NAME]
+            + command
+        )
+
+        args = [container_runtime] + args
+        return self.exec(document, args, stdout_callback)
+
+    def convert(
+        self,
+        document: Document,
+        ocr_lang: Optional[str],
+        stdout_callback: Optional[Callable] = None,
+    ) -> bool:
+        success = False
+        document.mark_as_converting()
+
+        if ocr_lang:
+            ocr = "1"
+        else:
+            ocr = "0"
+
+        dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
+        os.makedirs(dz_tmp, exist_ok=True)
+
+        tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
+        pixel_dir = os.path.join(tmpdir.name, "pixels")
+        safe_dir = os.path.join(tmpdir.name, "safe")
+        os.makedirs(pixel_dir, exist_ok=True)
+        os.makedirs(safe_dir, exist_ok=True)
+
+        # Convert document to pixels
+        command = [
+            "/usr/bin/python3",
+            "/usr/local/bin/dangerzone.py",
+            "document-to-pixels",
+        ]
+        extra_args = [
+            "-v",
+            f"{document.input_filename}:/tmp/input_file",
+            "-v",
+            f"{pixel_dir}:/dangerzone",
+        ]
+        ret = self.exec_container(document, command, extra_args, stdout_callback)
+        if ret != 0:
+            log.error("documents-to-pixels failed")
+        else:
+            # TODO: validate convert to pixels output
+
+            # Convert pixels to safe PDF
+            command = [
+                "/usr/bin/python3",
+                "/usr/local/bin/dangerzone.py",
+                "pixels-to-pdf",
+            ]
+            extra_args = [
+                "-v",
+                f"{pixel_dir}:/dangerzone",
+                "-v",
+                f"{safe_dir}:/safezone",
+                "-e",
+                f"OCR={ocr}",
+                "-e",
+                f"OCR_LANGUAGE={ocr_lang}",
+            ]
+            ret = self.exec_container(document, command, extra_args, stdout_callback)
+            if ret != 0:
+                log.error("pixels-to-pdf failed")
+            else:
+                # Move the final file to the right place
+                if os.path.exists(document.output_filename):
+                    os.remove(document.output_filename)
+
+                container_output_filename = os.path.join(
+                    safe_dir, "safe-output-compressed.pdf"
+                )
+                shutil.move(container_output_filename, document.output_filename)
+
+                if document.archive_after_conversion:
+                    document.archive()
+
+                # We did it
+                success = True
+
+        # Clean up
+        tmpdir.cleanup()
+
+        return success
+
+    def get_max_parallel_conversions(self) -> int:
+
+        # FIXME hardcoded 1 until timeouts are more limited and better handled
+        # https://github.com/freedomofpress/dangerzone/issues/257
+        return 1
+
+        n_cpu = 1  # type: ignore [unreachable]
+        if platform.system() == "Linux":
+            # if on linux containers run natively
+            cpu_count = os.cpu_count()
+            if cpu_count is not None:
+                n_cpu = cpu_count
+
+        elif self.get_runtime_name() == "docker":
+            # For Windows and MacOS containers run in VM
+            # So we obtain the CPU count for the VM
+            n_cpu_str = subprocess.check_output(
+                [self.get_runtime(), "info", "--format", "{{.NCPU}}"],
+                text=True,
+                startupinfo=get_subprocess_startupinfo(),
+            )
+            n_cpu = int(n_cpu_str.strip())
+
+        return 2 * n_cpu + 1


 # From global_common:
--- a/dangerzone/logic.py
+++ b/dangerzone/logic.py
@ -41,6 +41,8 @@ class DangerzoneCore(object):

        self.documents: List[Document] = []

+        self.isolation_provider = isolation_provider.Container()
+
    def add_document_from_filename(
        self,
        input_filename: str,
@ -59,13 +61,13 @@ class DangerzoneCore(object):
        self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
    ) -> None:
        def convert_doc(document: Document) -> None:
-            success = isolation_provider.convert(
+            success = self.isolation_provider.convert(
                document,
                ocr_lang,
                stdout_callback,
            )

-        max_jobs = isolation_provider.get_max_parallel_conversions()
+        max_jobs = self.isolation_provider.get_max_parallel_conversions()
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
            executor.map(convert_doc, self.documents)