Abstract container into an IsolationProvider

Encapsulate container logic into an implementation of AbstractIsolationProvider. This flexibility will allow for other types of isolation managers, such as a Dummy one.
2025-04-29 02:12:36 +02:00 · 2022-12-27 13:57:19 +00:00 · 2022-12-27 13:57:19 +00:00 · a4f27afdc6
commit a4f27afdc6
parent 1114a0dfa1
4 changed files with 331 additions and 290 deletions
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@ -73,7 +73,7 @@ def cli_main(
            exit(1)
    # Ensure container is installed
-    isolation_provider.install()
+    dangerzone.isolation_provider.install()
    # Convert the document
    print_header("Converting document to safe PDF")
--- a/dangerzone/gui/main_window.py
+++ b/dangerzone/gui/main_window.py
@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
 class InstallContainerThread(QtCore.QThread):
    finished = QtCore.Signal()
-    def __init__(self) -> None:
+    def __init__(self, dangerzone: DangerzoneGui) -> None:
        super(InstallContainerThread, self).__init__()
        self.dangerzone = dangerzone
    def run(self) -> None:
-        isolation_provider.install()
+        self.dangerzone.isolation_provider.install()
        self.finished.emit()
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
        state: Optional[str] = None
        try:
-            container_runtime = isolation_provider.get_runtime()
+            container_runtime = self.dangerzone.isolation_provider.get_runtime()
        except isolation_provider.NoContainerTechException as e:
            log.error(str(e))
            state = "not_installed"
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
                "Installing the Dangerzone container image.<br><br>This might take a few minutes..."
            )
            self.buttons.hide()
-            self.install_container_t = InstallContainerThread()
+            self.install_container_t = InstallContainerThread(self.dangerzone)
            self.install_container_t.finished.connect(self.finished)
            self.install_container_t.start()
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
    finished = QtCore.Signal(bool)
    update = QtCore.Signal(bool, str, int)
-    def __init__(self, document: Document, ocr_lang: str = None) -> None:
+    def __init__(
        self,
        dangerzone: DangerzoneGui,
        document: Document,
        ocr_lang: str = None,
    ) -> None:
        super(ConvertTask, self).__init__()
        self.document = document
        self.ocr_lang = ocr_lang
        self.error = False
        self.dangerzone = dangerzone
    def convert_document(self) -> None:
-        isolation_provider.convert(
+        self.dangerzone.isolation_provider.convert(
            self.document,
            self.ocr_lang,
            self.stdout_callback,
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
    def start_conversion(self) -> None:
        if not self.thread_pool_initized:
-            max_jobs = isolation_provider.get_max_parallel_conversions()
+            max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
            self.thread_pool = ThreadPool(max_jobs)
        for doc_widget in self.document_widgets:
-            task = ConvertTask(doc_widget.document, self.get_ocr_lang())
+            task = ConvertTask(
                self.dangerzone, doc_widget.document, self.get_ocr_lang()
            )
            task.update.connect(doc_widget.update_progress)
            task.finished.connect(doc_widget.all_done)
            self.thread_pool.apply_async(task.convert_document)
--- a/dangerzone/isolation_provider.py
+++ b/dangerzone/isolation_provider.py
@ -7,6 +7,7 @@ import platform
 import shutil
 import subprocess
 import tempfile
 from abc import ABC, abstractmethod
 from typing import Callable, List, Optional, Tuple
 import appdirs
@ -15,8 +16,6 @@ from colorama import Fore, Style
 from .document import Document
 from .util import get_resource_path, get_subprocess_startupinfo
 container_name = "dangerzone.rocks/dangerzone"
 # Define startupinfo for subprocesses
 if platform.system() == "Windows":
    startupinfo = subprocess.STARTUPINFO()  # type: ignore [attr-defined]
@ -26,298 +25,329 @@ else:
 log = logging.getLogger(__name__)
 # Name of the dangerzone container
 container_name = "dangerzone.rocks/dangerzone"
 class NoContainerTechException(Exception):
    def __init__(self, container_tech: str) -> None:
        super().__init__(f"{container_tech} is not installed")
-def get_runtime_name() -> str:
+class AbstractIsolationProvider(ABC):
    if platform.system() == "Linux":
        runtime_name = "podman"
    else:
        # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
        runtime_name = "docker"
    return runtime_name
 def get_runtime() -> str:
    container_tech = get_runtime_name()
    runtime = shutil.which(container_tech)
    if runtime is None:
        raise NoContainerTechException(container_tech)
    return runtime
 def install() -> bool:
    """
-    Make sure the podman container is installed. Linux only.
+    Abstracts an isolation provider
    """
-    if is_container_installed():
+
    @abstractmethod
    def install(self) -> bool:
        pass
    @abstractmethod
    def convert(
        self,
        document: Document,
        ocr_lang: Optional[str],
        stdout_callback: Optional[Callable] = None,
    ) -> bool:
        pass
    @abstractmethod
    def get_max_parallel_conversions(self) -> int:
        pass
 class Container(AbstractIsolationProvider):
    # Name of the dangerzone container
    CONTAINER_NAME = "dangerzone.rocks/dangerzone"
    def __init__(self) -> None:
        pass
    def get_runtime_name(self) -> str:
        if platform.system() == "Linux":
            runtime_name = "podman"
        else:
            # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
            runtime_name = "docker"
        return runtime_name
    def get_runtime(self) -> str:
        container_tech = self.get_runtime_name()
        runtime = shutil.which(container_tech)
        if runtime is None:
            raise NoContainerTechException(container_tech)
        return runtime
    def install(self) -> bool:
        """
        Make sure the podman container is installed. Linux only.
        """
        if self.is_container_installed():
            return True
        # Load the container into podman
        log.info("Installing Dangerzone container image...")
        p = subprocess.Popen(
            [self.get_runtime(), "load"],
            stdin=subprocess.PIPE,
            startupinfo=get_subprocess_startupinfo(),
        )
        chunk_size = 10240
        compressed_container_path = get_resource_path("container.tar.gz")
        with gzip.open(compressed_container_path) as f:
            while True:
                chunk = f.read(chunk_size)
                if len(chunk) > 0:
                    if p.stdin:
                        p.stdin.write(chunk)
                else:
                    break
        p.communicate()
        if not self.is_container_installed():
            log.error("Failed to install the container image")
            return False
        log.info("Container image installed")
        return True
-    # Load the container into podman
+    def is_container_installed(self) -> bool:
-    log.info("Installing Dangerzone container image...")
+        """
        See if the podman container is installed. Linux only.
        """
        # Get the image id
        with open(get_resource_path("image-id.txt")) as f:
            expected_image_id = f.read().strip()
-    p = subprocess.Popen(
+        # See if this image is already installed
-        [get_runtime(), "load"],
+        installed = False
-        stdin=subprocess.PIPE,
+        found_image_id = subprocess.check_output(
-        startupinfo=get_subprocess_startupinfo(),
+            [
-    )
+                self.get_runtime(),
-
+                "image",
-    chunk_size = 10240
+                "list",
-    compressed_container_path = get_resource_path("container.tar.gz")
+                "--format",
-    with gzip.open(compressed_container_path) as f:
+                "{{.ID}}",
-        while True:
+                self.CONTAINER_NAME,
-            chunk = f.read(chunk_size)
+            ],
            if len(chunk) > 0:
                if p.stdin:
                    p.stdin.write(chunk)
            else:
                break
    p.communicate()
    if not is_container_installed():
        log.error("Failed to install the container image")
        return False
    log.info("Container image installed")
    return True
 def is_container_installed() -> bool:
    """
    See if the podman container is installed. Linux only.
    """
    # Get the image id
    with open(get_resource_path("image-id.txt")) as f:
        expected_image_id = f.read().strip()
    # See if this image is already installed
    installed = False
    found_image_id = subprocess.check_output(
        [
            get_runtime(),
            "image",
            "list",
            "--format",
            "{{.ID}}",
            container_name,
        ],
        text=True,
        startupinfo=get_subprocess_startupinfo(),
    )
    found_image_id = found_image_id.strip()
    if found_image_id == expected_image_id:
        installed = True
    elif found_image_id == "":
        pass
    else:
        log.info("Deleting old dangerzone container image")
        try:
            subprocess.check_output(
                [get_runtime(), "rmi", "--force", found_image_id],
                startupinfo=get_subprocess_startupinfo(),
            )
        except:
            log.warning("Couldn't delete old container image, so leaving it there")
    return installed
 def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
    """
    Parses a line returned by the container.
    """
    try:
        status = json.loads(line)
    except:
        error_message = f"Invalid JSON returned from container:\n\n\t {line}"
        log.error(error_message)
        return (True, error_message, -1)
    s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
    s += Fore.CYAN + f"{status['percentage']}% "
    if status["error"]:
        s += Style.RESET_ALL + Fore.RED + status["text"]
        log.error(s)
    else:
        s += Style.RESET_ALL + status["text"]
        log.info(s)
    return (status["error"], status["text"], status["percentage"])
 def exec(
    document: Document,
    args: List[str],
    stdout_callback: Optional[Callable] = None,
 ) -> int:
    args_str = " ".join(pipes.quote(s) for s in args)
    log.info("> " + args_str)
    with subprocess.Popen(
        args,
        stdin=None,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        universal_newlines=True,
        startupinfo=startupinfo,
    ) as p:
        if p.stdout is not None:
            for line in p.stdout:
                (error, text, percentage) = parse_progress(document, line)
                if error:
                    document.mark_as_failed()
                if percentage == 100.0:
                    document.mark_as_safe()
                if stdout_callback:
                    stdout_callback(error, text, percentage)
        p.communicate()
        return p.returncode
 def exec_container(
    document: Document,
    command: List[str],
    extra_args: List[str] = [],
    stdout_callback: Optional[Callable] = None,
 ) -> int:
    container_runtime = get_runtime()
    if get_runtime_name() == "podman":
        platform_args = []
        security_args = ["--security-opt", "no-new-privileges"]
        security_args += ["--userns", "keep-id"]
    else:
        platform_args = ["--platform", "linux/amd64"]
        security_args = ["--security-opt=no-new-privileges:true"]
    # drop all linux kernel capabilities
    security_args += ["--cap-drop", "all"]
    user_args = ["-u", "dangerzone"]
    prevent_leakage_args = ["--rm"]
    args = (
        ["run", "--network", "none"]
        + platform_args
        + user_args
        + security_args
        + prevent_leakage_args
        + extra_args
        + [container_name]
        + command
    )
    args = [container_runtime] + args
    return exec(document, args, stdout_callback)
 def convert(
    document: Document,
    ocr_lang: Optional[str],
    stdout_callback: Optional[Callable] = None,
 ) -> bool:
    success = False
    document.mark_as_converting()
    if ocr_lang:
        ocr = "1"
    else:
        ocr = "0"
    dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
    os.makedirs(dz_tmp, exist_ok=True)
    tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
    pixel_dir = os.path.join(tmpdir.name, "pixels")
    safe_dir = os.path.join(tmpdir.name, "safe")
    os.makedirs(pixel_dir, exist_ok=True)
    os.makedirs(safe_dir, exist_ok=True)
    # Convert document to pixels
    command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
    extra_args = [
        "-v",
        f"{document.input_filename}:/tmp/input_file",
        "-v",
        f"{pixel_dir}:/dangerzone",
    ]
    ret = exec_container(document, command, extra_args, stdout_callback)
    if ret != 0:
        log.error("documents-to-pixels failed")
    else:
        # TODO: validate convert to pixels output
        # Convert pixels to safe PDF
        command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
        extra_args = [
            "-v",
            f"{pixel_dir}:/dangerzone",
            "-v",
            f"{safe_dir}:/safezone",
            "-e",
            f"OCR={ocr}",
            "-e",
            f"OCR_LANGUAGE={ocr_lang}",
        ]
        ret = exec_container(document, command, extra_args, stdout_callback)
        if ret != 0:
            log.error("pixels-to-pdf failed")
        else:
            # Move the final file to the right place
            if os.path.exists(document.output_filename):
                os.remove(document.output_filename)
            container_output_filename = os.path.join(
                safe_dir, "safe-output-compressed.pdf"
            )
            shutil.move(container_output_filename, document.output_filename)
            if document.archive_after_conversion:
                document.archive()
            # We did it
            success = True
    # Clean up
    tmpdir.cleanup()
    return success
 def get_max_parallel_conversions() -> int:
    # FIXME hardcoded 1 until timeouts are more limited and better handled
    # https://github.com/freedomofpress/dangerzone/issues/257
    return 1
    n_cpu = 1  # type: ignore [unreachable]
    if platform.system() == "Linux":
        # if on linux containers run natively
        cpu_count = os.cpu_count()
        if cpu_count is not None:
            n_cpu = cpu_count
    elif get_runtime_name() == "docker":
        # For Windows and MacOS containers run in VM
        # So we obtain the CPU count for the VM
        n_cpu_str = subprocess.check_output(
            [get_runtime(), "info", "--format", "{{.NCPU}}"],
            text=True,
            startupinfo=get_subprocess_startupinfo(),
        )
-        n_cpu = int(n_cpu_str.strip())
+        found_image_id = found_image_id.strip()
-    return 2 * n_cpu + 1
+        if found_image_id == expected_image_id:
            installed = True
        elif found_image_id == "":
            pass
        else:
            log.info("Deleting old dangerzone container image")
            try:
                subprocess.check_output(
                    [self.get_runtime(), "rmi", "--force", found_image_id],
                    startupinfo=get_subprocess_startupinfo(),
                )
            except:
                log.warning("Couldn't delete old container image, so leaving it there")
        return installed
    def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
        """
        Parses a line returned by the container.
        """
        try:
            status = json.loads(line)
        except:
            error_message = f"Invalid JSON returned from container:\n\n\t {line}"
            log.error(error_message)
            return (True, error_message, -1)
        s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
        s += Fore.CYAN + f"{status['percentage']}% "
        if status["error"]:
            s += Style.RESET_ALL + Fore.RED + status["text"]
            log.error(s)
        else:
            s += Style.RESET_ALL + status["text"]
            log.info(s)
        return (status["error"], status["text"], status["percentage"])
    def exec(
        self,
        document: Document,
        args: List[str],
        stdout_callback: Optional[Callable] = None,
    ) -> int:
        args_str = " ".join(pipes.quote(s) for s in args)
        log.info("> " + args_str)
        with subprocess.Popen(
            args,
            stdin=None,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=1,
            universal_newlines=True,
            startupinfo=startupinfo,
        ) as p:
            if p.stdout is not None:
                for line in p.stdout:
                    (error, text, percentage) = self.parse_progress(document, line)
                    if error:
                        document.mark_as_failed()
                    if percentage == 100.0:
                        document.mark_as_safe()
                    if stdout_callback:
                        stdout_callback(error, text, percentage)
            p.communicate()
            return p.returncode
    def exec_container(
        self,
        document: Document,
        command: List[str],
        extra_args: List[str] = [],
        stdout_callback: Optional[Callable] = None,
    ) -> int:
        container_runtime = self.get_runtime()
        if self.get_runtime_name() == "podman":
            platform_args = []
            security_args = ["--security-opt", "no-new-privileges"]
            security_args += ["--userns", "keep-id"]
        else:
            platform_args = ["--platform", "linux/amd64"]
            security_args = ["--security-opt=no-new-privileges:true"]
        # drop all linux kernel capabilities
        security_args += ["--cap-drop", "all"]
        user_args = ["-u", "dangerzone"]
        prevent_leakage_args = ["--rm"]
        args = (
            ["run", "--network", "none"]
            + platform_args
            + user_args
            + security_args
            + prevent_leakage_args
            + extra_args
            + [self.CONTAINER_NAME]
            + command
        )
        args = [container_runtime] + args
        return self.exec(document, args, stdout_callback)
    def convert(
        self,
        document: Document,
        ocr_lang: Optional[str],
        stdout_callback: Optional[Callable] = None,
    ) -> bool:
        success = False
        document.mark_as_converting()
        if ocr_lang:
            ocr = "1"
        else:
            ocr = "0"
        dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
        os.makedirs(dz_tmp, exist_ok=True)
        tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
        pixel_dir = os.path.join(tmpdir.name, "pixels")
        safe_dir = os.path.join(tmpdir.name, "safe")
        os.makedirs(pixel_dir, exist_ok=True)
        os.makedirs(safe_dir, exist_ok=True)
        # Convert document to pixels
        command = [
            "/usr/bin/python3",
            "/usr/local/bin/dangerzone.py",
            "document-to-pixels",
        ]
        extra_args = [
            "-v",
            f"{document.input_filename}:/tmp/input_file",
            "-v",
            f"{pixel_dir}:/dangerzone",
        ]
        ret = self.exec_container(document, command, extra_args, stdout_callback)
        if ret != 0:
            log.error("documents-to-pixels failed")
        else:
            # TODO: validate convert to pixels output
            # Convert pixels to safe PDF
            command = [
                "/usr/bin/python3",
                "/usr/local/bin/dangerzone.py",
                "pixels-to-pdf",
            ]
            extra_args = [
                "-v",
                f"{pixel_dir}:/dangerzone",
                "-v",
                f"{safe_dir}:/safezone",
                "-e",
                f"OCR={ocr}",
                "-e",
                f"OCR_LANGUAGE={ocr_lang}",
            ]
            ret = self.exec_container(document, command, extra_args, stdout_callback)
            if ret != 0:
                log.error("pixels-to-pdf failed")
            else:
                # Move the final file to the right place
                if os.path.exists(document.output_filename):
                    os.remove(document.output_filename)
                container_output_filename = os.path.join(
                    safe_dir, "safe-output-compressed.pdf"
                )
                shutil.move(container_output_filename, document.output_filename)
                if document.archive_after_conversion:
                    document.archive()
                # We did it
                success = True
        # Clean up
        tmpdir.cleanup()
        return success
    def get_max_parallel_conversions(self) -> int:
        # FIXME hardcoded 1 until timeouts are more limited and better handled
        # https://github.com/freedomofpress/dangerzone/issues/257
        return 1
        n_cpu = 1  # type: ignore [unreachable]
        if platform.system() == "Linux":
            # if on linux containers run natively
            cpu_count = os.cpu_count()
            if cpu_count is not None:
                n_cpu = cpu_count
        elif self.get_runtime_name() == "docker":
            # For Windows and MacOS containers run in VM
            # So we obtain the CPU count for the VM
            n_cpu_str = subprocess.check_output(
                [self.get_runtime(), "info", "--format", "{{.NCPU}}"],
                text=True,
                startupinfo=get_subprocess_startupinfo(),
            )
            n_cpu = int(n_cpu_str.strip())
        return 2 * n_cpu + 1
 # From global_common:
--- a/dangerzone/logic.py
+++ b/dangerzone/logic.py
@ -41,6 +41,8 @@ class DangerzoneCore(object):
        self.documents: List[Document] = []
        self.isolation_provider = isolation_provider.Container()
    def add_document_from_filename(
        self,
        input_filename: str,
@ -59,13 +61,13 @@ class DangerzoneCore(object):
        self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
    ) -> None:
        def convert_doc(document: Document) -> None:
-            success = isolation_provider.convert(
+            success = self.isolation_provider.convert(
                document,
                ocr_lang,
                stdout_callback,
            )
-        max_jobs = isolation_provider.get_max_parallel_conversions()
+        max_jobs = self.isolation_provider.get_max_parallel_conversions()
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
            executor.map(convert_doc, self.documents)