diff --git a/dangerzone/cli.py b/dangerzone/cli.py index 19f3b8b..c451856 100644 --- a/dangerzone/cli.py +++ b/dangerzone/cli.py @@ -73,7 +73,7 @@ def cli_main( exit(1) # Ensure container is installed - isolation_provider.install() + dangerzone.isolation_provider.install() # Convert the document print_header("Converting document to safe PDF") diff --git a/dangerzone/gui/main_window.py b/dangerzone/gui/main_window.py index 8b25567..f014fe5 100644 --- a/dangerzone/gui/main_window.py +++ b/dangerzone/gui/main_window.py @@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow): class InstallContainerThread(QtCore.QThread): finished = QtCore.Signal() - def __init__(self) -> None: + def __init__(self, dangerzone: DangerzoneGui) -> None: super(InstallContainerThread, self).__init__() + self.dangerzone = dangerzone def run(self) -> None: - isolation_provider.install() + self.dangerzone.isolation_provider.install() self.finished.emit() @@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget): state: Optional[str] = None try: - container_runtime = isolation_provider.get_runtime() + container_runtime = self.dangerzone.isolation_provider.get_runtime() except isolation_provider.NoContainerTechException as e: log.error(str(e)) state = "not_installed" @@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget): "Installing the Dangerzone container image.

This might take a few minutes..." ) self.buttons.hide() - self.install_container_t = InstallContainerThread() + self.install_container_t = InstallContainerThread(self.dangerzone) self.install_container_t.finished.connect(self.finished) self.install_container_t.start() @@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject): finished = QtCore.Signal(bool) update = QtCore.Signal(bool, str, int) - def __init__(self, document: Document, ocr_lang: str = None) -> None: + def __init__( + self, + dangerzone: DangerzoneGui, + document: Document, + ocr_lang: str = None, + ) -> None: super(ConvertTask, self).__init__() self.document = document self.ocr_lang = ocr_lang self.error = False + self.dangerzone = dangerzone def convert_document(self) -> None: - isolation_provider.convert( + self.dangerzone.isolation_provider.convert( self.document, self.ocr_lang, self.stdout_callback, @@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget): def start_conversion(self) -> None: if not self.thread_pool_initized: - max_jobs = isolation_provider.get_max_parallel_conversions() + max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions() self.thread_pool = ThreadPool(max_jobs) for doc_widget in self.document_widgets: - task = ConvertTask(doc_widget.document, self.get_ocr_lang()) + task = ConvertTask( + self.dangerzone, doc_widget.document, self.get_ocr_lang() + ) task.update.connect(doc_widget.update_progress) task.finished.connect(doc_widget.all_done) self.thread_pool.apply_async(task.convert_document) diff --git a/dangerzone/isolation_provider.py b/dangerzone/isolation_provider.py index 8eaf7fa..22fc74b 100644 --- a/dangerzone/isolation_provider.py +++ b/dangerzone/isolation_provider.py @@ -7,6 +7,7 @@ import platform import shutil import subprocess import tempfile +from abc import ABC, abstractmethod from typing import Callable, List, Optional, Tuple import appdirs @@ -15,8 +16,6 @@ from colorama import Fore, Style from .document import Document from .util import get_resource_path, get_subprocess_startupinfo -container_name = "dangerzone.rocks/dangerzone" - # Define startupinfo for subprocesses if platform.system() == "Windows": startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined] @@ -26,298 +25,329 @@ else: log = logging.getLogger(__name__) -# Name of the dangerzone container -container_name = "dangerzone.rocks/dangerzone" - class NoContainerTechException(Exception): def __init__(self, container_tech: str) -> None: super().__init__(f"{container_tech} is not installed") -def get_runtime_name() -> str: - if platform.system() == "Linux": - runtime_name = "podman" - else: - # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually - runtime_name = "docker" - return runtime_name - - -def get_runtime() -> str: - container_tech = get_runtime_name() - runtime = shutil.which(container_tech) - if runtime is None: - raise NoContainerTechException(container_tech) - return runtime - - -def install() -> bool: +class AbstractIsolationProvider(ABC): """ - Make sure the podman container is installed. Linux only. + Abstracts an isolation provider """ - if is_container_installed(): + + @abstractmethod + def install(self) -> bool: + pass + + @abstractmethod + def convert( + self, + document: Document, + ocr_lang: Optional[str], + stdout_callback: Optional[Callable] = None, + ) -> bool: + pass + + @abstractmethod + def get_max_parallel_conversions(self) -> int: + pass + + +class Container(AbstractIsolationProvider): + + # Name of the dangerzone container + CONTAINER_NAME = "dangerzone.rocks/dangerzone" + + def __init__(self) -> None: + pass + + def get_runtime_name(self) -> str: + if platform.system() == "Linux": + runtime_name = "podman" + else: + # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually + runtime_name = "docker" + return runtime_name + + def get_runtime(self) -> str: + container_tech = self.get_runtime_name() + runtime = shutil.which(container_tech) + if runtime is None: + raise NoContainerTechException(container_tech) + return runtime + + def install(self) -> bool: + """ + Make sure the podman container is installed. Linux only. + """ + if self.is_container_installed(): + return True + + # Load the container into podman + log.info("Installing Dangerzone container image...") + + p = subprocess.Popen( + [self.get_runtime(), "load"], + stdin=subprocess.PIPE, + startupinfo=get_subprocess_startupinfo(), + ) + + chunk_size = 10240 + compressed_container_path = get_resource_path("container.tar.gz") + with gzip.open(compressed_container_path) as f: + while True: + chunk = f.read(chunk_size) + if len(chunk) > 0: + if p.stdin: + p.stdin.write(chunk) + else: + break + p.communicate() + + if not self.is_container_installed(): + log.error("Failed to install the container image") + return False + + log.info("Container image installed") return True - # Load the container into podman - log.info("Installing Dangerzone container image...") + def is_container_installed(self) -> bool: + """ + See if the podman container is installed. Linux only. + """ + # Get the image id + with open(get_resource_path("image-id.txt")) as f: + expected_image_id = f.read().strip() - p = subprocess.Popen( - [get_runtime(), "load"], - stdin=subprocess.PIPE, - startupinfo=get_subprocess_startupinfo(), - ) - - chunk_size = 10240 - compressed_container_path = get_resource_path("container.tar.gz") - with gzip.open(compressed_container_path) as f: - while True: - chunk = f.read(chunk_size) - if len(chunk) > 0: - if p.stdin: - p.stdin.write(chunk) - else: - break - p.communicate() - - if not is_container_installed(): - log.error("Failed to install the container image") - return False - - log.info("Container image installed") - return True - - -def is_container_installed() -> bool: - """ - See if the podman container is installed. Linux only. - """ - # Get the image id - with open(get_resource_path("image-id.txt")) as f: - expected_image_id = f.read().strip() - - # See if this image is already installed - installed = False - found_image_id = subprocess.check_output( - [ - get_runtime(), - "image", - "list", - "--format", - "{{.ID}}", - container_name, - ], - text=True, - startupinfo=get_subprocess_startupinfo(), - ) - found_image_id = found_image_id.strip() - - if found_image_id == expected_image_id: - installed = True - elif found_image_id == "": - pass - else: - log.info("Deleting old dangerzone container image") - - try: - subprocess.check_output( - [get_runtime(), "rmi", "--force", found_image_id], - startupinfo=get_subprocess_startupinfo(), - ) - except: - log.warning("Couldn't delete old container image, so leaving it there") - - return installed - - -def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]: - """ - Parses a line returned by the container. - """ - try: - status = json.loads(line) - except: - error_message = f"Invalid JSON returned from container:\n\n\t {line}" - log.error(error_message) - return (True, error_message, -1) - - s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] " - s += Fore.CYAN + f"{status['percentage']}% " - if status["error"]: - s += Style.RESET_ALL + Fore.RED + status["text"] - log.error(s) - else: - s += Style.RESET_ALL + status["text"] - log.info(s) - - return (status["error"], status["text"], status["percentage"]) - - -def exec( - document: Document, - args: List[str], - stdout_callback: Optional[Callable] = None, -) -> int: - args_str = " ".join(pipes.quote(s) for s in args) - log.info("> " + args_str) - - with subprocess.Popen( - args, - stdin=None, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - bufsize=1, - universal_newlines=True, - startupinfo=startupinfo, - ) as p: - if p.stdout is not None: - for line in p.stdout: - (error, text, percentage) = parse_progress(document, line) - if error: - document.mark_as_failed() - if percentage == 100.0: - document.mark_as_safe() - if stdout_callback: - stdout_callback(error, text, percentage) - - p.communicate() - return p.returncode - - -def exec_container( - document: Document, - command: List[str], - extra_args: List[str] = [], - stdout_callback: Optional[Callable] = None, -) -> int: - container_runtime = get_runtime() - - if get_runtime_name() == "podman": - platform_args = [] - security_args = ["--security-opt", "no-new-privileges"] - security_args += ["--userns", "keep-id"] - else: - platform_args = ["--platform", "linux/amd64"] - security_args = ["--security-opt=no-new-privileges:true"] - - # drop all linux kernel capabilities - security_args += ["--cap-drop", "all"] - user_args = ["-u", "dangerzone"] - - prevent_leakage_args = ["--rm"] - - args = ( - ["run", "--network", "none"] - + platform_args - + user_args - + security_args - + prevent_leakage_args - + extra_args - + [container_name] - + command - ) - - args = [container_runtime] + args - return exec(document, args, stdout_callback) - - -def convert( - document: Document, - ocr_lang: Optional[str], - stdout_callback: Optional[Callable] = None, -) -> bool: - success = False - document.mark_as_converting() - - if ocr_lang: - ocr = "1" - else: - ocr = "0" - - dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp") - os.makedirs(dz_tmp, exist_ok=True) - - tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp) - pixel_dir = os.path.join(tmpdir.name, "pixels") - safe_dir = os.path.join(tmpdir.name, "safe") - os.makedirs(pixel_dir, exist_ok=True) - os.makedirs(safe_dir, exist_ok=True) - - # Convert document to pixels - command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"] - extra_args = [ - "-v", - f"{document.input_filename}:/tmp/input_file", - "-v", - f"{pixel_dir}:/dangerzone", - ] - ret = exec_container(document, command, extra_args, stdout_callback) - if ret != 0: - log.error("documents-to-pixels failed") - else: - # TODO: validate convert to pixels output - - # Convert pixels to safe PDF - command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"] - extra_args = [ - "-v", - f"{pixel_dir}:/dangerzone", - "-v", - f"{safe_dir}:/safezone", - "-e", - f"OCR={ocr}", - "-e", - f"OCR_LANGUAGE={ocr_lang}", - ] - ret = exec_container(document, command, extra_args, stdout_callback) - if ret != 0: - log.error("pixels-to-pdf failed") - else: - # Move the final file to the right place - if os.path.exists(document.output_filename): - os.remove(document.output_filename) - - container_output_filename = os.path.join( - safe_dir, "safe-output-compressed.pdf" - ) - shutil.move(container_output_filename, document.output_filename) - - if document.archive_after_conversion: - document.archive() - - # We did it - success = True - - # Clean up - tmpdir.cleanup() - - return success - - -def get_max_parallel_conversions() -> int: - - # FIXME hardcoded 1 until timeouts are more limited and better handled - # https://github.com/freedomofpress/dangerzone/issues/257 - return 1 - - n_cpu = 1 # type: ignore [unreachable] - if platform.system() == "Linux": - # if on linux containers run natively - cpu_count = os.cpu_count() - if cpu_count is not None: - n_cpu = cpu_count - - elif get_runtime_name() == "docker": - # For Windows and MacOS containers run in VM - # So we obtain the CPU count for the VM - n_cpu_str = subprocess.check_output( - [get_runtime(), "info", "--format", "{{.NCPU}}"], + # See if this image is already installed + installed = False + found_image_id = subprocess.check_output( + [ + self.get_runtime(), + "image", + "list", + "--format", + "{{.ID}}", + self.CONTAINER_NAME, + ], text=True, startupinfo=get_subprocess_startupinfo(), ) - n_cpu = int(n_cpu_str.strip()) + found_image_id = found_image_id.strip() - return 2 * n_cpu + 1 + if found_image_id == expected_image_id: + installed = True + elif found_image_id == "": + pass + else: + log.info("Deleting old dangerzone container image") + + try: + subprocess.check_output( + [self.get_runtime(), "rmi", "--force", found_image_id], + startupinfo=get_subprocess_startupinfo(), + ) + except: + log.warning("Couldn't delete old container image, so leaving it there") + + return installed + + def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]: + """ + Parses a line returned by the container. + """ + try: + status = json.loads(line) + except: + error_message = f"Invalid JSON returned from container:\n\n\t {line}" + log.error(error_message) + return (True, error_message, -1) + + s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] " + s += Fore.CYAN + f"{status['percentage']}% " + if status["error"]: + s += Style.RESET_ALL + Fore.RED + status["text"] + log.error(s) + else: + s += Style.RESET_ALL + status["text"] + log.info(s) + + return (status["error"], status["text"], status["percentage"]) + + def exec( + self, + document: Document, + args: List[str], + stdout_callback: Optional[Callable] = None, + ) -> int: + args_str = " ".join(pipes.quote(s) for s in args) + log.info("> " + args_str) + + with subprocess.Popen( + args, + stdin=None, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + bufsize=1, + universal_newlines=True, + startupinfo=startupinfo, + ) as p: + if p.stdout is not None: + for line in p.stdout: + (error, text, percentage) = self.parse_progress(document, line) + if error: + document.mark_as_failed() + if percentage == 100.0: + document.mark_as_safe() + if stdout_callback: + stdout_callback(error, text, percentage) + + p.communicate() + return p.returncode + + def exec_container( + self, + document: Document, + command: List[str], + extra_args: List[str] = [], + stdout_callback: Optional[Callable] = None, + ) -> int: + container_runtime = self.get_runtime() + + if self.get_runtime_name() == "podman": + platform_args = [] + security_args = ["--security-opt", "no-new-privileges"] + security_args += ["--userns", "keep-id"] + else: + platform_args = ["--platform", "linux/amd64"] + security_args = ["--security-opt=no-new-privileges:true"] + + # drop all linux kernel capabilities + security_args += ["--cap-drop", "all"] + user_args = ["-u", "dangerzone"] + + prevent_leakage_args = ["--rm"] + + args = ( + ["run", "--network", "none"] + + platform_args + + user_args + + security_args + + prevent_leakage_args + + extra_args + + [self.CONTAINER_NAME] + + command + ) + + args = [container_runtime] + args + return self.exec(document, args, stdout_callback) + + def convert( + self, + document: Document, + ocr_lang: Optional[str], + stdout_callback: Optional[Callable] = None, + ) -> bool: + success = False + document.mark_as_converting() + + if ocr_lang: + ocr = "1" + else: + ocr = "0" + + dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp") + os.makedirs(dz_tmp, exist_ok=True) + + tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp) + pixel_dir = os.path.join(tmpdir.name, "pixels") + safe_dir = os.path.join(tmpdir.name, "safe") + os.makedirs(pixel_dir, exist_ok=True) + os.makedirs(safe_dir, exist_ok=True) + + # Convert document to pixels + command = [ + "/usr/bin/python3", + "/usr/local/bin/dangerzone.py", + "document-to-pixels", + ] + extra_args = [ + "-v", + f"{document.input_filename}:/tmp/input_file", + "-v", + f"{pixel_dir}:/dangerzone", + ] + ret = self.exec_container(document, command, extra_args, stdout_callback) + if ret != 0: + log.error("documents-to-pixels failed") + else: + # TODO: validate convert to pixels output + + # Convert pixels to safe PDF + command = [ + "/usr/bin/python3", + "/usr/local/bin/dangerzone.py", + "pixels-to-pdf", + ] + extra_args = [ + "-v", + f"{pixel_dir}:/dangerzone", + "-v", + f"{safe_dir}:/safezone", + "-e", + f"OCR={ocr}", + "-e", + f"OCR_LANGUAGE={ocr_lang}", + ] + ret = self.exec_container(document, command, extra_args, stdout_callback) + if ret != 0: + log.error("pixels-to-pdf failed") + else: + # Move the final file to the right place + if os.path.exists(document.output_filename): + os.remove(document.output_filename) + + container_output_filename = os.path.join( + safe_dir, "safe-output-compressed.pdf" + ) + shutil.move(container_output_filename, document.output_filename) + + if document.archive_after_conversion: + document.archive() + + # We did it + success = True + + # Clean up + tmpdir.cleanup() + + return success + + def get_max_parallel_conversions(self) -> int: + + # FIXME hardcoded 1 until timeouts are more limited and better handled + # https://github.com/freedomofpress/dangerzone/issues/257 + return 1 + + n_cpu = 1 # type: ignore [unreachable] + if platform.system() == "Linux": + # if on linux containers run natively + cpu_count = os.cpu_count() + if cpu_count is not None: + n_cpu = cpu_count + + elif self.get_runtime_name() == "docker": + # For Windows and MacOS containers run in VM + # So we obtain the CPU count for the VM + n_cpu_str = subprocess.check_output( + [self.get_runtime(), "info", "--format", "{{.NCPU}}"], + text=True, + startupinfo=get_subprocess_startupinfo(), + ) + n_cpu = int(n_cpu_str.strip()) + + return 2 * n_cpu + 1 # From global_common: diff --git a/dangerzone/logic.py b/dangerzone/logic.py index d7c08d8..5f30b56 100644 --- a/dangerzone/logic.py +++ b/dangerzone/logic.py @@ -41,6 +41,8 @@ class DangerzoneCore(object): self.documents: List[Document] = [] + self.isolation_provider = isolation_provider.Container() + def add_document_from_filename( self, input_filename: str, @@ -59,13 +61,13 @@ class DangerzoneCore(object): self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None ) -> None: def convert_doc(document: Document) -> None: - success = isolation_provider.convert( + success = self.isolation_provider.convert( document, ocr_lang, stdout_callback, ) - max_jobs = isolation_provider.get_max_parallel_conversions() + max_jobs = self.isolation_provider.get_max_parallel_conversions() with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor: executor.map(convert_doc, self.documents)