diff --git a/dangerzone/cli.py b/dangerzone/cli.py
index 19f3b8b..c451856 100644
--- a/dangerzone/cli.py
+++ b/dangerzone/cli.py
@@ -73,7 +73,7 @@ def cli_main(
exit(1)
# Ensure container is installed
- isolation_provider.install()
+ dangerzone.isolation_provider.install()
# Convert the document
print_header("Converting document to safe PDF")
diff --git a/dangerzone/gui/main_window.py b/dangerzone/gui/main_window.py
index 8b25567..f014fe5 100644
--- a/dangerzone/gui/main_window.py
+++ b/dangerzone/gui/main_window.py
@@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
class InstallContainerThread(QtCore.QThread):
finished = QtCore.Signal()
- def __init__(self) -> None:
+ def __init__(self, dangerzone: DangerzoneGui) -> None:
super(InstallContainerThread, self).__init__()
+ self.dangerzone = dangerzone
def run(self) -> None:
- isolation_provider.install()
+ self.dangerzone.isolation_provider.install()
self.finished.emit()
@@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
state: Optional[str] = None
try:
- container_runtime = isolation_provider.get_runtime()
+ container_runtime = self.dangerzone.isolation_provider.get_runtime()
except isolation_provider.NoContainerTechException as e:
log.error(str(e))
state = "not_installed"
@@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
"Installing the Dangerzone container image.
This might take a few minutes..."
)
self.buttons.hide()
- self.install_container_t = InstallContainerThread()
+ self.install_container_t = InstallContainerThread(self.dangerzone)
self.install_container_t.finished.connect(self.finished)
self.install_container_t.start()
@@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
finished = QtCore.Signal(bool)
update = QtCore.Signal(bool, str, int)
- def __init__(self, document: Document, ocr_lang: str = None) -> None:
+ def __init__(
+ self,
+ dangerzone: DangerzoneGui,
+ document: Document,
+ ocr_lang: str = None,
+ ) -> None:
super(ConvertTask, self).__init__()
self.document = document
self.ocr_lang = ocr_lang
self.error = False
+ self.dangerzone = dangerzone
def convert_document(self) -> None:
- isolation_provider.convert(
+ self.dangerzone.isolation_provider.convert(
self.document,
self.ocr_lang,
self.stdout_callback,
@@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
def start_conversion(self) -> None:
if not self.thread_pool_initized:
- max_jobs = isolation_provider.get_max_parallel_conversions()
+ max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
self.thread_pool = ThreadPool(max_jobs)
for doc_widget in self.document_widgets:
- task = ConvertTask(doc_widget.document, self.get_ocr_lang())
+ task = ConvertTask(
+ self.dangerzone, doc_widget.document, self.get_ocr_lang()
+ )
task.update.connect(doc_widget.update_progress)
task.finished.connect(doc_widget.all_done)
self.thread_pool.apply_async(task.convert_document)
diff --git a/dangerzone/isolation_provider.py b/dangerzone/isolation_provider.py
index 8eaf7fa..22fc74b 100644
--- a/dangerzone/isolation_provider.py
+++ b/dangerzone/isolation_provider.py
@@ -7,6 +7,7 @@ import platform
import shutil
import subprocess
import tempfile
+from abc import ABC, abstractmethod
from typing import Callable, List, Optional, Tuple
import appdirs
@@ -15,8 +16,6 @@ from colorama import Fore, Style
from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo
-container_name = "dangerzone.rocks/dangerzone"
-
# Define startupinfo for subprocesses
if platform.system() == "Windows":
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
@@ -26,298 +25,329 @@ else:
log = logging.getLogger(__name__)
-# Name of the dangerzone container
-container_name = "dangerzone.rocks/dangerzone"
-
class NoContainerTechException(Exception):
def __init__(self, container_tech: str) -> None:
super().__init__(f"{container_tech} is not installed")
-def get_runtime_name() -> str:
- if platform.system() == "Linux":
- runtime_name = "podman"
- else:
- # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
- runtime_name = "docker"
- return runtime_name
-
-
-def get_runtime() -> str:
- container_tech = get_runtime_name()
- runtime = shutil.which(container_tech)
- if runtime is None:
- raise NoContainerTechException(container_tech)
- return runtime
-
-
-def install() -> bool:
+class AbstractIsolationProvider(ABC):
"""
- Make sure the podman container is installed. Linux only.
+ Abstracts an isolation provider
"""
- if is_container_installed():
+
+ @abstractmethod
+ def install(self) -> bool:
+ pass
+
+ @abstractmethod
+ def convert(
+ self,
+ document: Document,
+ ocr_lang: Optional[str],
+ stdout_callback: Optional[Callable] = None,
+ ) -> bool:
+ pass
+
+ @abstractmethod
+ def get_max_parallel_conversions(self) -> int:
+ pass
+
+
+class Container(AbstractIsolationProvider):
+
+ # Name of the dangerzone container
+ CONTAINER_NAME = "dangerzone.rocks/dangerzone"
+
+ def __init__(self) -> None:
+ pass
+
+ def get_runtime_name(self) -> str:
+ if platform.system() == "Linux":
+ runtime_name = "podman"
+ else:
+ # Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
+ runtime_name = "docker"
+ return runtime_name
+
+ def get_runtime(self) -> str:
+ container_tech = self.get_runtime_name()
+ runtime = shutil.which(container_tech)
+ if runtime is None:
+ raise NoContainerTechException(container_tech)
+ return runtime
+
+ def install(self) -> bool:
+ """
+ Make sure the podman container is installed. Linux only.
+ """
+ if self.is_container_installed():
+ return True
+
+ # Load the container into podman
+ log.info("Installing Dangerzone container image...")
+
+ p = subprocess.Popen(
+ [self.get_runtime(), "load"],
+ stdin=subprocess.PIPE,
+ startupinfo=get_subprocess_startupinfo(),
+ )
+
+ chunk_size = 10240
+ compressed_container_path = get_resource_path("container.tar.gz")
+ with gzip.open(compressed_container_path) as f:
+ while True:
+ chunk = f.read(chunk_size)
+ if len(chunk) > 0:
+ if p.stdin:
+ p.stdin.write(chunk)
+ else:
+ break
+ p.communicate()
+
+ if not self.is_container_installed():
+ log.error("Failed to install the container image")
+ return False
+
+ log.info("Container image installed")
return True
- # Load the container into podman
- log.info("Installing Dangerzone container image...")
+ def is_container_installed(self) -> bool:
+ """
+ See if the podman container is installed. Linux only.
+ """
+ # Get the image id
+ with open(get_resource_path("image-id.txt")) as f:
+ expected_image_id = f.read().strip()
- p = subprocess.Popen(
- [get_runtime(), "load"],
- stdin=subprocess.PIPE,
- startupinfo=get_subprocess_startupinfo(),
- )
-
- chunk_size = 10240
- compressed_container_path = get_resource_path("container.tar.gz")
- with gzip.open(compressed_container_path) as f:
- while True:
- chunk = f.read(chunk_size)
- if len(chunk) > 0:
- if p.stdin:
- p.stdin.write(chunk)
- else:
- break
- p.communicate()
-
- if not is_container_installed():
- log.error("Failed to install the container image")
- return False
-
- log.info("Container image installed")
- return True
-
-
-def is_container_installed() -> bool:
- """
- See if the podman container is installed. Linux only.
- """
- # Get the image id
- with open(get_resource_path("image-id.txt")) as f:
- expected_image_id = f.read().strip()
-
- # See if this image is already installed
- installed = False
- found_image_id = subprocess.check_output(
- [
- get_runtime(),
- "image",
- "list",
- "--format",
- "{{.ID}}",
- container_name,
- ],
- text=True,
- startupinfo=get_subprocess_startupinfo(),
- )
- found_image_id = found_image_id.strip()
-
- if found_image_id == expected_image_id:
- installed = True
- elif found_image_id == "":
- pass
- else:
- log.info("Deleting old dangerzone container image")
-
- try:
- subprocess.check_output(
- [get_runtime(), "rmi", "--force", found_image_id],
- startupinfo=get_subprocess_startupinfo(),
- )
- except:
- log.warning("Couldn't delete old container image, so leaving it there")
-
- return installed
-
-
-def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
- """
- Parses a line returned by the container.
- """
- try:
- status = json.loads(line)
- except:
- error_message = f"Invalid JSON returned from container:\n\n\t {line}"
- log.error(error_message)
- return (True, error_message, -1)
-
- s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
- s += Fore.CYAN + f"{status['percentage']}% "
- if status["error"]:
- s += Style.RESET_ALL + Fore.RED + status["text"]
- log.error(s)
- else:
- s += Style.RESET_ALL + status["text"]
- log.info(s)
-
- return (status["error"], status["text"], status["percentage"])
-
-
-def exec(
- document: Document,
- args: List[str],
- stdout_callback: Optional[Callable] = None,
-) -> int:
- args_str = " ".join(pipes.quote(s) for s in args)
- log.info("> " + args_str)
-
- with subprocess.Popen(
- args,
- stdin=None,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT,
- bufsize=1,
- universal_newlines=True,
- startupinfo=startupinfo,
- ) as p:
- if p.stdout is not None:
- for line in p.stdout:
- (error, text, percentage) = parse_progress(document, line)
- if error:
- document.mark_as_failed()
- if percentage == 100.0:
- document.mark_as_safe()
- if stdout_callback:
- stdout_callback(error, text, percentage)
-
- p.communicate()
- return p.returncode
-
-
-def exec_container(
- document: Document,
- command: List[str],
- extra_args: List[str] = [],
- stdout_callback: Optional[Callable] = None,
-) -> int:
- container_runtime = get_runtime()
-
- if get_runtime_name() == "podman":
- platform_args = []
- security_args = ["--security-opt", "no-new-privileges"]
- security_args += ["--userns", "keep-id"]
- else:
- platform_args = ["--platform", "linux/amd64"]
- security_args = ["--security-opt=no-new-privileges:true"]
-
- # drop all linux kernel capabilities
- security_args += ["--cap-drop", "all"]
- user_args = ["-u", "dangerzone"]
-
- prevent_leakage_args = ["--rm"]
-
- args = (
- ["run", "--network", "none"]
- + platform_args
- + user_args
- + security_args
- + prevent_leakage_args
- + extra_args
- + [container_name]
- + command
- )
-
- args = [container_runtime] + args
- return exec(document, args, stdout_callback)
-
-
-def convert(
- document: Document,
- ocr_lang: Optional[str],
- stdout_callback: Optional[Callable] = None,
-) -> bool:
- success = False
- document.mark_as_converting()
-
- if ocr_lang:
- ocr = "1"
- else:
- ocr = "0"
-
- dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
- os.makedirs(dz_tmp, exist_ok=True)
-
- tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
- pixel_dir = os.path.join(tmpdir.name, "pixels")
- safe_dir = os.path.join(tmpdir.name, "safe")
- os.makedirs(pixel_dir, exist_ok=True)
- os.makedirs(safe_dir, exist_ok=True)
-
- # Convert document to pixels
- command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
- extra_args = [
- "-v",
- f"{document.input_filename}:/tmp/input_file",
- "-v",
- f"{pixel_dir}:/dangerzone",
- ]
- ret = exec_container(document, command, extra_args, stdout_callback)
- if ret != 0:
- log.error("documents-to-pixels failed")
- else:
- # TODO: validate convert to pixels output
-
- # Convert pixels to safe PDF
- command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
- extra_args = [
- "-v",
- f"{pixel_dir}:/dangerzone",
- "-v",
- f"{safe_dir}:/safezone",
- "-e",
- f"OCR={ocr}",
- "-e",
- f"OCR_LANGUAGE={ocr_lang}",
- ]
- ret = exec_container(document, command, extra_args, stdout_callback)
- if ret != 0:
- log.error("pixels-to-pdf failed")
- else:
- # Move the final file to the right place
- if os.path.exists(document.output_filename):
- os.remove(document.output_filename)
-
- container_output_filename = os.path.join(
- safe_dir, "safe-output-compressed.pdf"
- )
- shutil.move(container_output_filename, document.output_filename)
-
- if document.archive_after_conversion:
- document.archive()
-
- # We did it
- success = True
-
- # Clean up
- tmpdir.cleanup()
-
- return success
-
-
-def get_max_parallel_conversions() -> int:
-
- # FIXME hardcoded 1 until timeouts are more limited and better handled
- # https://github.com/freedomofpress/dangerzone/issues/257
- return 1
-
- n_cpu = 1 # type: ignore [unreachable]
- if platform.system() == "Linux":
- # if on linux containers run natively
- cpu_count = os.cpu_count()
- if cpu_count is not None:
- n_cpu = cpu_count
-
- elif get_runtime_name() == "docker":
- # For Windows and MacOS containers run in VM
- # So we obtain the CPU count for the VM
- n_cpu_str = subprocess.check_output(
- [get_runtime(), "info", "--format", "{{.NCPU}}"],
+ # See if this image is already installed
+ installed = False
+ found_image_id = subprocess.check_output(
+ [
+ self.get_runtime(),
+ "image",
+ "list",
+ "--format",
+ "{{.ID}}",
+ self.CONTAINER_NAME,
+ ],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
- n_cpu = int(n_cpu_str.strip())
+ found_image_id = found_image_id.strip()
- return 2 * n_cpu + 1
+ if found_image_id == expected_image_id:
+ installed = True
+ elif found_image_id == "":
+ pass
+ else:
+ log.info("Deleting old dangerzone container image")
+
+ try:
+ subprocess.check_output(
+ [self.get_runtime(), "rmi", "--force", found_image_id],
+ startupinfo=get_subprocess_startupinfo(),
+ )
+ except:
+ log.warning("Couldn't delete old container image, so leaving it there")
+
+ return installed
+
+ def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
+ """
+ Parses a line returned by the container.
+ """
+ try:
+ status = json.loads(line)
+ except:
+ error_message = f"Invalid JSON returned from container:\n\n\t {line}"
+ log.error(error_message)
+ return (True, error_message, -1)
+
+ s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
+ s += Fore.CYAN + f"{status['percentage']}% "
+ if status["error"]:
+ s += Style.RESET_ALL + Fore.RED + status["text"]
+ log.error(s)
+ else:
+ s += Style.RESET_ALL + status["text"]
+ log.info(s)
+
+ return (status["error"], status["text"], status["percentage"])
+
+ def exec(
+ self,
+ document: Document,
+ args: List[str],
+ stdout_callback: Optional[Callable] = None,
+ ) -> int:
+ args_str = " ".join(pipes.quote(s) for s in args)
+ log.info("> " + args_str)
+
+ with subprocess.Popen(
+ args,
+ stdin=None,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ bufsize=1,
+ universal_newlines=True,
+ startupinfo=startupinfo,
+ ) as p:
+ if p.stdout is not None:
+ for line in p.stdout:
+ (error, text, percentage) = self.parse_progress(document, line)
+ if error:
+ document.mark_as_failed()
+ if percentage == 100.0:
+ document.mark_as_safe()
+ if stdout_callback:
+ stdout_callback(error, text, percentage)
+
+ p.communicate()
+ return p.returncode
+
+ def exec_container(
+ self,
+ document: Document,
+ command: List[str],
+ extra_args: List[str] = [],
+ stdout_callback: Optional[Callable] = None,
+ ) -> int:
+ container_runtime = self.get_runtime()
+
+ if self.get_runtime_name() == "podman":
+ platform_args = []
+ security_args = ["--security-opt", "no-new-privileges"]
+ security_args += ["--userns", "keep-id"]
+ else:
+ platform_args = ["--platform", "linux/amd64"]
+ security_args = ["--security-opt=no-new-privileges:true"]
+
+ # drop all linux kernel capabilities
+ security_args += ["--cap-drop", "all"]
+ user_args = ["-u", "dangerzone"]
+
+ prevent_leakage_args = ["--rm"]
+
+ args = (
+ ["run", "--network", "none"]
+ + platform_args
+ + user_args
+ + security_args
+ + prevent_leakage_args
+ + extra_args
+ + [self.CONTAINER_NAME]
+ + command
+ )
+
+ args = [container_runtime] + args
+ return self.exec(document, args, stdout_callback)
+
+ def convert(
+ self,
+ document: Document,
+ ocr_lang: Optional[str],
+ stdout_callback: Optional[Callable] = None,
+ ) -> bool:
+ success = False
+ document.mark_as_converting()
+
+ if ocr_lang:
+ ocr = "1"
+ else:
+ ocr = "0"
+
+ dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
+ os.makedirs(dz_tmp, exist_ok=True)
+
+ tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
+ pixel_dir = os.path.join(tmpdir.name, "pixels")
+ safe_dir = os.path.join(tmpdir.name, "safe")
+ os.makedirs(pixel_dir, exist_ok=True)
+ os.makedirs(safe_dir, exist_ok=True)
+
+ # Convert document to pixels
+ command = [
+ "/usr/bin/python3",
+ "/usr/local/bin/dangerzone.py",
+ "document-to-pixels",
+ ]
+ extra_args = [
+ "-v",
+ f"{document.input_filename}:/tmp/input_file",
+ "-v",
+ f"{pixel_dir}:/dangerzone",
+ ]
+ ret = self.exec_container(document, command, extra_args, stdout_callback)
+ if ret != 0:
+ log.error("documents-to-pixels failed")
+ else:
+ # TODO: validate convert to pixels output
+
+ # Convert pixels to safe PDF
+ command = [
+ "/usr/bin/python3",
+ "/usr/local/bin/dangerzone.py",
+ "pixels-to-pdf",
+ ]
+ extra_args = [
+ "-v",
+ f"{pixel_dir}:/dangerzone",
+ "-v",
+ f"{safe_dir}:/safezone",
+ "-e",
+ f"OCR={ocr}",
+ "-e",
+ f"OCR_LANGUAGE={ocr_lang}",
+ ]
+ ret = self.exec_container(document, command, extra_args, stdout_callback)
+ if ret != 0:
+ log.error("pixels-to-pdf failed")
+ else:
+ # Move the final file to the right place
+ if os.path.exists(document.output_filename):
+ os.remove(document.output_filename)
+
+ container_output_filename = os.path.join(
+ safe_dir, "safe-output-compressed.pdf"
+ )
+ shutil.move(container_output_filename, document.output_filename)
+
+ if document.archive_after_conversion:
+ document.archive()
+
+ # We did it
+ success = True
+
+ # Clean up
+ tmpdir.cleanup()
+
+ return success
+
+ def get_max_parallel_conversions(self) -> int:
+
+ # FIXME hardcoded 1 until timeouts are more limited and better handled
+ # https://github.com/freedomofpress/dangerzone/issues/257
+ return 1
+
+ n_cpu = 1 # type: ignore [unreachable]
+ if platform.system() == "Linux":
+ # if on linux containers run natively
+ cpu_count = os.cpu_count()
+ if cpu_count is not None:
+ n_cpu = cpu_count
+
+ elif self.get_runtime_name() == "docker":
+ # For Windows and MacOS containers run in VM
+ # So we obtain the CPU count for the VM
+ n_cpu_str = subprocess.check_output(
+ [self.get_runtime(), "info", "--format", "{{.NCPU}}"],
+ text=True,
+ startupinfo=get_subprocess_startupinfo(),
+ )
+ n_cpu = int(n_cpu_str.strip())
+
+ return 2 * n_cpu + 1
# From global_common:
diff --git a/dangerzone/logic.py b/dangerzone/logic.py
index d7c08d8..5f30b56 100644
--- a/dangerzone/logic.py
+++ b/dangerzone/logic.py
@@ -41,6 +41,8 @@ class DangerzoneCore(object):
self.documents: List[Document] = []
+ self.isolation_provider = isolation_provider.Container()
+
def add_document_from_filename(
self,
input_filename: str,
@@ -59,13 +61,13 @@ class DangerzoneCore(object):
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None:
def convert_doc(document: Document) -> None:
- success = isolation_provider.convert(
+ success = self.isolation_provider.convert(
document,
ocr_lang,
stdout_callback,
)
- max_jobs = isolation_provider.get_max_parallel_conversions()
+ max_jobs = self.isolation_provider.get_max_parallel_conversions()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
executor.map(convert_doc, self.documents)