Abstract container into an IsolationProvider

Encapsulate container logic into an implementation of
AbstractIsolationProvider. This abstraction will allow for other types
of isolation providers, such as a dummy one.
This commit is contained in:
deeplow 2022-12-27 13:57:19 +00:00
parent 1114a0dfa1
commit a4f27afdc6
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 331 additions and 290 deletions

View file

@ -73,7 +73,7 @@ def cli_main(
exit(1) exit(1)
# Ensure container is installed # Ensure container is installed
isolation_provider.install() dangerzone.isolation_provider.install()
# Convert the document # Convert the document
print_header("Converting document to safe PDF") print_header("Converting document to safe PDF")

View file

@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
class InstallContainerThread(QtCore.QThread): class InstallContainerThread(QtCore.QThread):
finished = QtCore.Signal() finished = QtCore.Signal()
def __init__(self) -> None: def __init__(self, dangerzone: DangerzoneGui) -> None:
super(InstallContainerThread, self).__init__() super(InstallContainerThread, self).__init__()
self.dangerzone = dangerzone
def run(self) -> None: def run(self) -> None:
isolation_provider.install() self.dangerzone.isolation_provider.install()
self.finished.emit() self.finished.emit()
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
state: Optional[str] = None state: Optional[str] = None
try: try:
container_runtime = isolation_provider.get_runtime() container_runtime = self.dangerzone.isolation_provider.get_runtime()
except isolation_provider.NoContainerTechException as e: except isolation_provider.NoContainerTechException as e:
log.error(str(e)) log.error(str(e))
state = "not_installed" state = "not_installed"
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
"Installing the Dangerzone container image.<br><br>This might take a few minutes..." "Installing the Dangerzone container image.<br><br>This might take a few minutes..."
) )
self.buttons.hide() self.buttons.hide()
self.install_container_t = InstallContainerThread() self.install_container_t = InstallContainerThread(self.dangerzone)
self.install_container_t.finished.connect(self.finished) self.install_container_t.finished.connect(self.finished)
self.install_container_t.start() self.install_container_t.start()
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
finished = QtCore.Signal(bool) finished = QtCore.Signal(bool)
update = QtCore.Signal(bool, str, int) update = QtCore.Signal(bool, str, int)
def __init__(self, document: Document, ocr_lang: str = None) -> None: def __init__(
self,
dangerzone: DangerzoneGui,
document: Document,
ocr_lang: str = None,
) -> None:
super(ConvertTask, self).__init__() super(ConvertTask, self).__init__()
self.document = document self.document = document
self.ocr_lang = ocr_lang self.ocr_lang = ocr_lang
self.error = False self.error = False
self.dangerzone = dangerzone
def convert_document(self) -> None: def convert_document(self) -> None:
isolation_provider.convert( self.dangerzone.isolation_provider.convert(
self.document, self.document,
self.ocr_lang, self.ocr_lang,
self.stdout_callback, self.stdout_callback,
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
def start_conversion(self) -> None: def start_conversion(self) -> None:
if not self.thread_pool_initized: if not self.thread_pool_initized:
max_jobs = isolation_provider.get_max_parallel_conversions() max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
self.thread_pool = ThreadPool(max_jobs) self.thread_pool = ThreadPool(max_jobs)
for doc_widget in self.document_widgets: for doc_widget in self.document_widgets:
task = ConvertTask(doc_widget.document, self.get_ocr_lang()) task = ConvertTask(
self.dangerzone, doc_widget.document, self.get_ocr_lang()
)
task.update.connect(doc_widget.update_progress) task.update.connect(doc_widget.update_progress)
task.finished.connect(doc_widget.all_done) task.finished.connect(doc_widget.all_done)
self.thread_pool.apply_async(task.convert_document) self.thread_pool.apply_async(task.convert_document)

View file

@ -7,6 +7,7 @@ import platform
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from abc import ABC, abstractmethod
from typing import Callable, List, Optional, Tuple from typing import Callable, List, Optional, Tuple
import appdirs import appdirs
@ -15,8 +16,6 @@ from colorama import Fore, Style
from .document import Document from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo from .util import get_resource_path, get_subprocess_startupinfo
container_name = "dangerzone.rocks/dangerzone"
# Define startupinfo for subprocesses # Define startupinfo for subprocesses
if platform.system() == "Windows": if platform.system() == "Windows":
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined] startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
@ -26,16 +25,44 @@ else:
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Name of the dangerzone container
container_name = "dangerzone.rocks/dangerzone"
class NoContainerTechException(Exception): class NoContainerTechException(Exception):
def __init__(self, container_tech: str) -> None: def __init__(self, container_tech: str) -> None:
super().__init__(f"{container_tech} is not installed") super().__init__(f"{container_tech} is not installed")
def get_runtime_name() -> str: class AbstractIsolationProvider(ABC):
"""
Abstracts an isolation provider
"""
@abstractmethod
def install(self) -> bool:
pass
@abstractmethod
def convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
pass
@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass
class Container(AbstractIsolationProvider):
# Name of the dangerzone container
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
def __init__(self) -> None:
pass
def get_runtime_name(self) -> str:
if platform.system() == "Linux": if platform.system() == "Linux":
runtime_name = "podman" runtime_name = "podman"
else: else:
@ -43,27 +70,25 @@ def get_runtime_name() -> str:
runtime_name = "docker" runtime_name = "docker"
return runtime_name return runtime_name
def get_runtime(self) -> str:
def get_runtime() -> str: container_tech = self.get_runtime_name()
container_tech = get_runtime_name()
runtime = shutil.which(container_tech) runtime = shutil.which(container_tech)
if runtime is None: if runtime is None:
raise NoContainerTechException(container_tech) raise NoContainerTechException(container_tech)
return runtime return runtime
def install(self) -> bool:
def install() -> bool:
""" """
Make sure the podman container is installed. Linux only. Make sure the podman container is installed. Linux only.
""" """
if is_container_installed(): if self.is_container_installed():
return True return True
# Load the container into podman # Load the container into podman
log.info("Installing Dangerzone container image...") log.info("Installing Dangerzone container image...")
p = subprocess.Popen( p = subprocess.Popen(
[get_runtime(), "load"], [self.get_runtime(), "load"],
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
startupinfo=get_subprocess_startupinfo(), startupinfo=get_subprocess_startupinfo(),
) )
@ -80,15 +105,14 @@ def install() -> bool:
break break
p.communicate() p.communicate()
if not is_container_installed(): if not self.is_container_installed():
log.error("Failed to install the container image") log.error("Failed to install the container image")
return False return False
log.info("Container image installed") log.info("Container image installed")
return True return True
def is_container_installed(self) -> bool:
def is_container_installed() -> bool:
""" """
See if the podman container is installed. Linux only. See if the podman container is installed. Linux only.
""" """
@ -100,12 +124,12 @@ def is_container_installed() -> bool:
installed = False installed = False
found_image_id = subprocess.check_output( found_image_id = subprocess.check_output(
[ [
get_runtime(), self.get_runtime(),
"image", "image",
"list", "list",
"--format", "--format",
"{{.ID}}", "{{.ID}}",
container_name, self.CONTAINER_NAME,
], ],
text=True, text=True,
startupinfo=get_subprocess_startupinfo(), startupinfo=get_subprocess_startupinfo(),
@ -121,7 +145,7 @@ def is_container_installed() -> bool:
try: try:
subprocess.check_output( subprocess.check_output(
[get_runtime(), "rmi", "--force", found_image_id], [self.get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(), startupinfo=get_subprocess_startupinfo(),
) )
except: except:
@ -129,8 +153,7 @@ def is_container_installed() -> bool:
return installed return installed
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
""" """
Parses a line returned by the container. Parses a line returned by the container.
""" """
@ -152,8 +175,8 @@ def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
return (status["error"], status["text"], status["percentage"]) return (status["error"], status["text"], status["percentage"])
def exec( def exec(
self,
document: Document, document: Document,
args: List[str], args: List[str],
stdout_callback: Optional[Callable] = None, stdout_callback: Optional[Callable] = None,
@ -172,7 +195,7 @@ def exec(
) as p: ) as p:
if p.stdout is not None: if p.stdout is not None:
for line in p.stdout: for line in p.stdout:
(error, text, percentage) = parse_progress(document, line) (error, text, percentage) = self.parse_progress(document, line)
if error: if error:
document.mark_as_failed() document.mark_as_failed()
if percentage == 100.0: if percentage == 100.0:
@ -183,16 +206,16 @@ def exec(
p.communicate() p.communicate()
return p.returncode return p.returncode
def exec_container( def exec_container(
self,
document: Document, document: Document,
command: List[str], command: List[str],
extra_args: List[str] = [], extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None, stdout_callback: Optional[Callable] = None,
) -> int: ) -> int:
container_runtime = get_runtime() container_runtime = self.get_runtime()
if get_runtime_name() == "podman": if self.get_runtime_name() == "podman":
platform_args = [] platform_args = []
security_args = ["--security-opt", "no-new-privileges"] security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"] security_args += ["--userns", "keep-id"]
@ -213,15 +236,15 @@ def exec_container(
+ security_args + security_args
+ prevent_leakage_args + prevent_leakage_args
+ extra_args + extra_args
+ [container_name] + [self.CONTAINER_NAME]
+ command + command
) )
args = [container_runtime] + args args = [container_runtime] + args
return exec(document, args, stdout_callback) return self.exec(document, args, stdout_callback)
def convert( def convert(
self,
document: Document, document: Document,
ocr_lang: Optional[str], ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None, stdout_callback: Optional[Callable] = None,
@ -244,21 +267,29 @@ def convert(
os.makedirs(safe_dir, exist_ok=True) os.makedirs(safe_dir, exist_ok=True)
# Convert document to pixels # Convert document to pixels
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"] command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"document-to-pixels",
]
extra_args = [ extra_args = [
"-v", "-v",
f"{document.input_filename}:/tmp/input_file", f"{document.input_filename}:/tmp/input_file",
"-v", "-v",
f"{pixel_dir}:/dangerzone", f"{pixel_dir}:/dangerzone",
] ]
ret = exec_container(document, command, extra_args, stdout_callback) ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0: if ret != 0:
log.error("documents-to-pixels failed") log.error("documents-to-pixels failed")
else: else:
# TODO: validate convert to pixels output # TODO: validate convert to pixels output
# Convert pixels to safe PDF # Convert pixels to safe PDF
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"] command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"pixels-to-pdf",
]
extra_args = [ extra_args = [
"-v", "-v",
f"{pixel_dir}:/dangerzone", f"{pixel_dir}:/dangerzone",
@ -269,7 +300,7 @@ def convert(
"-e", "-e",
f"OCR_LANGUAGE={ocr_lang}", f"OCR_LANGUAGE={ocr_lang}",
] ]
ret = exec_container(document, command, extra_args, stdout_callback) ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0: if ret != 0:
log.error("pixels-to-pdf failed") log.error("pixels-to-pdf failed")
else: else:
@ -293,8 +324,7 @@ def convert(
return success return success
def get_max_parallel_conversions(self) -> int:
def get_max_parallel_conversions() -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled # FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257 # https://github.com/freedomofpress/dangerzone/issues/257
@ -307,11 +337,11 @@ def get_max_parallel_conversions() -> int:
if cpu_count is not None: if cpu_count is not None:
n_cpu = cpu_count n_cpu = cpu_count
elif get_runtime_name() == "docker": elif self.get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM # For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM # So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output( n_cpu_str = subprocess.check_output(
[get_runtime(), "info", "--format", "{{.NCPU}}"], [self.get_runtime(), "info", "--format", "{{.NCPU}}"],
text=True, text=True,
startupinfo=get_subprocess_startupinfo(), startupinfo=get_subprocess_startupinfo(),
) )

View file

@ -41,6 +41,8 @@ class DangerzoneCore(object):
self.documents: List[Document] = [] self.documents: List[Document] = []
self.isolation_provider = isolation_provider.Container()
def add_document_from_filename( def add_document_from_filename(
self, self,
input_filename: str, input_filename: str,
@ -59,13 +61,13 @@ class DangerzoneCore(object):
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None: ) -> None:
def convert_doc(document: Document) -> None: def convert_doc(document: Document) -> None:
success = isolation_provider.convert( success = self.isolation_provider.convert(
document, document,
ocr_lang, ocr_lang,
stdout_callback, stdout_callback,
) )
max_jobs = isolation_provider.get_max_parallel_conversions() max_jobs = self.isolation_provider.get_max_parallel_conversions()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
executor.map(convert_doc, self.documents) executor.map(convert_doc, self.documents)