mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 02:12:36 +02:00
Abstract container into an IsolationProvider
Encapsulate the container logic in an implementation of AbstractIsolationProvider. This abstraction makes it possible to add other types of isolation providers, such as a dummy one.
This commit is contained in:
parent
1114a0dfa1
commit
a4f27afdc6
4 changed files with 331 additions and 290 deletions
|
@ -73,7 +73,7 @@ def cli_main(
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Ensure container is installed
|
# Ensure container is installed
|
||||||
isolation_provider.install()
|
dangerzone.isolation_provider.install()
|
||||||
|
|
||||||
# Convert the document
|
# Convert the document
|
||||||
print_header("Converting document to safe PDF")
|
print_header("Converting document to safe PDF")
|
||||||
|
|
|
@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
|
||||||
class InstallContainerThread(QtCore.QThread):
|
class InstallContainerThread(QtCore.QThread):
|
||||||
finished = QtCore.Signal()
|
finished = QtCore.Signal()
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self, dangerzone: DangerzoneGui) -> None:
|
||||||
super(InstallContainerThread, self).__init__()
|
super(InstallContainerThread, self).__init__()
|
||||||
|
self.dangerzone = dangerzone
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
isolation_provider.install()
|
self.dangerzone.isolation_provider.install()
|
||||||
self.finished.emit()
|
self.finished.emit()
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
||||||
state: Optional[str] = None
|
state: Optional[str] = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
container_runtime = isolation_provider.get_runtime()
|
container_runtime = self.dangerzone.isolation_provider.get_runtime()
|
||||||
except isolation_provider.NoContainerTechException as e:
|
except isolation_provider.NoContainerTechException as e:
|
||||||
log.error(str(e))
|
log.error(str(e))
|
||||||
state = "not_installed"
|
state = "not_installed"
|
||||||
|
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
||||||
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
|
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
|
||||||
)
|
)
|
||||||
self.buttons.hide()
|
self.buttons.hide()
|
||||||
self.install_container_t = InstallContainerThread()
|
self.install_container_t = InstallContainerThread(self.dangerzone)
|
||||||
self.install_container_t.finished.connect(self.finished)
|
self.install_container_t.finished.connect(self.finished)
|
||||||
self.install_container_t.start()
|
self.install_container_t.start()
|
||||||
|
|
||||||
|
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
|
||||||
finished = QtCore.Signal(bool)
|
finished = QtCore.Signal(bool)
|
||||||
update = QtCore.Signal(bool, str, int)
|
update = QtCore.Signal(bool, str, int)
|
||||||
|
|
||||||
def __init__(self, document: Document, ocr_lang: str = None) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
dangerzone: DangerzoneGui,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: str = None,
|
||||||
|
) -> None:
|
||||||
super(ConvertTask, self).__init__()
|
super(ConvertTask, self).__init__()
|
||||||
self.document = document
|
self.document = document
|
||||||
self.ocr_lang = ocr_lang
|
self.ocr_lang = ocr_lang
|
||||||
self.error = False
|
self.error = False
|
||||||
|
self.dangerzone = dangerzone
|
||||||
|
|
||||||
def convert_document(self) -> None:
|
def convert_document(self) -> None:
|
||||||
isolation_provider.convert(
|
self.dangerzone.isolation_provider.convert(
|
||||||
self.document,
|
self.document,
|
||||||
self.ocr_lang,
|
self.ocr_lang,
|
||||||
self.stdout_callback,
|
self.stdout_callback,
|
||||||
|
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
|
||||||
|
|
||||||
def start_conversion(self) -> None:
|
def start_conversion(self) -> None:
|
||||||
if not self.thread_pool_initized:
|
if not self.thread_pool_initized:
|
||||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
|
||||||
self.thread_pool = ThreadPool(max_jobs)
|
self.thread_pool = ThreadPool(max_jobs)
|
||||||
|
|
||||||
for doc_widget in self.document_widgets:
|
for doc_widget in self.document_widgets:
|
||||||
task = ConvertTask(doc_widget.document, self.get_ocr_lang())
|
task = ConvertTask(
|
||||||
|
self.dangerzone, doc_widget.document, self.get_ocr_lang()
|
||||||
|
)
|
||||||
task.update.connect(doc_widget.update_progress)
|
task.update.connect(doc_widget.update_progress)
|
||||||
task.finished.connect(doc_widget.all_done)
|
task.finished.connect(doc_widget.all_done)
|
||||||
self.thread_pool.apply_async(task.convert_document)
|
self.thread_pool.apply_async(task.convert_document)
|
||||||
|
|
|
@ -7,6 +7,7 @@ import platform
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from typing import Callable, List, Optional, Tuple
|
from typing import Callable, List, Optional, Tuple
|
||||||
|
|
||||||
import appdirs
|
import appdirs
|
||||||
|
@ -15,8 +16,6 @@ from colorama import Fore, Style
|
||||||
from .document import Document
|
from .document import Document
|
||||||
from .util import get_resource_path, get_subprocess_startupinfo
|
from .util import get_resource_path, get_subprocess_startupinfo
|
||||||
|
|
||||||
container_name = "dangerzone.rocks/dangerzone"
|
|
||||||
|
|
||||||
# Define startupinfo for subprocesses
|
# Define startupinfo for subprocesses
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
|
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
|
||||||
|
@ -26,16 +25,44 @@ else:
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Name of the dangerzone container
|
|
||||||
container_name = "dangerzone.rocks/dangerzone"
|
|
||||||
|
|
||||||
|
|
||||||
class NoContainerTechException(Exception):
|
class NoContainerTechException(Exception):
|
||||||
def __init__(self, container_tech: str) -> None:
|
def __init__(self, container_tech: str) -> None:
|
||||||
super().__init__(f"{container_tech} is not installed")
|
super().__init__(f"{container_tech} is not installed")
|
||||||
|
|
||||||
|
|
||||||
def get_runtime_name() -> str:
|
class AbstractIsolationProvider(ABC):
|
||||||
|
"""
|
||||||
|
Abstracts an isolation provider
|
||||||
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def install(self) -> bool:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def convert(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: Optional[str],
|
||||||
|
stdout_callback: Optional[Callable] = None,
|
||||||
|
) -> bool:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_max_parallel_conversions(self) -> int:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class Container(AbstractIsolationProvider):
|
||||||
|
|
||||||
|
# Name of the dangerzone container
|
||||||
|
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_runtime_name(self) -> str:
|
||||||
if platform.system() == "Linux":
|
if platform.system() == "Linux":
|
||||||
runtime_name = "podman"
|
runtime_name = "podman"
|
||||||
else:
|
else:
|
||||||
|
@ -43,27 +70,25 @@ def get_runtime_name() -> str:
|
||||||
runtime_name = "docker"
|
runtime_name = "docker"
|
||||||
return runtime_name
|
return runtime_name
|
||||||
|
|
||||||
|
def get_runtime(self) -> str:
|
||||||
def get_runtime() -> str:
|
container_tech = self.get_runtime_name()
|
||||||
container_tech = get_runtime_name()
|
|
||||||
runtime = shutil.which(container_tech)
|
runtime = shutil.which(container_tech)
|
||||||
if runtime is None:
|
if runtime is None:
|
||||||
raise NoContainerTechException(container_tech)
|
raise NoContainerTechException(container_tech)
|
||||||
return runtime
|
return runtime
|
||||||
|
|
||||||
|
def install(self) -> bool:
|
||||||
def install() -> bool:
|
|
||||||
"""
|
"""
|
||||||
Make sure the podman container is installed. Linux only.
|
Make sure the podman container is installed. Linux only.
|
||||||
"""
|
"""
|
||||||
if is_container_installed():
|
if self.is_container_installed():
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Load the container into podman
|
# Load the container into podman
|
||||||
log.info("Installing Dangerzone container image...")
|
log.info("Installing Dangerzone container image...")
|
||||||
|
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
[get_runtime(), "load"],
|
[self.get_runtime(), "load"],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
)
|
)
|
||||||
|
@ -80,15 +105,14 @@ def install() -> bool:
|
||||||
break
|
break
|
||||||
p.communicate()
|
p.communicate()
|
||||||
|
|
||||||
if not is_container_installed():
|
if not self.is_container_installed():
|
||||||
log.error("Failed to install the container image")
|
log.error("Failed to install the container image")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
log.info("Container image installed")
|
log.info("Container image installed")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def is_container_installed(self) -> bool:
|
||||||
def is_container_installed() -> bool:
|
|
||||||
"""
|
"""
|
||||||
See if the podman container is installed. Linux only.
|
See if the podman container is installed. Linux only.
|
||||||
"""
|
"""
|
||||||
|
@ -100,12 +124,12 @@ def is_container_installed() -> bool:
|
||||||
installed = False
|
installed = False
|
||||||
found_image_id = subprocess.check_output(
|
found_image_id = subprocess.check_output(
|
||||||
[
|
[
|
||||||
get_runtime(),
|
self.get_runtime(),
|
||||||
"image",
|
"image",
|
||||||
"list",
|
"list",
|
||||||
"--format",
|
"--format",
|
||||||
"{{.ID}}",
|
"{{.ID}}",
|
||||||
container_name,
|
self.CONTAINER_NAME,
|
||||||
],
|
],
|
||||||
text=True,
|
text=True,
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
|
@ -121,7 +145,7 @@ def is_container_installed() -> bool:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(
|
subprocess.check_output(
|
||||||
[get_runtime(), "rmi", "--force", found_image_id],
|
[self.get_runtime(), "rmi", "--force", found_image_id],
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
)
|
)
|
||||||
except:
|
except:
|
||||||
|
@ -129,8 +153,7 @@ def is_container_installed() -> bool:
|
||||||
|
|
||||||
return installed
|
return installed
|
||||||
|
|
||||||
|
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
|
||||||
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
|
|
||||||
"""
|
"""
|
||||||
Parses a line returned by the container.
|
Parses a line returned by the container.
|
||||||
"""
|
"""
|
||||||
|
@ -152,12 +175,12 @@ def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
|
||||||
|
|
||||||
return (status["error"], status["text"], status["percentage"])
|
return (status["error"], status["text"], status["percentage"])
|
||||||
|
|
||||||
|
def exec(
|
||||||
def exec(
|
self,
|
||||||
document: Document,
|
document: Document,
|
||||||
args: List[str],
|
args: List[str],
|
||||||
stdout_callback: Optional[Callable] = None,
|
stdout_callback: Optional[Callable] = None,
|
||||||
) -> int:
|
) -> int:
|
||||||
args_str = " ".join(pipes.quote(s) for s in args)
|
args_str = " ".join(pipes.quote(s) for s in args)
|
||||||
log.info("> " + args_str)
|
log.info("> " + args_str)
|
||||||
|
|
||||||
|
@ -172,7 +195,7 @@ def exec(
|
||||||
) as p:
|
) as p:
|
||||||
if p.stdout is not None:
|
if p.stdout is not None:
|
||||||
for line in p.stdout:
|
for line in p.stdout:
|
||||||
(error, text, percentage) = parse_progress(document, line)
|
(error, text, percentage) = self.parse_progress(document, line)
|
||||||
if error:
|
if error:
|
||||||
document.mark_as_failed()
|
document.mark_as_failed()
|
||||||
if percentage == 100.0:
|
if percentage == 100.0:
|
||||||
|
@ -183,16 +206,16 @@ def exec(
|
||||||
p.communicate()
|
p.communicate()
|
||||||
return p.returncode
|
return p.returncode
|
||||||
|
|
||||||
|
def exec_container(
|
||||||
def exec_container(
|
self,
|
||||||
document: Document,
|
document: Document,
|
||||||
command: List[str],
|
command: List[str],
|
||||||
extra_args: List[str] = [],
|
extra_args: List[str] = [],
|
||||||
stdout_callback: Optional[Callable] = None,
|
stdout_callback: Optional[Callable] = None,
|
||||||
) -> int:
|
) -> int:
|
||||||
container_runtime = get_runtime()
|
container_runtime = self.get_runtime()
|
||||||
|
|
||||||
if get_runtime_name() == "podman":
|
if self.get_runtime_name() == "podman":
|
||||||
platform_args = []
|
platform_args = []
|
||||||
security_args = ["--security-opt", "no-new-privileges"]
|
security_args = ["--security-opt", "no-new-privileges"]
|
||||||
security_args += ["--userns", "keep-id"]
|
security_args += ["--userns", "keep-id"]
|
||||||
|
@ -213,19 +236,19 @@ def exec_container(
|
||||||
+ security_args
|
+ security_args
|
||||||
+ prevent_leakage_args
|
+ prevent_leakage_args
|
||||||
+ extra_args
|
+ extra_args
|
||||||
+ [container_name]
|
+ [self.CONTAINER_NAME]
|
||||||
+ command
|
+ command
|
||||||
)
|
)
|
||||||
|
|
||||||
args = [container_runtime] + args
|
args = [container_runtime] + args
|
||||||
return exec(document, args, stdout_callback)
|
return self.exec(document, args, stdout_callback)
|
||||||
|
|
||||||
|
def convert(
|
||||||
def convert(
|
self,
|
||||||
document: Document,
|
document: Document,
|
||||||
ocr_lang: Optional[str],
|
ocr_lang: Optional[str],
|
||||||
stdout_callback: Optional[Callable] = None,
|
stdout_callback: Optional[Callable] = None,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
success = False
|
success = False
|
||||||
document.mark_as_converting()
|
document.mark_as_converting()
|
||||||
|
|
||||||
|
@ -244,21 +267,29 @@ def convert(
|
||||||
os.makedirs(safe_dir, exist_ok=True)
|
os.makedirs(safe_dir, exist_ok=True)
|
||||||
|
|
||||||
# Convert document to pixels
|
# Convert document to pixels
|
||||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
|
command = [
|
||||||
|
"/usr/bin/python3",
|
||||||
|
"/usr/local/bin/dangerzone.py",
|
||||||
|
"document-to-pixels",
|
||||||
|
]
|
||||||
extra_args = [
|
extra_args = [
|
||||||
"-v",
|
"-v",
|
||||||
f"{document.input_filename}:/tmp/input_file",
|
f"{document.input_filename}:/tmp/input_file",
|
||||||
"-v",
|
"-v",
|
||||||
f"{pixel_dir}:/dangerzone",
|
f"{pixel_dir}:/dangerzone",
|
||||||
]
|
]
|
||||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||||
if ret != 0:
|
if ret != 0:
|
||||||
log.error("documents-to-pixels failed")
|
log.error("documents-to-pixels failed")
|
||||||
else:
|
else:
|
||||||
# TODO: validate convert to pixels output
|
# TODO: validate convert to pixels output
|
||||||
|
|
||||||
# Convert pixels to safe PDF
|
# Convert pixels to safe PDF
|
||||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
|
command = [
|
||||||
|
"/usr/bin/python3",
|
||||||
|
"/usr/local/bin/dangerzone.py",
|
||||||
|
"pixels-to-pdf",
|
||||||
|
]
|
||||||
extra_args = [
|
extra_args = [
|
||||||
"-v",
|
"-v",
|
||||||
f"{pixel_dir}:/dangerzone",
|
f"{pixel_dir}:/dangerzone",
|
||||||
|
@ -269,7 +300,7 @@ def convert(
|
||||||
"-e",
|
"-e",
|
||||||
f"OCR_LANGUAGE={ocr_lang}",
|
f"OCR_LANGUAGE={ocr_lang}",
|
||||||
]
|
]
|
||||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||||
if ret != 0:
|
if ret != 0:
|
||||||
log.error("pixels-to-pdf failed")
|
log.error("pixels-to-pdf failed")
|
||||||
else:
|
else:
|
||||||
|
@ -293,8 +324,7 @@ def convert(
|
||||||
|
|
||||||
return success
|
return success
|
||||||
|
|
||||||
|
def get_max_parallel_conversions(self) -> int:
|
||||||
def get_max_parallel_conversions() -> int:
|
|
||||||
|
|
||||||
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
||||||
# https://github.com/freedomofpress/dangerzone/issues/257
|
# https://github.com/freedomofpress/dangerzone/issues/257
|
||||||
|
@ -307,11 +337,11 @@ def get_max_parallel_conversions() -> int:
|
||||||
if cpu_count is not None:
|
if cpu_count is not None:
|
||||||
n_cpu = cpu_count
|
n_cpu = cpu_count
|
||||||
|
|
||||||
elif get_runtime_name() == "docker":
|
elif self.get_runtime_name() == "docker":
|
||||||
# For Windows and MacOS containers run in VM
|
# For Windows and MacOS containers run in VM
|
||||||
# So we obtain the CPU count for the VM
|
# So we obtain the CPU count for the VM
|
||||||
n_cpu_str = subprocess.check_output(
|
n_cpu_str = subprocess.check_output(
|
||||||
[get_runtime(), "info", "--format", "{{.NCPU}}"],
|
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
|
||||||
text=True,
|
text=True,
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
)
|
)
|
||||||
|
|
|
@ -41,6 +41,8 @@ class DangerzoneCore(object):
|
||||||
|
|
||||||
self.documents: List[Document] = []
|
self.documents: List[Document] = []
|
||||||
|
|
||||||
|
self.isolation_provider = isolation_provider.Container()
|
||||||
|
|
||||||
def add_document_from_filename(
|
def add_document_from_filename(
|
||||||
self,
|
self,
|
||||||
input_filename: str,
|
input_filename: str,
|
||||||
|
@ -59,13 +61,13 @@ class DangerzoneCore(object):
|
||||||
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
def convert_doc(document: Document) -> None:
|
def convert_doc(document: Document) -> None:
|
||||||
success = isolation_provider.convert(
|
success = self.isolation_provider.convert(
|
||||||
document,
|
document,
|
||||||
ocr_lang,
|
ocr_lang,
|
||||||
stdout_callback,
|
stdout_callback,
|
||||||
)
|
)
|
||||||
|
|
||||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
max_jobs = self.isolation_provider.get_max_parallel_conversions()
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
|
||||||
executor.map(convert_doc, self.documents)
|
executor.map(convert_doc, self.documents)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue