Abstract container into an IsolationProvider

Encapsulate container logic into an implementation of
AbstractIsolationProvider. This flexibility will allow for other types
of isolation managers, such as a Dummy one.
This commit is contained in:
deeplow 2022-12-27 13:57:19 +00:00
parent 1114a0dfa1
commit a4f27afdc6
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 331 additions and 290 deletions

View file

@ -73,7 +73,7 @@ def cli_main(
exit(1) exit(1)
# Ensure container is installed # Ensure container is installed
isolation_provider.install() dangerzone.isolation_provider.install()
# Convert the document # Convert the document
print_header("Converting document to safe PDF") print_header("Converting document to safe PDF")

View file

@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
class InstallContainerThread(QtCore.QThread): class InstallContainerThread(QtCore.QThread):
finished = QtCore.Signal() finished = QtCore.Signal()
def __init__(self) -> None: def __init__(self, dangerzone: DangerzoneGui) -> None:
super(InstallContainerThread, self).__init__() super(InstallContainerThread, self).__init__()
self.dangerzone = dangerzone
def run(self) -> None: def run(self) -> None:
isolation_provider.install() self.dangerzone.isolation_provider.install()
self.finished.emit() self.finished.emit()
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
state: Optional[str] = None state: Optional[str] = None
try: try:
container_runtime = isolation_provider.get_runtime() container_runtime = self.dangerzone.isolation_provider.get_runtime()
except isolation_provider.NoContainerTechException as e: except isolation_provider.NoContainerTechException as e:
log.error(str(e)) log.error(str(e))
state = "not_installed" state = "not_installed"
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
"Installing the Dangerzone container image.<br><br>This might take a few minutes..." "Installing the Dangerzone container image.<br><br>This might take a few minutes..."
) )
self.buttons.hide() self.buttons.hide()
self.install_container_t = InstallContainerThread() self.install_container_t = InstallContainerThread(self.dangerzone)
self.install_container_t.finished.connect(self.finished) self.install_container_t.finished.connect(self.finished)
self.install_container_t.start() self.install_container_t.start()
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
finished = QtCore.Signal(bool) finished = QtCore.Signal(bool)
update = QtCore.Signal(bool, str, int) update = QtCore.Signal(bool, str, int)
def __init__(self, document: Document, ocr_lang: str = None) -> None: def __init__(
self,
dangerzone: DangerzoneGui,
document: Document,
ocr_lang: str = None,
) -> None:
super(ConvertTask, self).__init__() super(ConvertTask, self).__init__()
self.document = document self.document = document
self.ocr_lang = ocr_lang self.ocr_lang = ocr_lang
self.error = False self.error = False
self.dangerzone = dangerzone
def convert_document(self) -> None: def convert_document(self) -> None:
isolation_provider.convert( self.dangerzone.isolation_provider.convert(
self.document, self.document,
self.ocr_lang, self.ocr_lang,
self.stdout_callback, self.stdout_callback,
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
def start_conversion(self) -> None: def start_conversion(self) -> None:
if not self.thread_pool_initized: if not self.thread_pool_initized:
max_jobs = isolation_provider.get_max_parallel_conversions() max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
self.thread_pool = ThreadPool(max_jobs) self.thread_pool = ThreadPool(max_jobs)
for doc_widget in self.document_widgets: for doc_widget in self.document_widgets:
task = ConvertTask(doc_widget.document, self.get_ocr_lang()) task = ConvertTask(
self.dangerzone, doc_widget.document, self.get_ocr_lang()
)
task.update.connect(doc_widget.update_progress) task.update.connect(doc_widget.update_progress)
task.finished.connect(doc_widget.all_done) task.finished.connect(doc_widget.all_done)
self.thread_pool.apply_async(task.convert_document) self.thread_pool.apply_async(task.convert_document)

View file

@ -7,6 +7,7 @@ import platform
import shutil import shutil
import subprocess import subprocess
import tempfile import tempfile
from abc import ABC, abstractmethod
from typing import Callable, List, Optional, Tuple from typing import Callable, List, Optional, Tuple
import appdirs import appdirs
@ -15,8 +16,6 @@ from colorama import Fore, Style
from .document import Document from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo from .util import get_resource_path, get_subprocess_startupinfo
container_name = "dangerzone.rocks/dangerzone"
# Define startupinfo for subprocesses # Define startupinfo for subprocesses
if platform.system() == "Windows": if platform.system() == "Windows":
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined] startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
@ -26,298 +25,329 @@ else:
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Name of the dangerzone container
container_name = "dangerzone.rocks/dangerzone"
class NoContainerTechException(Exception): class NoContainerTechException(Exception):
def __init__(self, container_tech: str) -> None: def __init__(self, container_tech: str) -> None:
super().__init__(f"{container_tech} is not installed") super().__init__(f"{container_tech} is not installed")
def get_runtime_name() -> str: class AbstractIsolationProvider(ABC):
if platform.system() == "Linux":
runtime_name = "podman"
else:
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
runtime_name = "docker"
return runtime_name
def get_runtime() -> str:
container_tech = get_runtime_name()
runtime = shutil.which(container_tech)
if runtime is None:
raise NoContainerTechException(container_tech)
return runtime
def install() -> bool:
""" """
Make sure the podman container is installed. Linux only. Abstracts an isolation provider
""" """
if is_container_installed():
@abstractmethod
def install(self) -> bool:
pass
@abstractmethod
def convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
pass
@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass
class Container(AbstractIsolationProvider):
# Name of the dangerzone container
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
def __init__(self) -> None:
pass
def get_runtime_name(self) -> str:
if platform.system() == "Linux":
runtime_name = "podman"
else:
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
runtime_name = "docker"
return runtime_name
def get_runtime(self) -> str:
container_tech = self.get_runtime_name()
runtime = shutil.which(container_tech)
if runtime is None:
raise NoContainerTechException(container_tech)
return runtime
def install(self) -> bool:
"""
Make sure the podman container is installed. Linux only.
"""
if self.is_container_installed():
return True
# Load the container into podman
log.info("Installing Dangerzone container image...")
p = subprocess.Popen(
[self.get_runtime(), "load"],
stdin=subprocess.PIPE,
startupinfo=get_subprocess_startupinfo(),
)
chunk_size = 10240
compressed_container_path = get_resource_path("container.tar.gz")
with gzip.open(compressed_container_path) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) > 0:
if p.stdin:
p.stdin.write(chunk)
else:
break
p.communicate()
if not self.is_container_installed():
log.error("Failed to install the container image")
return False
log.info("Container image installed")
return True return True
# Load the container into podman def is_container_installed(self) -> bool:
log.info("Installing Dangerzone container image...") """
See if the podman container is installed. Linux only.
"""
# Get the image id
with open(get_resource_path("image-id.txt")) as f:
expected_image_id = f.read().strip()
p = subprocess.Popen( # See if this image is already installed
[get_runtime(), "load"], installed = False
stdin=subprocess.PIPE, found_image_id = subprocess.check_output(
startupinfo=get_subprocess_startupinfo(), [
) self.get_runtime(),
"image",
chunk_size = 10240 "list",
compressed_container_path = get_resource_path("container.tar.gz") "--format",
with gzip.open(compressed_container_path) as f: "{{.ID}}",
while True: self.CONTAINER_NAME,
chunk = f.read(chunk_size) ],
if len(chunk) > 0:
if p.stdin:
p.stdin.write(chunk)
else:
break
p.communicate()
if not is_container_installed():
log.error("Failed to install the container image")
return False
log.info("Container image installed")
return True
def is_container_installed() -> bool:
"""
See if the podman container is installed. Linux only.
"""
# Get the image id
with open(get_resource_path("image-id.txt")) as f:
expected_image_id = f.read().strip()
# See if this image is already installed
installed = False
found_image_id = subprocess.check_output(
[
get_runtime(),
"image",
"list",
"--format",
"{{.ID}}",
container_name,
],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
found_image_id = found_image_id.strip()
if found_image_id == expected_image_id:
installed = True
elif found_image_id == "":
pass
else:
log.info("Deleting old dangerzone container image")
try:
subprocess.check_output(
[get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(),
)
except:
log.warning("Couldn't delete old container image, so leaving it there")
return installed
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
with subprocess.Popen(
args,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
(error, text, percentage) = parse_progress(document, line)
if error:
document.mark_as_failed()
if percentage == 100.0:
document.mark_as_safe()
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = get_runtime()
if get_runtime_name() == "podman":
platform_args = []
security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"]
else:
platform_args = ["--platform", "linux/amd64"]
security_args = ["--security-opt=no-new-privileges:true"]
# drop all linux kernel capabilities
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
prevent_leakage_args = ["--rm"]
args = (
["run", "--network", "none"]
+ platform_args
+ user_args
+ security_args
+ prevent_leakage_args
+ extra_args
+ [container_name]
+ command
)
args = [container_runtime] + args
return exec(document, args, stdout_callback)
def convert(
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
document.mark_as_converting()
if ocr_lang:
ocr = "1"
else:
ocr = "0"
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
os.makedirs(dz_tmp, exist_ok=True)
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
pixel_dir = os.path.join(tmpdir.name, "pixels")
safe_dir = os.path.join(tmpdir.name, "safe")
os.makedirs(pixel_dir, exist_ok=True)
os.makedirs(safe_dir, exist_ok=True)
# Convert document to pixels
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
extra_args = [
"-v",
f"{document.input_filename}:/tmp/input_file",
"-v",
f"{pixel_dir}:/dangerzone",
]
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
# TODO: validate convert to pixels output
# Convert pixels to safe PDF
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
extra_args = [
"-v",
f"{pixel_dir}:/dangerzone",
"-v",
f"{safe_dir}:/safezone",
"-e",
f"OCR={ocr}",
"-e",
f"OCR_LANGUAGE={ocr_lang}",
]
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, document.output_filename)
if document.archive_after_conversion:
document.archive()
# We did it
success = True
# Clean up
tmpdir.cleanup()
return success
def get_max_parallel_conversions() -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
return 1
n_cpu = 1 # type: ignore [unreachable]
if platform.system() == "Linux":
# if on linux containers run natively
cpu_count = os.cpu_count()
if cpu_count is not None:
n_cpu = cpu_count
elif get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output(
[get_runtime(), "info", "--format", "{{.NCPU}}"],
text=True, text=True,
startupinfo=get_subprocess_startupinfo(), startupinfo=get_subprocess_startupinfo(),
) )
n_cpu = int(n_cpu_str.strip()) found_image_id = found_image_id.strip()
return 2 * n_cpu + 1 if found_image_id == expected_image_id:
installed = True
elif found_image_id == "":
pass
else:
log.info("Deleting old dangerzone container image")
try:
subprocess.check_output(
[self.get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(),
)
except:
log.warning("Couldn't delete old container image, so leaving it there")
return installed
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
self,
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
with subprocess.Popen(
args,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
(error, text, percentage) = self.parse_progress(document, line)
if error:
document.mark_as_failed()
if percentage == 100.0:
document.mark_as_safe()
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
self,
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = self.get_runtime()
if self.get_runtime_name() == "podman":
platform_args = []
security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"]
else:
platform_args = ["--platform", "linux/amd64"]
security_args = ["--security-opt=no-new-privileges:true"]
# drop all linux kernel capabilities
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
prevent_leakage_args = ["--rm"]
args = (
["run", "--network", "none"]
+ platform_args
+ user_args
+ security_args
+ prevent_leakage_args
+ extra_args
+ [self.CONTAINER_NAME]
+ command
)
args = [container_runtime] + args
return self.exec(document, args, stdout_callback)
def convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
document.mark_as_converting()
if ocr_lang:
ocr = "1"
else:
ocr = "0"
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
os.makedirs(dz_tmp, exist_ok=True)
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
pixel_dir = os.path.join(tmpdir.name, "pixels")
safe_dir = os.path.join(tmpdir.name, "safe")
os.makedirs(pixel_dir, exist_ok=True)
os.makedirs(safe_dir, exist_ok=True)
# Convert document to pixels
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"document-to-pixels",
]
extra_args = [
"-v",
f"{document.input_filename}:/tmp/input_file",
"-v",
f"{pixel_dir}:/dangerzone",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
# TODO: validate convert to pixels output
# Convert pixels to safe PDF
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"pixels-to-pdf",
]
extra_args = [
"-v",
f"{pixel_dir}:/dangerzone",
"-v",
f"{safe_dir}:/safezone",
"-e",
f"OCR={ocr}",
"-e",
f"OCR_LANGUAGE={ocr_lang}",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, document.output_filename)
if document.archive_after_conversion:
document.archive()
# We did it
success = True
# Clean up
tmpdir.cleanup()
return success
def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
return 1
n_cpu = 1 # type: ignore [unreachable]
if platform.system() == "Linux":
# if on linux containers run natively
cpu_count = os.cpu_count()
if cpu_count is not None:
n_cpu = cpu_count
elif self.get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output(
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
n_cpu = int(n_cpu_str.strip())
return 2 * n_cpu + 1
# From global_common: # From global_common:

View file

@ -41,6 +41,8 @@ class DangerzoneCore(object):
self.documents: List[Document] = [] self.documents: List[Document] = []
self.isolation_provider = isolation_provider.Container()
def add_document_from_filename( def add_document_from_filename(
self, self,
input_filename: str, input_filename: str,
@ -59,13 +61,13 @@ class DangerzoneCore(object):
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None: ) -> None:
def convert_doc(document: Document) -> None: def convert_doc(document: Document) -> None:
success = isolation_provider.convert( success = self.isolation_provider.convert(
document, document,
ocr_lang, ocr_lang,
stdout_callback, stdout_callback,
) )
max_jobs = isolation_provider.get_max_parallel_conversions() max_jobs = self.isolation_provider.get_max_parallel_conversions()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor: with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
executor.map(convert_doc, self.documents) executor.map(convert_doc, self.documents)