Abstract container into an IsolationProvider

Encapsulate container logic into an implementation of
AbstractIsolationProvider. This flexibility will allow for other types
of isolation managers, such as a Dummy one.
This commit is contained in:
deeplow 2022-12-27 13:57:19 +00:00
parent 1114a0dfa1
commit a4f27afdc6
No known key found for this signature in database
GPG key ID: 577982871529A52A
4 changed files with 331 additions and 290 deletions

View file

@ -73,7 +73,7 @@ def cli_main(
exit(1)
# Ensure container is installed
isolation_provider.install()
dangerzone.isolation_provider.install()
# Convert the document
print_header("Converting document to safe PDF")

View file

@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
class InstallContainerThread(QtCore.QThread):
finished = QtCore.Signal()
def __init__(self) -> None:
def __init__(self, dangerzone: DangerzoneGui) -> None:
super(InstallContainerThread, self).__init__()
self.dangerzone = dangerzone
def run(self) -> None:
isolation_provider.install()
self.dangerzone.isolation_provider.install()
self.finished.emit()
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
state: Optional[str] = None
try:
container_runtime = isolation_provider.get_runtime()
container_runtime = self.dangerzone.isolation_provider.get_runtime()
except isolation_provider.NoContainerTechException as e:
log.error(str(e))
state = "not_installed"
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
)
self.buttons.hide()
self.install_container_t = InstallContainerThread()
self.install_container_t = InstallContainerThread(self.dangerzone)
self.install_container_t.finished.connect(self.finished)
self.install_container_t.start()
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
finished = QtCore.Signal(bool)
update = QtCore.Signal(bool, str, int)
def __init__(self, document: Document, ocr_lang: str = None) -> None:
def __init__(
self,
dangerzone: DangerzoneGui,
document: Document,
ocr_lang: str = None,
) -> None:
super(ConvertTask, self).__init__()
self.document = document
self.ocr_lang = ocr_lang
self.error = False
self.dangerzone = dangerzone
def convert_document(self) -> None:
isolation_provider.convert(
self.dangerzone.isolation_provider.convert(
self.document,
self.ocr_lang,
self.stdout_callback,
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
def start_conversion(self) -> None:
if not self.thread_pool_initized:
max_jobs = isolation_provider.get_max_parallel_conversions()
max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
self.thread_pool = ThreadPool(max_jobs)
for doc_widget in self.document_widgets:
task = ConvertTask(doc_widget.document, self.get_ocr_lang())
task = ConvertTask(
self.dangerzone, doc_widget.document, self.get_ocr_lang()
)
task.update.connect(doc_widget.update_progress)
task.finished.connect(doc_widget.all_done)
self.thread_pool.apply_async(task.convert_document)

View file

@ -7,6 +7,7 @@ import platform
import shutil
import subprocess
import tempfile
from abc import ABC, abstractmethod
from typing import Callable, List, Optional, Tuple
import appdirs
@ -15,8 +16,6 @@ from colorama import Fore, Style
from .document import Document
from .util import get_resource_path, get_subprocess_startupinfo
container_name = "dangerzone.rocks/dangerzone"
# Define startupinfo for subprocesses
if platform.system() == "Windows":
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
@ -26,298 +25,329 @@ else:
log = logging.getLogger(__name__)
# Name of the dangerzone container
container_name = "dangerzone.rocks/dangerzone"
class NoContainerTechException(Exception):
def __init__(self, container_tech: str) -> None:
super().__init__(f"{container_tech} is not installed")
def get_runtime_name() -> str:
if platform.system() == "Linux":
runtime_name = "podman"
else:
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
runtime_name = "docker"
return runtime_name
def get_runtime() -> str:
container_tech = get_runtime_name()
runtime = shutil.which(container_tech)
if runtime is None:
raise NoContainerTechException(container_tech)
return runtime
def install() -> bool:
class AbstractIsolationProvider(ABC):
"""
Make sure the podman container is installed. Linux only.
Abstracts an isolation provider
"""
if is_container_installed():
@abstractmethod
def install(self) -> bool:
pass
@abstractmethod
def convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
pass
@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass
class Container(AbstractIsolationProvider):
# Name of the dangerzone container
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
def __init__(self) -> None:
pass
def get_runtime_name(self) -> str:
if platform.system() == "Linux":
runtime_name = "podman"
else:
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
runtime_name = "docker"
return runtime_name
def get_runtime(self) -> str:
container_tech = self.get_runtime_name()
runtime = shutil.which(container_tech)
if runtime is None:
raise NoContainerTechException(container_tech)
return runtime
def install(self) -> bool:
"""
Make sure the podman container is installed. Linux only.
"""
if self.is_container_installed():
return True
# Load the container into podman
log.info("Installing Dangerzone container image...")
p = subprocess.Popen(
[self.get_runtime(), "load"],
stdin=subprocess.PIPE,
startupinfo=get_subprocess_startupinfo(),
)
chunk_size = 10240
compressed_container_path = get_resource_path("container.tar.gz")
with gzip.open(compressed_container_path) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) > 0:
if p.stdin:
p.stdin.write(chunk)
else:
break
p.communicate()
if not self.is_container_installed():
log.error("Failed to install the container image")
return False
log.info("Container image installed")
return True
# Load the container into podman
log.info("Installing Dangerzone container image...")
def is_container_installed(self) -> bool:
"""
See if the podman container is installed. Linux only.
"""
# Get the image id
with open(get_resource_path("image-id.txt")) as f:
expected_image_id = f.read().strip()
p = subprocess.Popen(
[get_runtime(), "load"],
stdin=subprocess.PIPE,
startupinfo=get_subprocess_startupinfo(),
)
chunk_size = 10240
compressed_container_path = get_resource_path("container.tar.gz")
with gzip.open(compressed_container_path) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) > 0:
if p.stdin:
p.stdin.write(chunk)
else:
break
p.communicate()
if not is_container_installed():
log.error("Failed to install the container image")
return False
log.info("Container image installed")
return True
def is_container_installed() -> bool:
"""
See if the podman container is installed. Linux only.
"""
# Get the image id
with open(get_resource_path("image-id.txt")) as f:
expected_image_id = f.read().strip()
# See if this image is already installed
installed = False
found_image_id = subprocess.check_output(
[
get_runtime(),
"image",
"list",
"--format",
"{{.ID}}",
container_name,
],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
found_image_id = found_image_id.strip()
if found_image_id == expected_image_id:
installed = True
elif found_image_id == "":
pass
else:
log.info("Deleting old dangerzone container image")
try:
subprocess.check_output(
[get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(),
)
except:
log.warning("Couldn't delete old container image, so leaving it there")
return installed
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
with subprocess.Popen(
args,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
(error, text, percentage) = parse_progress(document, line)
if error:
document.mark_as_failed()
if percentage == 100.0:
document.mark_as_safe()
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = get_runtime()
if get_runtime_name() == "podman":
platform_args = []
security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"]
else:
platform_args = ["--platform", "linux/amd64"]
security_args = ["--security-opt=no-new-privileges:true"]
# drop all linux kernel capabilities
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
prevent_leakage_args = ["--rm"]
args = (
["run", "--network", "none"]
+ platform_args
+ user_args
+ security_args
+ prevent_leakage_args
+ extra_args
+ [container_name]
+ command
)
args = [container_runtime] + args
return exec(document, args, stdout_callback)
def convert(
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
document.mark_as_converting()
if ocr_lang:
ocr = "1"
else:
ocr = "0"
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
os.makedirs(dz_tmp, exist_ok=True)
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
pixel_dir = os.path.join(tmpdir.name, "pixels")
safe_dir = os.path.join(tmpdir.name, "safe")
os.makedirs(pixel_dir, exist_ok=True)
os.makedirs(safe_dir, exist_ok=True)
# Convert document to pixels
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
extra_args = [
"-v",
f"{document.input_filename}:/tmp/input_file",
"-v",
f"{pixel_dir}:/dangerzone",
]
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
# TODO: validate convert to pixels output
# Convert pixels to safe PDF
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
extra_args = [
"-v",
f"{pixel_dir}:/dangerzone",
"-v",
f"{safe_dir}:/safezone",
"-e",
f"OCR={ocr}",
"-e",
f"OCR_LANGUAGE={ocr_lang}",
]
ret = exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, document.output_filename)
if document.archive_after_conversion:
document.archive()
# We did it
success = True
# Clean up
tmpdir.cleanup()
return success
def get_max_parallel_conversions() -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
return 1
n_cpu = 1 # type: ignore [unreachable]
if platform.system() == "Linux":
# if on linux containers run natively
cpu_count = os.cpu_count()
if cpu_count is not None:
n_cpu = cpu_count
elif get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output(
[get_runtime(), "info", "--format", "{{.NCPU}}"],
# See if this image is already installed
installed = False
found_image_id = subprocess.check_output(
[
self.get_runtime(),
"image",
"list",
"--format",
"{{.ID}}",
self.CONTAINER_NAME,
],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
n_cpu = int(n_cpu_str.strip())
found_image_id = found_image_id.strip()
return 2 * n_cpu + 1
if found_image_id == expected_image_id:
installed = True
elif found_image_id == "":
pass
else:
log.info("Deleting old dangerzone container image")
try:
subprocess.check_output(
[self.get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(),
)
except:
log.warning("Couldn't delete old container image, so leaving it there")
return installed
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{status['percentage']}% "
if status["error"]:
s += Style.RESET_ALL + Fore.RED + status["text"]
log.error(s)
else:
s += Style.RESET_ALL + status["text"]
log.info(s)
return (status["error"], status["text"], status["percentage"])
def exec(
self,
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
with subprocess.Popen(
args,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
(error, text, percentage) = self.parse_progress(document, line)
if error:
document.mark_as_failed()
if percentage == 100.0:
document.mark_as_safe()
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
self,
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = self.get_runtime()
if self.get_runtime_name() == "podman":
platform_args = []
security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"]
else:
platform_args = ["--platform", "linux/amd64"]
security_args = ["--security-opt=no-new-privileges:true"]
# drop all linux kernel capabilities
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
prevent_leakage_args = ["--rm"]
args = (
["run", "--network", "none"]
+ platform_args
+ user_args
+ security_args
+ prevent_leakage_args
+ extra_args
+ [self.CONTAINER_NAME]
+ command
)
args = [container_runtime] + args
return self.exec(document, args, stdout_callback)
def convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
document.mark_as_converting()
if ocr_lang:
ocr = "1"
else:
ocr = "0"
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
os.makedirs(dz_tmp, exist_ok=True)
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
pixel_dir = os.path.join(tmpdir.name, "pixels")
safe_dir = os.path.join(tmpdir.name, "safe")
os.makedirs(pixel_dir, exist_ok=True)
os.makedirs(safe_dir, exist_ok=True)
# Convert document to pixels
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"document-to-pixels",
]
extra_args = [
"-v",
f"{document.input_filename}:/tmp/input_file",
"-v",
f"{pixel_dir}:/dangerzone",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
# TODO: validate convert to pixels output
# Convert pixels to safe PDF
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"pixels-to-pdf",
]
extra_args = [
"-v",
f"{pixel_dir}:/dangerzone",
"-v",
f"{safe_dir}:/safezone",
"-e",
f"OCR={ocr}",
"-e",
f"OCR_LANGUAGE={ocr_lang}",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, document.output_filename)
if document.archive_after_conversion:
document.archive()
# We did it
success = True
# Clean up
tmpdir.cleanup()
return success
def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
return 1
n_cpu = 1 # type: ignore [unreachable]
if platform.system() == "Linux":
# if on linux containers run natively
cpu_count = os.cpu_count()
if cpu_count is not None:
n_cpu = cpu_count
elif self.get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output(
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
n_cpu = int(n_cpu_str.strip())
return 2 * n_cpu + 1
# From global_common:

View file

@ -41,6 +41,8 @@ class DangerzoneCore(object):
self.documents: List[Document] = []
self.isolation_provider = isolation_provider.Container()
def add_document_from_filename(
self,
input_filename: str,
@ -59,13 +61,13 @@ class DangerzoneCore(object):
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
) -> None:
def convert_doc(document: Document) -> None:
success = isolation_provider.convert(
success = self.isolation_provider.convert(
document,
ocr_lang,
stdout_callback,
)
max_jobs = isolation_provider.get_max_parallel_conversions()
max_jobs = self.isolation_provider.get_max_parallel_conversions()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
executor.map(convert_doc, self.documents)