mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
Abstract container into an IsolationProvider
Encapsulate container logic into an implementation of AbstractIsolationProvider. This flexibility will allow for other types of isolation managers, such as a Dummy one.
This commit is contained in:
parent
1114a0dfa1
commit
a4f27afdc6
4 changed files with 331 additions and 290 deletions
|
@ -73,7 +73,7 @@ def cli_main(
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Ensure container is installed
|
# Ensure container is installed
|
||||||
isolation_provider.install()
|
dangerzone.isolation_provider.install()
|
||||||
|
|
||||||
# Convert the document
|
# Convert the document
|
||||||
print_header("Converting document to safe PDF")
|
print_header("Converting document to safe PDF")
|
||||||
|
|
|
@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
|
||||||
class InstallContainerThread(QtCore.QThread):
|
class InstallContainerThread(QtCore.QThread):
|
||||||
finished = QtCore.Signal()
|
finished = QtCore.Signal()
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self, dangerzone: DangerzoneGui) -> None:
|
||||||
super(InstallContainerThread, self).__init__()
|
super(InstallContainerThread, self).__init__()
|
||||||
|
self.dangerzone = dangerzone
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
isolation_provider.install()
|
self.dangerzone.isolation_provider.install()
|
||||||
self.finished.emit()
|
self.finished.emit()
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
||||||
state: Optional[str] = None
|
state: Optional[str] = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
container_runtime = isolation_provider.get_runtime()
|
container_runtime = self.dangerzone.isolation_provider.get_runtime()
|
||||||
except isolation_provider.NoContainerTechException as e:
|
except isolation_provider.NoContainerTechException as e:
|
||||||
log.error(str(e))
|
log.error(str(e))
|
||||||
state = "not_installed"
|
state = "not_installed"
|
||||||
|
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
||||||
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
|
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
|
||||||
)
|
)
|
||||||
self.buttons.hide()
|
self.buttons.hide()
|
||||||
self.install_container_t = InstallContainerThread()
|
self.install_container_t = InstallContainerThread(self.dangerzone)
|
||||||
self.install_container_t.finished.connect(self.finished)
|
self.install_container_t.finished.connect(self.finished)
|
||||||
self.install_container_t.start()
|
self.install_container_t.start()
|
||||||
|
|
||||||
|
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
|
||||||
finished = QtCore.Signal(bool)
|
finished = QtCore.Signal(bool)
|
||||||
update = QtCore.Signal(bool, str, int)
|
update = QtCore.Signal(bool, str, int)
|
||||||
|
|
||||||
def __init__(self, document: Document, ocr_lang: str = None) -> None:
|
def __init__(
|
||||||
|
self,
|
||||||
|
dangerzone: DangerzoneGui,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: str = None,
|
||||||
|
) -> None:
|
||||||
super(ConvertTask, self).__init__()
|
super(ConvertTask, self).__init__()
|
||||||
self.document = document
|
self.document = document
|
||||||
self.ocr_lang = ocr_lang
|
self.ocr_lang = ocr_lang
|
||||||
self.error = False
|
self.error = False
|
||||||
|
self.dangerzone = dangerzone
|
||||||
|
|
||||||
def convert_document(self) -> None:
|
def convert_document(self) -> None:
|
||||||
isolation_provider.convert(
|
self.dangerzone.isolation_provider.convert(
|
||||||
self.document,
|
self.document,
|
||||||
self.ocr_lang,
|
self.ocr_lang,
|
||||||
self.stdout_callback,
|
self.stdout_callback,
|
||||||
|
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
|
||||||
|
|
||||||
def start_conversion(self) -> None:
|
def start_conversion(self) -> None:
|
||||||
if not self.thread_pool_initized:
|
if not self.thread_pool_initized:
|
||||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
|
||||||
self.thread_pool = ThreadPool(max_jobs)
|
self.thread_pool = ThreadPool(max_jobs)
|
||||||
|
|
||||||
for doc_widget in self.document_widgets:
|
for doc_widget in self.document_widgets:
|
||||||
task = ConvertTask(doc_widget.document, self.get_ocr_lang())
|
task = ConvertTask(
|
||||||
|
self.dangerzone, doc_widget.document, self.get_ocr_lang()
|
||||||
|
)
|
||||||
task.update.connect(doc_widget.update_progress)
|
task.update.connect(doc_widget.update_progress)
|
||||||
task.finished.connect(doc_widget.all_done)
|
task.finished.connect(doc_widget.all_done)
|
||||||
self.thread_pool.apply_async(task.convert_document)
|
self.thread_pool.apply_async(task.convert_document)
|
||||||
|
|
|
@ -7,6 +7,7 @@ import platform
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from typing import Callable, List, Optional, Tuple
|
from typing import Callable, List, Optional, Tuple
|
||||||
|
|
||||||
import appdirs
|
import appdirs
|
||||||
|
@ -15,8 +16,6 @@ from colorama import Fore, Style
|
||||||
from .document import Document
|
from .document import Document
|
||||||
from .util import get_resource_path, get_subprocess_startupinfo
|
from .util import get_resource_path, get_subprocess_startupinfo
|
||||||
|
|
||||||
container_name = "dangerzone.rocks/dangerzone"
|
|
||||||
|
|
||||||
# Define startupinfo for subprocesses
|
# Define startupinfo for subprocesses
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
|
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
|
||||||
|
@ -26,298 +25,329 @@ else:
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Name of the dangerzone container
|
|
||||||
container_name = "dangerzone.rocks/dangerzone"
|
|
||||||
|
|
||||||
|
|
||||||
class NoContainerTechException(Exception):
|
class NoContainerTechException(Exception):
|
||||||
def __init__(self, container_tech: str) -> None:
|
def __init__(self, container_tech: str) -> None:
|
||||||
super().__init__(f"{container_tech} is not installed")
|
super().__init__(f"{container_tech} is not installed")
|
||||||
|
|
||||||
|
|
||||||
def get_runtime_name() -> str:
|
class AbstractIsolationProvider(ABC):
|
||||||
if platform.system() == "Linux":
|
|
||||||
runtime_name = "podman"
|
|
||||||
else:
|
|
||||||
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
|
|
||||||
runtime_name = "docker"
|
|
||||||
return runtime_name
|
|
||||||
|
|
||||||
|
|
||||||
def get_runtime() -> str:
|
|
||||||
container_tech = get_runtime_name()
|
|
||||||
runtime = shutil.which(container_tech)
|
|
||||||
if runtime is None:
|
|
||||||
raise NoContainerTechException(container_tech)
|
|
||||||
return runtime
|
|
||||||
|
|
||||||
|
|
||||||
def install() -> bool:
|
|
||||||
"""
|
"""
|
||||||
Make sure the podman container is installed. Linux only.
|
Abstracts an isolation provider
|
||||||
"""
|
"""
|
||||||
if is_container_installed():
|
|
||||||
|
@abstractmethod
|
||||||
|
def install(self) -> bool:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def convert(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: Optional[str],
|
||||||
|
stdout_callback: Optional[Callable] = None,
|
||||||
|
) -> bool:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_max_parallel_conversions(self) -> int:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class Container(AbstractIsolationProvider):
|
||||||
|
|
||||||
|
# Name of the dangerzone container
|
||||||
|
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_runtime_name(self) -> str:
|
||||||
|
if platform.system() == "Linux":
|
||||||
|
runtime_name = "podman"
|
||||||
|
else:
|
||||||
|
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
|
||||||
|
runtime_name = "docker"
|
||||||
|
return runtime_name
|
||||||
|
|
||||||
|
def get_runtime(self) -> str:
|
||||||
|
container_tech = self.get_runtime_name()
|
||||||
|
runtime = shutil.which(container_tech)
|
||||||
|
if runtime is None:
|
||||||
|
raise NoContainerTechException(container_tech)
|
||||||
|
return runtime
|
||||||
|
|
||||||
|
def install(self) -> bool:
|
||||||
|
"""
|
||||||
|
Make sure the podman container is installed. Linux only.
|
||||||
|
"""
|
||||||
|
if self.is_container_installed():
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Load the container into podman
|
||||||
|
log.info("Installing Dangerzone container image...")
|
||||||
|
|
||||||
|
p = subprocess.Popen(
|
||||||
|
[self.get_runtime(), "load"],
|
||||||
|
stdin=subprocess.PIPE,
|
||||||
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
|
)
|
||||||
|
|
||||||
|
chunk_size = 10240
|
||||||
|
compressed_container_path = get_resource_path("container.tar.gz")
|
||||||
|
with gzip.open(compressed_container_path) as f:
|
||||||
|
while True:
|
||||||
|
chunk = f.read(chunk_size)
|
||||||
|
if len(chunk) > 0:
|
||||||
|
if p.stdin:
|
||||||
|
p.stdin.write(chunk)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
p.communicate()
|
||||||
|
|
||||||
|
if not self.is_container_installed():
|
||||||
|
log.error("Failed to install the container image")
|
||||||
|
return False
|
||||||
|
|
||||||
|
log.info("Container image installed")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Load the container into podman
|
def is_container_installed(self) -> bool:
|
||||||
log.info("Installing Dangerzone container image...")
|
"""
|
||||||
|
See if the podman container is installed. Linux only.
|
||||||
|
"""
|
||||||
|
# Get the image id
|
||||||
|
with open(get_resource_path("image-id.txt")) as f:
|
||||||
|
expected_image_id = f.read().strip()
|
||||||
|
|
||||||
p = subprocess.Popen(
|
# See if this image is already installed
|
||||||
[get_runtime(), "load"],
|
installed = False
|
||||||
stdin=subprocess.PIPE,
|
found_image_id = subprocess.check_output(
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
[
|
||||||
)
|
self.get_runtime(),
|
||||||
|
"image",
|
||||||
chunk_size = 10240
|
"list",
|
||||||
compressed_container_path = get_resource_path("container.tar.gz")
|
"--format",
|
||||||
with gzip.open(compressed_container_path) as f:
|
"{{.ID}}",
|
||||||
while True:
|
self.CONTAINER_NAME,
|
||||||
chunk = f.read(chunk_size)
|
],
|
||||||
if len(chunk) > 0:
|
|
||||||
if p.stdin:
|
|
||||||
p.stdin.write(chunk)
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
p.communicate()
|
|
||||||
|
|
||||||
if not is_container_installed():
|
|
||||||
log.error("Failed to install the container image")
|
|
||||||
return False
|
|
||||||
|
|
||||||
log.info("Container image installed")
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def is_container_installed() -> bool:
|
|
||||||
"""
|
|
||||||
See if the podman container is installed. Linux only.
|
|
||||||
"""
|
|
||||||
# Get the image id
|
|
||||||
with open(get_resource_path("image-id.txt")) as f:
|
|
||||||
expected_image_id = f.read().strip()
|
|
||||||
|
|
||||||
# See if this image is already installed
|
|
||||||
installed = False
|
|
||||||
found_image_id = subprocess.check_output(
|
|
||||||
[
|
|
||||||
get_runtime(),
|
|
||||||
"image",
|
|
||||||
"list",
|
|
||||||
"--format",
|
|
||||||
"{{.ID}}",
|
|
||||||
container_name,
|
|
||||||
],
|
|
||||||
text=True,
|
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
|
||||||
)
|
|
||||||
found_image_id = found_image_id.strip()
|
|
||||||
|
|
||||||
if found_image_id == expected_image_id:
|
|
||||||
installed = True
|
|
||||||
elif found_image_id == "":
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
log.info("Deleting old dangerzone container image")
|
|
||||||
|
|
||||||
try:
|
|
||||||
subprocess.check_output(
|
|
||||||
[get_runtime(), "rmi", "--force", found_image_id],
|
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
|
||||||
)
|
|
||||||
except:
|
|
||||||
log.warning("Couldn't delete old container image, so leaving it there")
|
|
||||||
|
|
||||||
return installed
|
|
||||||
|
|
||||||
|
|
||||||
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
|
|
||||||
"""
|
|
||||||
Parses a line returned by the container.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
status = json.loads(line)
|
|
||||||
except:
|
|
||||||
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
|
||||||
log.error(error_message)
|
|
||||||
return (True, error_message, -1)
|
|
||||||
|
|
||||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
|
||||||
s += Fore.CYAN + f"{status['percentage']}% "
|
|
||||||
if status["error"]:
|
|
||||||
s += Style.RESET_ALL + Fore.RED + status["text"]
|
|
||||||
log.error(s)
|
|
||||||
else:
|
|
||||||
s += Style.RESET_ALL + status["text"]
|
|
||||||
log.info(s)
|
|
||||||
|
|
||||||
return (status["error"], status["text"], status["percentage"])
|
|
||||||
|
|
||||||
|
|
||||||
def exec(
|
|
||||||
document: Document,
|
|
||||||
args: List[str],
|
|
||||||
stdout_callback: Optional[Callable] = None,
|
|
||||||
) -> int:
|
|
||||||
args_str = " ".join(pipes.quote(s) for s in args)
|
|
||||||
log.info("> " + args_str)
|
|
||||||
|
|
||||||
with subprocess.Popen(
|
|
||||||
args,
|
|
||||||
stdin=None,
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
bufsize=1,
|
|
||||||
universal_newlines=True,
|
|
||||||
startupinfo=startupinfo,
|
|
||||||
) as p:
|
|
||||||
if p.stdout is not None:
|
|
||||||
for line in p.stdout:
|
|
||||||
(error, text, percentage) = parse_progress(document, line)
|
|
||||||
if error:
|
|
||||||
document.mark_as_failed()
|
|
||||||
if percentage == 100.0:
|
|
||||||
document.mark_as_safe()
|
|
||||||
if stdout_callback:
|
|
||||||
stdout_callback(error, text, percentage)
|
|
||||||
|
|
||||||
p.communicate()
|
|
||||||
return p.returncode
|
|
||||||
|
|
||||||
|
|
||||||
def exec_container(
|
|
||||||
document: Document,
|
|
||||||
command: List[str],
|
|
||||||
extra_args: List[str] = [],
|
|
||||||
stdout_callback: Optional[Callable] = None,
|
|
||||||
) -> int:
|
|
||||||
container_runtime = get_runtime()
|
|
||||||
|
|
||||||
if get_runtime_name() == "podman":
|
|
||||||
platform_args = []
|
|
||||||
security_args = ["--security-opt", "no-new-privileges"]
|
|
||||||
security_args += ["--userns", "keep-id"]
|
|
||||||
else:
|
|
||||||
platform_args = ["--platform", "linux/amd64"]
|
|
||||||
security_args = ["--security-opt=no-new-privileges:true"]
|
|
||||||
|
|
||||||
# drop all linux kernel capabilities
|
|
||||||
security_args += ["--cap-drop", "all"]
|
|
||||||
user_args = ["-u", "dangerzone"]
|
|
||||||
|
|
||||||
prevent_leakage_args = ["--rm"]
|
|
||||||
|
|
||||||
args = (
|
|
||||||
["run", "--network", "none"]
|
|
||||||
+ platform_args
|
|
||||||
+ user_args
|
|
||||||
+ security_args
|
|
||||||
+ prevent_leakage_args
|
|
||||||
+ extra_args
|
|
||||||
+ [container_name]
|
|
||||||
+ command
|
|
||||||
)
|
|
||||||
|
|
||||||
args = [container_runtime] + args
|
|
||||||
return exec(document, args, stdout_callback)
|
|
||||||
|
|
||||||
|
|
||||||
def convert(
|
|
||||||
document: Document,
|
|
||||||
ocr_lang: Optional[str],
|
|
||||||
stdout_callback: Optional[Callable] = None,
|
|
||||||
) -> bool:
|
|
||||||
success = False
|
|
||||||
document.mark_as_converting()
|
|
||||||
|
|
||||||
if ocr_lang:
|
|
||||||
ocr = "1"
|
|
||||||
else:
|
|
||||||
ocr = "0"
|
|
||||||
|
|
||||||
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
|
|
||||||
os.makedirs(dz_tmp, exist_ok=True)
|
|
||||||
|
|
||||||
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
|
|
||||||
pixel_dir = os.path.join(tmpdir.name, "pixels")
|
|
||||||
safe_dir = os.path.join(tmpdir.name, "safe")
|
|
||||||
os.makedirs(pixel_dir, exist_ok=True)
|
|
||||||
os.makedirs(safe_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# Convert document to pixels
|
|
||||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
|
|
||||||
extra_args = [
|
|
||||||
"-v",
|
|
||||||
f"{document.input_filename}:/tmp/input_file",
|
|
||||||
"-v",
|
|
||||||
f"{pixel_dir}:/dangerzone",
|
|
||||||
]
|
|
||||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
|
||||||
if ret != 0:
|
|
||||||
log.error("documents-to-pixels failed")
|
|
||||||
else:
|
|
||||||
# TODO: validate convert to pixels output
|
|
||||||
|
|
||||||
# Convert pixels to safe PDF
|
|
||||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
|
|
||||||
extra_args = [
|
|
||||||
"-v",
|
|
||||||
f"{pixel_dir}:/dangerzone",
|
|
||||||
"-v",
|
|
||||||
f"{safe_dir}:/safezone",
|
|
||||||
"-e",
|
|
||||||
f"OCR={ocr}",
|
|
||||||
"-e",
|
|
||||||
f"OCR_LANGUAGE={ocr_lang}",
|
|
||||||
]
|
|
||||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
|
||||||
if ret != 0:
|
|
||||||
log.error("pixels-to-pdf failed")
|
|
||||||
else:
|
|
||||||
# Move the final file to the right place
|
|
||||||
if os.path.exists(document.output_filename):
|
|
||||||
os.remove(document.output_filename)
|
|
||||||
|
|
||||||
container_output_filename = os.path.join(
|
|
||||||
safe_dir, "safe-output-compressed.pdf"
|
|
||||||
)
|
|
||||||
shutil.move(container_output_filename, document.output_filename)
|
|
||||||
|
|
||||||
if document.archive_after_conversion:
|
|
||||||
document.archive()
|
|
||||||
|
|
||||||
# We did it
|
|
||||||
success = True
|
|
||||||
|
|
||||||
# Clean up
|
|
||||||
tmpdir.cleanup()
|
|
||||||
|
|
||||||
return success
|
|
||||||
|
|
||||||
|
|
||||||
def get_max_parallel_conversions() -> int:
|
|
||||||
|
|
||||||
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
|
||||||
# https://github.com/freedomofpress/dangerzone/issues/257
|
|
||||||
return 1
|
|
||||||
|
|
||||||
n_cpu = 1 # type: ignore [unreachable]
|
|
||||||
if platform.system() == "Linux":
|
|
||||||
# if on linux containers run natively
|
|
||||||
cpu_count = os.cpu_count()
|
|
||||||
if cpu_count is not None:
|
|
||||||
n_cpu = cpu_count
|
|
||||||
|
|
||||||
elif get_runtime_name() == "docker":
|
|
||||||
# For Windows and MacOS containers run in VM
|
|
||||||
# So we obtain the CPU count for the VM
|
|
||||||
n_cpu_str = subprocess.check_output(
|
|
||||||
[get_runtime(), "info", "--format", "{{.NCPU}}"],
|
|
||||||
text=True,
|
text=True,
|
||||||
startupinfo=get_subprocess_startupinfo(),
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
)
|
)
|
||||||
n_cpu = int(n_cpu_str.strip())
|
found_image_id = found_image_id.strip()
|
||||||
|
|
||||||
return 2 * n_cpu + 1
|
if found_image_id == expected_image_id:
|
||||||
|
installed = True
|
||||||
|
elif found_image_id == "":
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
log.info("Deleting old dangerzone container image")
|
||||||
|
|
||||||
|
try:
|
||||||
|
subprocess.check_output(
|
||||||
|
[self.get_runtime(), "rmi", "--force", found_image_id],
|
||||||
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
|
)
|
||||||
|
except:
|
||||||
|
log.warning("Couldn't delete old container image, so leaving it there")
|
||||||
|
|
||||||
|
return installed
|
||||||
|
|
||||||
|
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
|
||||||
|
"""
|
||||||
|
Parses a line returned by the container.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
status = json.loads(line)
|
||||||
|
except:
|
||||||
|
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
||||||
|
log.error(error_message)
|
||||||
|
return (True, error_message, -1)
|
||||||
|
|
||||||
|
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||||
|
s += Fore.CYAN + f"{status['percentage']}% "
|
||||||
|
if status["error"]:
|
||||||
|
s += Style.RESET_ALL + Fore.RED + status["text"]
|
||||||
|
log.error(s)
|
||||||
|
else:
|
||||||
|
s += Style.RESET_ALL + status["text"]
|
||||||
|
log.info(s)
|
||||||
|
|
||||||
|
return (status["error"], status["text"], status["percentage"])
|
||||||
|
|
||||||
|
def exec(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
args: List[str],
|
||||||
|
stdout_callback: Optional[Callable] = None,
|
||||||
|
) -> int:
|
||||||
|
args_str = " ".join(pipes.quote(s) for s in args)
|
||||||
|
log.info("> " + args_str)
|
||||||
|
|
||||||
|
with subprocess.Popen(
|
||||||
|
args,
|
||||||
|
stdin=None,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.STDOUT,
|
||||||
|
bufsize=1,
|
||||||
|
universal_newlines=True,
|
||||||
|
startupinfo=startupinfo,
|
||||||
|
) as p:
|
||||||
|
if p.stdout is not None:
|
||||||
|
for line in p.stdout:
|
||||||
|
(error, text, percentage) = self.parse_progress(document, line)
|
||||||
|
if error:
|
||||||
|
document.mark_as_failed()
|
||||||
|
if percentage == 100.0:
|
||||||
|
document.mark_as_safe()
|
||||||
|
if stdout_callback:
|
||||||
|
stdout_callback(error, text, percentage)
|
||||||
|
|
||||||
|
p.communicate()
|
||||||
|
return p.returncode
|
||||||
|
|
||||||
|
def exec_container(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
command: List[str],
|
||||||
|
extra_args: List[str] = [],
|
||||||
|
stdout_callback: Optional[Callable] = None,
|
||||||
|
) -> int:
|
||||||
|
container_runtime = self.get_runtime()
|
||||||
|
|
||||||
|
if self.get_runtime_name() == "podman":
|
||||||
|
platform_args = []
|
||||||
|
security_args = ["--security-opt", "no-new-privileges"]
|
||||||
|
security_args += ["--userns", "keep-id"]
|
||||||
|
else:
|
||||||
|
platform_args = ["--platform", "linux/amd64"]
|
||||||
|
security_args = ["--security-opt=no-new-privileges:true"]
|
||||||
|
|
||||||
|
# drop all linux kernel capabilities
|
||||||
|
security_args += ["--cap-drop", "all"]
|
||||||
|
user_args = ["-u", "dangerzone"]
|
||||||
|
|
||||||
|
prevent_leakage_args = ["--rm"]
|
||||||
|
|
||||||
|
args = (
|
||||||
|
["run", "--network", "none"]
|
||||||
|
+ platform_args
|
||||||
|
+ user_args
|
||||||
|
+ security_args
|
||||||
|
+ prevent_leakage_args
|
||||||
|
+ extra_args
|
||||||
|
+ [self.CONTAINER_NAME]
|
||||||
|
+ command
|
||||||
|
)
|
||||||
|
|
||||||
|
args = [container_runtime] + args
|
||||||
|
return self.exec(document, args, stdout_callback)
|
||||||
|
|
||||||
|
def convert(
|
||||||
|
self,
|
||||||
|
document: Document,
|
||||||
|
ocr_lang: Optional[str],
|
||||||
|
stdout_callback: Optional[Callable] = None,
|
||||||
|
) -> bool:
|
||||||
|
success = False
|
||||||
|
document.mark_as_converting()
|
||||||
|
|
||||||
|
if ocr_lang:
|
||||||
|
ocr = "1"
|
||||||
|
else:
|
||||||
|
ocr = "0"
|
||||||
|
|
||||||
|
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
|
||||||
|
os.makedirs(dz_tmp, exist_ok=True)
|
||||||
|
|
||||||
|
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
|
||||||
|
pixel_dir = os.path.join(tmpdir.name, "pixels")
|
||||||
|
safe_dir = os.path.join(tmpdir.name, "safe")
|
||||||
|
os.makedirs(pixel_dir, exist_ok=True)
|
||||||
|
os.makedirs(safe_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Convert document to pixels
|
||||||
|
command = [
|
||||||
|
"/usr/bin/python3",
|
||||||
|
"/usr/local/bin/dangerzone.py",
|
||||||
|
"document-to-pixels",
|
||||||
|
]
|
||||||
|
extra_args = [
|
||||||
|
"-v",
|
||||||
|
f"{document.input_filename}:/tmp/input_file",
|
||||||
|
"-v",
|
||||||
|
f"{pixel_dir}:/dangerzone",
|
||||||
|
]
|
||||||
|
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||||
|
if ret != 0:
|
||||||
|
log.error("documents-to-pixels failed")
|
||||||
|
else:
|
||||||
|
# TODO: validate convert to pixels output
|
||||||
|
|
||||||
|
# Convert pixels to safe PDF
|
||||||
|
command = [
|
||||||
|
"/usr/bin/python3",
|
||||||
|
"/usr/local/bin/dangerzone.py",
|
||||||
|
"pixels-to-pdf",
|
||||||
|
]
|
||||||
|
extra_args = [
|
||||||
|
"-v",
|
||||||
|
f"{pixel_dir}:/dangerzone",
|
||||||
|
"-v",
|
||||||
|
f"{safe_dir}:/safezone",
|
||||||
|
"-e",
|
||||||
|
f"OCR={ocr}",
|
||||||
|
"-e",
|
||||||
|
f"OCR_LANGUAGE={ocr_lang}",
|
||||||
|
]
|
||||||
|
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||||
|
if ret != 0:
|
||||||
|
log.error("pixels-to-pdf failed")
|
||||||
|
else:
|
||||||
|
# Move the final file to the right place
|
||||||
|
if os.path.exists(document.output_filename):
|
||||||
|
os.remove(document.output_filename)
|
||||||
|
|
||||||
|
container_output_filename = os.path.join(
|
||||||
|
safe_dir, "safe-output-compressed.pdf"
|
||||||
|
)
|
||||||
|
shutil.move(container_output_filename, document.output_filename)
|
||||||
|
|
||||||
|
if document.archive_after_conversion:
|
||||||
|
document.archive()
|
||||||
|
|
||||||
|
# We did it
|
||||||
|
success = True
|
||||||
|
|
||||||
|
# Clean up
|
||||||
|
tmpdir.cleanup()
|
||||||
|
|
||||||
|
return success
|
||||||
|
|
||||||
|
def get_max_parallel_conversions(self) -> int:
|
||||||
|
|
||||||
|
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
||||||
|
# https://github.com/freedomofpress/dangerzone/issues/257
|
||||||
|
return 1
|
||||||
|
|
||||||
|
n_cpu = 1 # type: ignore [unreachable]
|
||||||
|
if platform.system() == "Linux":
|
||||||
|
# if on linux containers run natively
|
||||||
|
cpu_count = os.cpu_count()
|
||||||
|
if cpu_count is not None:
|
||||||
|
n_cpu = cpu_count
|
||||||
|
|
||||||
|
elif self.get_runtime_name() == "docker":
|
||||||
|
# For Windows and MacOS containers run in VM
|
||||||
|
# So we obtain the CPU count for the VM
|
||||||
|
n_cpu_str = subprocess.check_output(
|
||||||
|
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
|
||||||
|
text=True,
|
||||||
|
startupinfo=get_subprocess_startupinfo(),
|
||||||
|
)
|
||||||
|
n_cpu = int(n_cpu_str.strip())
|
||||||
|
|
||||||
|
return 2 * n_cpu + 1
|
||||||
|
|
||||||
|
|
||||||
# From global_common:
|
# From global_common:
|
||||||
|
|
|
@ -41,6 +41,8 @@ class DangerzoneCore(object):
|
||||||
|
|
||||||
self.documents: List[Document] = []
|
self.documents: List[Document] = []
|
||||||
|
|
||||||
|
self.isolation_provider = isolation_provider.Container()
|
||||||
|
|
||||||
def add_document_from_filename(
|
def add_document_from_filename(
|
||||||
self,
|
self,
|
||||||
input_filename: str,
|
input_filename: str,
|
||||||
|
@ -59,13 +61,13 @@ class DangerzoneCore(object):
|
||||||
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
||||||
) -> None:
|
) -> None:
|
||||||
def convert_doc(document: Document) -> None:
|
def convert_doc(document: Document) -> None:
|
||||||
success = isolation_provider.convert(
|
success = self.isolation_provider.convert(
|
||||||
document,
|
document,
|
||||||
ocr_lang,
|
ocr_lang,
|
||||||
stdout_callback,
|
stdout_callback,
|
||||||
)
|
)
|
||||||
|
|
||||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
max_jobs = self.isolation_provider.get_max_parallel_conversions()
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
|
||||||
executor.map(convert_doc, self.documents)
|
executor.map(convert_doc, self.documents)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue