mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Abstract container into an IsolationProvider
Encapsulate the container logic in Container, an implementation of the new AbstractIsolationProvider interface. This abstraction will allow other types of isolation providers to be added, such as a dummy one.
This commit is contained in:
parent
1114a0dfa1
commit
a4f27afdc6
4 changed files with 331 additions and 290 deletions
|
@ -73,7 +73,7 @@ def cli_main(
|
|||
exit(1)
|
||||
|
||||
# Ensure container is installed
|
||||
isolation_provider.install()
|
||||
dangerzone.isolation_provider.install()
|
||||
|
||||
# Convert the document
|
||||
print_header("Converting document to safe PDF")
|
||||
|
|
|
@ -110,11 +110,12 @@ class MainWindow(QtWidgets.QMainWindow):
|
|||
class InstallContainerThread(QtCore.QThread):
|
||||
finished = QtCore.Signal()
|
||||
|
||||
def __init__(self) -> None:
|
||||
def __init__(self, dangerzone: DangerzoneGui) -> None:
|
||||
super(InstallContainerThread, self).__init__()
|
||||
self.dangerzone = dangerzone
|
||||
|
||||
def run(self) -> None:
|
||||
isolation_provider.install()
|
||||
self.dangerzone.isolation_provider.install()
|
||||
self.finished.emit()
|
||||
|
||||
|
||||
|
@ -166,7 +167,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
|||
state: Optional[str] = None
|
||||
|
||||
try:
|
||||
container_runtime = isolation_provider.get_runtime()
|
||||
container_runtime = self.dangerzone.isolation_provider.get_runtime()
|
||||
except isolation_provider.NoContainerTechException as e:
|
||||
log.error(str(e))
|
||||
state = "not_installed"
|
||||
|
@ -206,7 +207,7 @@ class WaitingWidget(QtWidgets.QWidget):
|
|||
"Installing the Dangerzone container image.<br><br>This might take a few minutes..."
|
||||
)
|
||||
self.buttons.hide()
|
||||
self.install_container_t = InstallContainerThread()
|
||||
self.install_container_t = InstallContainerThread(self.dangerzone)
|
||||
self.install_container_t.finished.connect(self.finished)
|
||||
self.install_container_t.start()
|
||||
|
||||
|
@ -624,14 +625,20 @@ class ConvertTask(QtCore.QObject):
|
|||
finished = QtCore.Signal(bool)
|
||||
update = QtCore.Signal(bool, str, int)
|
||||
|
||||
def __init__(self, document: Document, ocr_lang: str = None) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
dangerzone: DangerzoneGui,
|
||||
document: Document,
|
||||
ocr_lang: str = None,
|
||||
) -> None:
|
||||
super(ConvertTask, self).__init__()
|
||||
self.document = document
|
||||
self.ocr_lang = ocr_lang
|
||||
self.error = False
|
||||
self.dangerzone = dangerzone
|
||||
|
||||
def convert_document(self) -> None:
|
||||
isolation_provider.convert(
|
||||
self.dangerzone.isolation_provider.convert(
|
||||
self.document,
|
||||
self.ocr_lang,
|
||||
self.stdout_callback,
|
||||
|
@ -666,11 +673,13 @@ class DocumentsListWidget(QtWidgets.QListWidget):
|
|||
|
||||
def start_conversion(self) -> None:
|
||||
if not self.thread_pool_initized:
|
||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
||||
max_jobs = self.dangerzone.isolation_provider.get_max_parallel_conversions()
|
||||
self.thread_pool = ThreadPool(max_jobs)
|
||||
|
||||
for doc_widget in self.document_widgets:
|
||||
task = ConvertTask(doc_widget.document, self.get_ocr_lang())
|
||||
task = ConvertTask(
|
||||
self.dangerzone, doc_widget.document, self.get_ocr_lang()
|
||||
)
|
||||
task.update.connect(doc_widget.update_progress)
|
||||
task.finished.connect(doc_widget.all_done)
|
||||
self.thread_pool.apply_async(task.convert_document)
|
||||
|
|
|
@ -7,6 +7,7 @@ import platform
|
|||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import appdirs
|
||||
|
@ -15,8 +16,6 @@ from colorama import Fore, Style
|
|||
from .document import Document
|
||||
from .util import get_resource_path, get_subprocess_startupinfo
|
||||
|
||||
container_name = "dangerzone.rocks/dangerzone"
|
||||
|
||||
# Define startupinfo for subprocesses
|
||||
if platform.system() == "Windows":
|
||||
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
|
||||
|
@ -26,298 +25,329 @@ else:
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Name of the dangerzone container
|
||||
container_name = "dangerzone.rocks/dangerzone"
|
||||
|
||||
|
||||
class NoContainerTechException(Exception):
|
||||
def __init__(self, container_tech: str) -> None:
|
||||
super().__init__(f"{container_tech} is not installed")
|
||||
|
||||
|
||||
def get_runtime_name() -> str:
|
||||
if platform.system() == "Linux":
|
||||
runtime_name = "podman"
|
||||
else:
|
||||
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
|
||||
runtime_name = "docker"
|
||||
return runtime_name
|
||||
|
||||
|
||||
def get_runtime() -> str:
|
||||
container_tech = get_runtime_name()
|
||||
runtime = shutil.which(container_tech)
|
||||
if runtime is None:
|
||||
raise NoContainerTechException(container_tech)
|
||||
return runtime
|
||||
|
||||
|
||||
def install() -> bool:
|
||||
class AbstractIsolationProvider(ABC):
|
||||
"""
|
||||
Make sure the podman container is installed. Linux only.
|
||||
Abstracts an isolation provider
|
||||
"""
|
||||
if is_container_installed():
|
||||
|
||||
@abstractmethod
|
||||
def install(self) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def convert(
|
||||
self,
|
||||
document: Document,
|
||||
ocr_lang: Optional[str],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> bool:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_max_parallel_conversions(self) -> int:
|
||||
pass
|
||||
|
||||
|
||||
class Container(AbstractIsolationProvider):
|
||||
|
||||
# Name of the dangerzone container
|
||||
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
|
||||
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
def get_runtime_name(self) -> str:
|
||||
if platform.system() == "Linux":
|
||||
runtime_name = "podman"
|
||||
else:
|
||||
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
|
||||
runtime_name = "docker"
|
||||
return runtime_name
|
||||
|
||||
def get_runtime(self) -> str:
|
||||
container_tech = self.get_runtime_name()
|
||||
runtime = shutil.which(container_tech)
|
||||
if runtime is None:
|
||||
raise NoContainerTechException(container_tech)
|
||||
return runtime
|
||||
|
||||
def install(self) -> bool:
|
||||
"""
|
||||
Make sure the podman container is installed. Linux only.
|
||||
"""
|
||||
if self.is_container_installed():
|
||||
return True
|
||||
|
||||
# Load the container into podman
|
||||
log.info("Installing Dangerzone container image...")
|
||||
|
||||
p = subprocess.Popen(
|
||||
[self.get_runtime(), "load"],
|
||||
stdin=subprocess.PIPE,
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
|
||||
chunk_size = 10240
|
||||
compressed_container_path = get_resource_path("container.tar.gz")
|
||||
with gzip.open(compressed_container_path) as f:
|
||||
while True:
|
||||
chunk = f.read(chunk_size)
|
||||
if len(chunk) > 0:
|
||||
if p.stdin:
|
||||
p.stdin.write(chunk)
|
||||
else:
|
||||
break
|
||||
p.communicate()
|
||||
|
||||
if not self.is_container_installed():
|
||||
log.error("Failed to install the container image")
|
||||
return False
|
||||
|
||||
log.info("Container image installed")
|
||||
return True
|
||||
|
||||
# Load the container into podman
|
||||
log.info("Installing Dangerzone container image...")
|
||||
def is_container_installed(self) -> bool:
|
||||
"""
|
||||
See if the podman container is installed. Linux only.
|
||||
"""
|
||||
# Get the image id
|
||||
with open(get_resource_path("image-id.txt")) as f:
|
||||
expected_image_id = f.read().strip()
|
||||
|
||||
p = subprocess.Popen(
|
||||
[get_runtime(), "load"],
|
||||
stdin=subprocess.PIPE,
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
|
||||
chunk_size = 10240
|
||||
compressed_container_path = get_resource_path("container.tar.gz")
|
||||
with gzip.open(compressed_container_path) as f:
|
||||
while True:
|
||||
chunk = f.read(chunk_size)
|
||||
if len(chunk) > 0:
|
||||
if p.stdin:
|
||||
p.stdin.write(chunk)
|
||||
else:
|
||||
break
|
||||
p.communicate()
|
||||
|
||||
if not is_container_installed():
|
||||
log.error("Failed to install the container image")
|
||||
return False
|
||||
|
||||
log.info("Container image installed")
|
||||
return True
|
||||
|
||||
|
||||
def is_container_installed() -> bool:
|
||||
"""
|
||||
See if the podman container is installed. Linux only.
|
||||
"""
|
||||
# Get the image id
|
||||
with open(get_resource_path("image-id.txt")) as f:
|
||||
expected_image_id = f.read().strip()
|
||||
|
||||
# See if this image is already installed
|
||||
installed = False
|
||||
found_image_id = subprocess.check_output(
|
||||
[
|
||||
get_runtime(),
|
||||
"image",
|
||||
"list",
|
||||
"--format",
|
||||
"{{.ID}}",
|
||||
container_name,
|
||||
],
|
||||
text=True,
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
found_image_id = found_image_id.strip()
|
||||
|
||||
if found_image_id == expected_image_id:
|
||||
installed = True
|
||||
elif found_image_id == "":
|
||||
pass
|
||||
else:
|
||||
log.info("Deleting old dangerzone container image")
|
||||
|
||||
try:
|
||||
subprocess.check_output(
|
||||
[get_runtime(), "rmi", "--force", found_image_id],
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
except:
|
||||
log.warning("Couldn't delete old container image, so leaving it there")
|
||||
|
||||
return installed
|
||||
|
||||
|
||||
def parse_progress(document: Document, line: str) -> Tuple[bool, str, int]:
|
||||
"""
|
||||
Parses a line returned by the container.
|
||||
"""
|
||||
try:
|
||||
status = json.loads(line)
|
||||
except:
|
||||
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
||||
log.error(error_message)
|
||||
return (True, error_message, -1)
|
||||
|
||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||
s += Fore.CYAN + f"{status['percentage']}% "
|
||||
if status["error"]:
|
||||
s += Style.RESET_ALL + Fore.RED + status["text"]
|
||||
log.error(s)
|
||||
else:
|
||||
s += Style.RESET_ALL + status["text"]
|
||||
log.info(s)
|
||||
|
||||
return (status["error"], status["text"], status["percentage"])
|
||||
|
||||
|
||||
def exec(
|
||||
document: Document,
|
||||
args: List[str],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> int:
|
||||
args_str = " ".join(pipes.quote(s) for s in args)
|
||||
log.info("> " + args_str)
|
||||
|
||||
with subprocess.Popen(
|
||||
args,
|
||||
stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=1,
|
||||
universal_newlines=True,
|
||||
startupinfo=startupinfo,
|
||||
) as p:
|
||||
if p.stdout is not None:
|
||||
for line in p.stdout:
|
||||
(error, text, percentage) = parse_progress(document, line)
|
||||
if error:
|
||||
document.mark_as_failed()
|
||||
if percentage == 100.0:
|
||||
document.mark_as_safe()
|
||||
if stdout_callback:
|
||||
stdout_callback(error, text, percentage)
|
||||
|
||||
p.communicate()
|
||||
return p.returncode
|
||||
|
||||
|
||||
def exec_container(
|
||||
document: Document,
|
||||
command: List[str],
|
||||
extra_args: List[str] = [],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> int:
|
||||
container_runtime = get_runtime()
|
||||
|
||||
if get_runtime_name() == "podman":
|
||||
platform_args = []
|
||||
security_args = ["--security-opt", "no-new-privileges"]
|
||||
security_args += ["--userns", "keep-id"]
|
||||
else:
|
||||
platform_args = ["--platform", "linux/amd64"]
|
||||
security_args = ["--security-opt=no-new-privileges:true"]
|
||||
|
||||
# drop all linux kernel capabilities
|
||||
security_args += ["--cap-drop", "all"]
|
||||
user_args = ["-u", "dangerzone"]
|
||||
|
||||
prevent_leakage_args = ["--rm"]
|
||||
|
||||
args = (
|
||||
["run", "--network", "none"]
|
||||
+ platform_args
|
||||
+ user_args
|
||||
+ security_args
|
||||
+ prevent_leakage_args
|
||||
+ extra_args
|
||||
+ [container_name]
|
||||
+ command
|
||||
)
|
||||
|
||||
args = [container_runtime] + args
|
||||
return exec(document, args, stdout_callback)
|
||||
|
||||
|
||||
def convert(
|
||||
document: Document,
|
||||
ocr_lang: Optional[str],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> bool:
|
||||
success = False
|
||||
document.mark_as_converting()
|
||||
|
||||
if ocr_lang:
|
||||
ocr = "1"
|
||||
else:
|
||||
ocr = "0"
|
||||
|
||||
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
|
||||
os.makedirs(dz_tmp, exist_ok=True)
|
||||
|
||||
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
|
||||
pixel_dir = os.path.join(tmpdir.name, "pixels")
|
||||
safe_dir = os.path.join(tmpdir.name, "safe")
|
||||
os.makedirs(pixel_dir, exist_ok=True)
|
||||
os.makedirs(safe_dir, exist_ok=True)
|
||||
|
||||
# Convert document to pixels
|
||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "document-to-pixels"]
|
||||
extra_args = [
|
||||
"-v",
|
||||
f"{document.input_filename}:/tmp/input_file",
|
||||
"-v",
|
||||
f"{pixel_dir}:/dangerzone",
|
||||
]
|
||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
||||
if ret != 0:
|
||||
log.error("documents-to-pixels failed")
|
||||
else:
|
||||
# TODO: validate convert to pixels output
|
||||
|
||||
# Convert pixels to safe PDF
|
||||
command = ["/usr/bin/python3", "/usr/local/bin/dangerzone.py", "pixels-to-pdf"]
|
||||
extra_args = [
|
||||
"-v",
|
||||
f"{pixel_dir}:/dangerzone",
|
||||
"-v",
|
||||
f"{safe_dir}:/safezone",
|
||||
"-e",
|
||||
f"OCR={ocr}",
|
||||
"-e",
|
||||
f"OCR_LANGUAGE={ocr_lang}",
|
||||
]
|
||||
ret = exec_container(document, command, extra_args, stdout_callback)
|
||||
if ret != 0:
|
||||
log.error("pixels-to-pdf failed")
|
||||
else:
|
||||
# Move the final file to the right place
|
||||
if os.path.exists(document.output_filename):
|
||||
os.remove(document.output_filename)
|
||||
|
||||
container_output_filename = os.path.join(
|
||||
safe_dir, "safe-output-compressed.pdf"
|
||||
)
|
||||
shutil.move(container_output_filename, document.output_filename)
|
||||
|
||||
if document.archive_after_conversion:
|
||||
document.archive()
|
||||
|
||||
# We did it
|
||||
success = True
|
||||
|
||||
# Clean up
|
||||
tmpdir.cleanup()
|
||||
|
||||
return success
|
||||
|
||||
|
||||
def get_max_parallel_conversions() -> int:
|
||||
|
||||
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
||||
# https://github.com/freedomofpress/dangerzone/issues/257
|
||||
return 1
|
||||
|
||||
n_cpu = 1 # type: ignore [unreachable]
|
||||
if platform.system() == "Linux":
|
||||
# if on linux containers run natively
|
||||
cpu_count = os.cpu_count()
|
||||
if cpu_count is not None:
|
||||
n_cpu = cpu_count
|
||||
|
||||
elif get_runtime_name() == "docker":
|
||||
# For Windows and MacOS containers run in VM
|
||||
# So we obtain the CPU count for the VM
|
||||
n_cpu_str = subprocess.check_output(
|
||||
[get_runtime(), "info", "--format", "{{.NCPU}}"],
|
||||
# See if this image is already installed
|
||||
installed = False
|
||||
found_image_id = subprocess.check_output(
|
||||
[
|
||||
self.get_runtime(),
|
||||
"image",
|
||||
"list",
|
||||
"--format",
|
||||
"{{.ID}}",
|
||||
self.CONTAINER_NAME,
|
||||
],
|
||||
text=True,
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
n_cpu = int(n_cpu_str.strip())
|
||||
found_image_id = found_image_id.strip()
|
||||
|
||||
return 2 * n_cpu + 1
|
||||
if found_image_id == expected_image_id:
|
||||
installed = True
|
||||
elif found_image_id == "":
|
||||
pass
|
||||
else:
|
||||
log.info("Deleting old dangerzone container image")
|
||||
|
||||
try:
|
||||
subprocess.check_output(
|
||||
[self.get_runtime(), "rmi", "--force", found_image_id],
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
except:
|
||||
log.warning("Couldn't delete old container image, so leaving it there")
|
||||
|
||||
return installed
|
||||
|
||||
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
|
||||
"""
|
||||
Parses a line returned by the container.
|
||||
"""
|
||||
try:
|
||||
status = json.loads(line)
|
||||
except:
|
||||
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
||||
log.error(error_message)
|
||||
return (True, error_message, -1)
|
||||
|
||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||
s += Fore.CYAN + f"{status['percentage']}% "
|
||||
if status["error"]:
|
||||
s += Style.RESET_ALL + Fore.RED + status["text"]
|
||||
log.error(s)
|
||||
else:
|
||||
s += Style.RESET_ALL + status["text"]
|
||||
log.info(s)
|
||||
|
||||
return (status["error"], status["text"], status["percentage"])
|
||||
|
||||
def exec(
|
||||
self,
|
||||
document: Document,
|
||||
args: List[str],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> int:
|
||||
args_str = " ".join(pipes.quote(s) for s in args)
|
||||
log.info("> " + args_str)
|
||||
|
||||
with subprocess.Popen(
|
||||
args,
|
||||
stdin=None,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
bufsize=1,
|
||||
universal_newlines=True,
|
||||
startupinfo=startupinfo,
|
||||
) as p:
|
||||
if p.stdout is not None:
|
||||
for line in p.stdout:
|
||||
(error, text, percentage) = self.parse_progress(document, line)
|
||||
if error:
|
||||
document.mark_as_failed()
|
||||
if percentage == 100.0:
|
||||
document.mark_as_safe()
|
||||
if stdout_callback:
|
||||
stdout_callback(error, text, percentage)
|
||||
|
||||
p.communicate()
|
||||
return p.returncode
|
||||
|
||||
def exec_container(
|
||||
self,
|
||||
document: Document,
|
||||
command: List[str],
|
||||
extra_args: List[str] = [],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> int:
|
||||
container_runtime = self.get_runtime()
|
||||
|
||||
if self.get_runtime_name() == "podman":
|
||||
platform_args = []
|
||||
security_args = ["--security-opt", "no-new-privileges"]
|
||||
security_args += ["--userns", "keep-id"]
|
||||
else:
|
||||
platform_args = ["--platform", "linux/amd64"]
|
||||
security_args = ["--security-opt=no-new-privileges:true"]
|
||||
|
||||
# drop all linux kernel capabilities
|
||||
security_args += ["--cap-drop", "all"]
|
||||
user_args = ["-u", "dangerzone"]
|
||||
|
||||
prevent_leakage_args = ["--rm"]
|
||||
|
||||
args = (
|
||||
["run", "--network", "none"]
|
||||
+ platform_args
|
||||
+ user_args
|
||||
+ security_args
|
||||
+ prevent_leakage_args
|
||||
+ extra_args
|
||||
+ [self.CONTAINER_NAME]
|
||||
+ command
|
||||
)
|
||||
|
||||
args = [container_runtime] + args
|
||||
return self.exec(document, args, stdout_callback)
|
||||
|
||||
def convert(
|
||||
self,
|
||||
document: Document,
|
||||
ocr_lang: Optional[str],
|
||||
stdout_callback: Optional[Callable] = None,
|
||||
) -> bool:
|
||||
success = False
|
||||
document.mark_as_converting()
|
||||
|
||||
if ocr_lang:
|
||||
ocr = "1"
|
||||
else:
|
||||
ocr = "0"
|
||||
|
||||
dz_tmp = os.path.join(appdirs.user_config_dir("dangerzone"), "tmp")
|
||||
os.makedirs(dz_tmp, exist_ok=True)
|
||||
|
||||
tmpdir = tempfile.TemporaryDirectory(dir=dz_tmp)
|
||||
pixel_dir = os.path.join(tmpdir.name, "pixels")
|
||||
safe_dir = os.path.join(tmpdir.name, "safe")
|
||||
os.makedirs(pixel_dir, exist_ok=True)
|
||||
os.makedirs(safe_dir, exist_ok=True)
|
||||
|
||||
# Convert document to pixels
|
||||
command = [
|
||||
"/usr/bin/python3",
|
||||
"/usr/local/bin/dangerzone.py",
|
||||
"document-to-pixels",
|
||||
]
|
||||
extra_args = [
|
||||
"-v",
|
||||
f"{document.input_filename}:/tmp/input_file",
|
||||
"-v",
|
||||
f"{pixel_dir}:/dangerzone",
|
||||
]
|
||||
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||
if ret != 0:
|
||||
log.error("documents-to-pixels failed")
|
||||
else:
|
||||
# TODO: validate convert to pixels output
|
||||
|
||||
# Convert pixels to safe PDF
|
||||
command = [
|
||||
"/usr/bin/python3",
|
||||
"/usr/local/bin/dangerzone.py",
|
||||
"pixels-to-pdf",
|
||||
]
|
||||
extra_args = [
|
||||
"-v",
|
||||
f"{pixel_dir}:/dangerzone",
|
||||
"-v",
|
||||
f"{safe_dir}:/safezone",
|
||||
"-e",
|
||||
f"OCR={ocr}",
|
||||
"-e",
|
||||
f"OCR_LANGUAGE={ocr_lang}",
|
||||
]
|
||||
ret = self.exec_container(document, command, extra_args, stdout_callback)
|
||||
if ret != 0:
|
||||
log.error("pixels-to-pdf failed")
|
||||
else:
|
||||
# Move the final file to the right place
|
||||
if os.path.exists(document.output_filename):
|
||||
os.remove(document.output_filename)
|
||||
|
||||
container_output_filename = os.path.join(
|
||||
safe_dir, "safe-output-compressed.pdf"
|
||||
)
|
||||
shutil.move(container_output_filename, document.output_filename)
|
||||
|
||||
if document.archive_after_conversion:
|
||||
document.archive()
|
||||
|
||||
# We did it
|
||||
success = True
|
||||
|
||||
# Clean up
|
||||
tmpdir.cleanup()
|
||||
|
||||
return success
|
||||
|
||||
def get_max_parallel_conversions(self) -> int:
|
||||
|
||||
# FIXME hardcoded 1 until timeouts are more limited and better handled
|
||||
# https://github.com/freedomofpress/dangerzone/issues/257
|
||||
return 1
|
||||
|
||||
n_cpu = 1 # type: ignore [unreachable]
|
||||
if platform.system() == "Linux":
|
||||
# if on linux containers run natively
|
||||
cpu_count = os.cpu_count()
|
||||
if cpu_count is not None:
|
||||
n_cpu = cpu_count
|
||||
|
||||
elif self.get_runtime_name() == "docker":
|
||||
# For Windows and MacOS containers run in VM
|
||||
# So we obtain the CPU count for the VM
|
||||
n_cpu_str = subprocess.check_output(
|
||||
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
|
||||
text=True,
|
||||
startupinfo=get_subprocess_startupinfo(),
|
||||
)
|
||||
n_cpu = int(n_cpu_str.strip())
|
||||
|
||||
return 2 * n_cpu + 1
|
||||
|
||||
|
||||
# From global_common:
|
||||
|
|
|
@ -41,6 +41,8 @@ class DangerzoneCore(object):
|
|||
|
||||
self.documents: List[Document] = []
|
||||
|
||||
self.isolation_provider = isolation_provider.Container()
|
||||
|
||||
def add_document_from_filename(
|
||||
self,
|
||||
input_filename: str,
|
||||
|
@ -59,13 +61,13 @@ class DangerzoneCore(object):
|
|||
self, ocr_lang: Optional[str], stdout_callback: Optional[Callable] = None
|
||||
) -> None:
|
||||
def convert_doc(document: Document) -> None:
|
||||
success = isolation_provider.convert(
|
||||
success = self.isolation_provider.convert(
|
||||
document,
|
||||
ocr_lang,
|
||||
stdout_callback,
|
||||
)
|
||||
|
||||
max_jobs = isolation_provider.get_max_parallel_conversions()
|
||||
max_jobs = self.isolation_provider.get_max_parallel_conversions()
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=max_jobs) as executor:
|
||||
executor.map(convert_doc, self.documents)
|
||||
|
||||
|
|
Loading…
Reference in a new issue