dangerzone/dangerzone/isolation_provider/container.py
Alex Pyrgiotis 2042591964
container: Copy files before mounting them
Copy input files in a temporary dir before mounting them, thereby
changing their permissions, without affecting the original files. This
way, we can avoid cases where a file is accessible to the user only due
to a supplemental user group, which does not work for containers.

Fixes #157
Fixes #260
Fixes #335
2023-02-17 01:15:08 +02:00

342 lines
11 KiB
Python

import gzip
import json
import logging
import os
import pathlib
import pipes
import platform
import shutil
import subprocess
import tempfile
from typing import Callable, List, Optional, Tuple
from ..document import Document
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
from .base import IsolationProvider
# Define startupinfo for subprocesses
if platform.system() == "Windows":
startupinfo = subprocess.STARTUPINFO() # type: ignore [attr-defined]
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW # type: ignore [attr-defined]
else:
startupinfo = None
log = logging.getLogger(__name__)
class NoContainerTechException(Exception):
def __init__(self, container_tech: str) -> None:
super().__init__(f"{container_tech} is not installed")
class Container(IsolationProvider):
# Name of the dangerzone container
CONTAINER_NAME = "dangerzone.rocks/dangerzone"
def __init__(self, enable_timeouts: bool) -> None:
self.enable_timeouts = 1 if enable_timeouts else 0
super().__init__()
@staticmethod
def get_runtime_name() -> str:
if platform.system() == "Linux":
runtime_name = "podman"
else:
# Windows, Darwin, and unknown use docker for now, dangerzone-vm eventually
runtime_name = "docker"
return runtime_name
@staticmethod
def get_runtime() -> str:
container_tech = Container.get_runtime_name()
runtime = shutil.which(container_tech)
if runtime is None:
raise NoContainerTechException(container_tech)
return runtime
@staticmethod
def install() -> bool:
"""
Make sure the podman container is installed. Linux only.
"""
if Container.is_container_installed():
return True
# Load the container into podman
log.info("Installing Dangerzone container image...")
p = subprocess.Popen(
[Container.get_runtime(), "load"],
stdin=subprocess.PIPE,
startupinfo=get_subprocess_startupinfo(),
)
chunk_size = 10240
compressed_container_path = get_resource_path("container.tar.gz")
with gzip.open(compressed_container_path) as f:
while True:
chunk = f.read(chunk_size)
if len(chunk) > 0:
if p.stdin:
p.stdin.write(chunk)
else:
break
p.communicate()
if not Container.is_container_installed():
log.error("Failed to install the container image")
return False
log.info("Container image installed")
return True
@staticmethod
def is_container_installed() -> bool:
"""
See if the podman container is installed. Linux only.
"""
# Get the image id
with open(get_resource_path("image-id.txt")) as f:
expected_image_id = f.read().strip()
# See if this image is already installed
installed = False
found_image_id = subprocess.check_output(
[
Container.get_runtime(),
"image",
"list",
"--format",
"{{.ID}}",
Container.CONTAINER_NAME,
],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
found_image_id = found_image_id.strip()
if found_image_id == expected_image_id:
installed = True
elif found_image_id == "":
pass
else:
log.info("Deleting old dangerzone container image")
try:
subprocess.check_output(
[Container.get_runtime(), "rmi", "--force", found_image_id],
startupinfo=get_subprocess_startupinfo(),
)
except:
log.warning("Couldn't delete old container image, so leaving it there")
return installed
def parse_progress(self, document: Document, line: str) -> Tuple[bool, str, int]:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
return (True, error_message, -1)
self.print_progress(
document, status["error"], status["text"], status["percentage"]
)
return (status["error"], status["text"], status["percentage"])
def exec(
self,
document: Document,
args: List[str],
stdout_callback: Optional[Callable] = None,
) -> int:
args_str = " ".join(pipes.quote(s) for s in args)
log.info("> " + args_str)
with subprocess.Popen(
args,
stdin=None,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
bufsize=1,
universal_newlines=True,
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
(error, text, percentage) = self.parse_progress(document, line)
if stdout_callback:
stdout_callback(error, text, percentage)
p.communicate()
return p.returncode
def exec_container(
self,
document: Document,
command: List[str],
extra_args: List[str] = [],
stdout_callback: Optional[Callable] = None,
) -> int:
container_runtime = self.get_runtime()
if self.get_runtime_name() == "podman":
security_args = ["--security-opt", "no-new-privileges"]
security_args += ["--userns", "keep-id"]
else:
security_args = ["--security-opt=no-new-privileges:true"]
# drop all linux kernel capabilities
security_args += ["--cap-drop", "all"]
user_args = ["-u", "dangerzone"]
prevent_leakage_args = ["--rm"]
args = (
["run", "--network", "none"]
+ user_args
+ security_args
+ prevent_leakage_args
+ extra_args
+ [self.CONTAINER_NAME]
+ command
)
args = [container_runtime] + args
return self.exec(document, args, stdout_callback)
def _convert(
self,
document: Document,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
# Create a temporary directory inside the cache directory for this run. Then,
# create some subdirectories for the various stages of the file conversion:
#
# * unsafe: Where the input file will be copied
# * pixel: Where the RGB data will be stored
# * safe: Where the final PDF file will be stored
with tempfile.TemporaryDirectory(dir=get_tmp_dir()) as t:
tmp_dir = pathlib.Path(t)
unsafe_dir = tmp_dir / "unsafe"
unsafe_dir.mkdir()
pixel_dir = tmp_dir / "pixels"
pixel_dir.mkdir()
safe_dir = tmp_dir / "safe"
safe_dir.mkdir()
return self._convert_with_tmpdirs(
document=document,
unsafe_dir=unsafe_dir,
pixel_dir=pixel_dir,
safe_dir=safe_dir,
ocr_lang=ocr_lang,
stdout_callback=stdout_callback,
)
def _convert_with_tmpdirs(
self,
document: Document,
unsafe_dir: pathlib.Path,
pixel_dir: pathlib.Path,
safe_dir: pathlib.Path,
ocr_lang: Optional[str],
stdout_callback: Optional[Callable] = None,
) -> bool:
success = False
if ocr_lang:
ocr = "1"
else:
ocr = "0"
copied_file = unsafe_dir / "input_file"
shutil.copyfile(f"{document.input_filename}", copied_file)
# Convert document to pixels
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"document-to-pixels",
]
extra_args = [
"-v",
f"{copied_file}:/tmp/input_file:Z",
"-v",
f"{pixel_dir}:/dangerzone:Z",
"-e",
f"ENABLE_TIMEOUTS={self.enable_timeouts}",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("documents-to-pixels failed")
else:
# TODO: validate convert to pixels output
# Convert pixels to safe PDF
command = [
"/usr/bin/python3",
"/usr/local/bin/dangerzone.py",
"pixels-to-pdf",
]
extra_args = [
"-v",
f"{pixel_dir}:/dangerzone:Z",
"-v",
f"{safe_dir}:/safezone:Z",
"-e",
f"OCR={ocr}",
"-e",
f"OCR_LANGUAGE={ocr_lang}",
"-e",
f"ENABLE_TIMEOUTS={self.enable_timeouts}",
]
ret = self.exec_container(document, command, extra_args, stdout_callback)
if ret != 0:
log.error("pixels-to-pdf failed")
else:
# Move the final file to the right place
if os.path.exists(document.output_filename):
os.remove(document.output_filename)
container_output_filename = os.path.join(
safe_dir, "safe-output-compressed.pdf"
)
shutil.move(container_output_filename, document.output_filename)
# We did it
success = True
return success
def get_max_parallel_conversions(self) -> int:
# FIXME hardcoded 1 until timeouts are more limited and better handled
# https://github.com/freedomofpress/dangerzone/issues/257
return 1
n_cpu = 1 # type: ignore [unreachable]
if platform.system() == "Linux":
# if on linux containers run natively
cpu_count = os.cpu_count()
if cpu_count is not None:
n_cpu = cpu_count
elif self.get_runtime_name() == "docker":
# For Windows and MacOS containers run in VM
# So we obtain the CPU count for the VM
n_cpu_str = subprocess.check_output(
[self.get_runtime(), "info", "--format", "{{.NCPU}}"],
text=True,
startupinfo=get_subprocess_startupinfo(),
)
n_cpu = int(n_cpu_str.strip())
return 2 * n_cpu + 1