mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
Add Qubes isolation provider
Add an isolation provider for Qubes that performs the document conversion as follows:

Document to pixels phase
------------------------
1. Starts a disposable qube by calling either the dz.Convert or the dz.ConvertDev RPC call, depending on the execution context.
2. Sends the file to the disposable qube through its stdin.
   * If we call the conversion from the development environment, also passes the conversion module as a Python zipfile, before the suspicious document.
3. Reads the number of pages, their dimensions, and the page data.

Pixels to PDF phase
-------------------
1. Writes the page data under /tmp/dangerzone, so that the `pixels_to_pdf` module can read them.
2. Passes OCR parameters as envvars.
3. Calls the `pixels_to_pdf` main function, as if it was running within a container. Waits until the PDF gets created.
4. Moves the resulting PDF to the proper directory.

Fixes #414
This commit is contained in:
parent
c194606550
commit
baeab9d7eb
1 changed file with 181 additions and 0 deletions
181
dangerzone/isolation_provider/qubes.py
Normal file
181
dangerzone/isolation_provider/qubes.py
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
import asyncio
|
||||||
|
import glob
|
||||||
|
import inspect
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import IO, Callable, Optional
|
||||||
|
|
||||||
|
from ..document import Document
|
||||||
|
from ..util import get_resource_path
|
||||||
|
from .base import IsolationProvider
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
from ..conversion.pixels_to_pdf import PixelsToPDF
|
||||||
|
from ..util import get_subprocess_startupinfo, get_tmp_dir
|
||||||
|
|
||||||
|
# Fixed location where the converted PDF is picked up from.
# FIXME won't work for parallel conversions (see #454)
CONVERTED_FILE_PATH = "/tmp/safe-output-compressed.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
    """Return the result of reading ``buff_size`` bytes from the stdout of *p*.

    Args:
        p: Process whose ``stdout`` attribute is a readable binary stream.
        buff_size: Number of bytes requested from the stream.

    Returns:
        Whatever ``p.stdout.read(buff_size)`` yields; no length check is made
        here, so the result may be shorter than requested.
    """
    stream = p.stdout
    data = stream.read(buff_size)  # type: ignore [union-attr]
    return data
|
||||||
|
|
||||||
|
|
||||||
|
def read_int(p: subprocess.Popen) -> int:
    """Read 2 bytes from the stdout of *p* and decode them as an unsigned int.

    The bytes are decoded as a big-endian value. The byte order is passed
    explicitly: ``int.from_bytes`` only allows it to be omitted (defaulting to
    ``"big"``) on Python 3.11 and later, so spelling it out keeps this working
    on older interpreters and documents the wire format.

    Args:
        p: Process whose ``stdout`` attribute is a readable binary stream.

    Returns:
        The decoded value, in the range 0..65535. A stream that is already at
        EOF yields ``b""``, which decodes to 0.
    """
    # The value comes from the other side of the pipe; treat it as untrusted.
    untrusted_int = p.stdout.read(2)  # type: ignore [union-attr]
    return int.from_bytes(untrusted_int, "big", signed=False)
|
||||||
|
|
||||||
|
|
||||||
|
class Qubes(IsolationProvider):
    """Uses a disposable qube for performing the conversion.

    The conversion runs in two phases:

    1. The document is sent to a disposable qube (through
       ``qrexec-client-vm``), which streams back the number of pages followed
       by the width, height and raw RGB data of each page.
    2. The page data is written under ``/tmp/dangerzone`` and turned into a PDF
       locally with ``PixelsToPDF``.
    """

    def install(self) -> bool:
        """Return True unconditionally; nothing is installed here."""
        return True

    def _convert(
        self,
        document: Document,
        ocr_lang: Optional[str],
        stdout_callback: Optional[Callable] = None,
    ) -> bool:
        """Convert *document* to a safe PDF using a disposable qube.

        Args:
            document: The document to convert. Its ``input_filename`` is read
                and the result is moved to its ``output_filename``.
            ocr_lang: OCR language. When truthy, ``OCR=1`` and
                ``OCR_LANGUAGE=<ocr_lang>`` are exported to the environment
                for the duration of the pixels-to-PDF phase, and the first
                phase only accounts for 50% of the progress.
            stdout_callback: Optional callable invoked as
                ``stdout_callback(False, text, percentage)`` on every progress
                update.

        Returns:
            True when the PDF was created and moved in place. False when the
            qube reported zero pages, or when the page stream was empty or
            truncated.
        """

        def report(text: str, percentage: float) -> None:
            # ``print_progress`` is not defined in this class; presumably it is
            # provided by ``IsolationProvider`` -- TODO confirm.
            self.print_progress(document, False, text, percentage)
            if stdout_callback:
                stdout_callback(False, text, percentage)

        # FIXME won't work on windows, nor with multi-conversion
        out_dir = Path("/tmp/dangerzone")
        if out_dir.exists():
            shutil.rmtree(out_dir)
        out_dir.mkdir()

        # Reset hard-coded state
        if os.path.exists(CONVERTED_FILE_PATH):
            os.remove(CONVERTED_FILE_PATH)

        percentage = 0.0

        with open(document.input_filename, "rb") as f:
            # TODO handle lack of memory to start qube
            dev_mode = getattr(sys, "dangerzone_dev", False)
            if dev_mode:
                # Use dz.ConvertDev RPC call instead, if we are in development mode.
                # Basically, the change is that we also transfer the necessary Python
                # code as a zipfile, before sending the doc that the user requested.
                p = subprocess.Popen(
                    ["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.ConvertDev"],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                )
            else:
                p = subprocess.Popen(
                    ["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.Convert"],
                    stdin=f,
                    stdout=subprocess.PIPE,
                )

            # Using the process as a context manager closes its pipes and waits
            # for it on every exit path (early return, exception, success), so
            # neither the stdout pipe nor the child process is leaked.
            with p:
                if dev_mode:
                    assert p.stdin is not None

                    # Send the dangerzone module first.
                    self.teleport_dz_module(p.stdin)

                    # Finally, send the document, as in the normal case.
                    p.stdin.write(f.read())
                    p.stdin.close()

                n_pages = read_int(p)
                if n_pages == 0:
                    # FIXME: Fail loudly in that case
                    return False

                # With OCR enabled, the pixels phase is only half of the work.
                percentage_per_page = (50.0 if ocr_lang else 100.0) / n_pages

                for page in range(1, n_pages + 1):
                    # TODO handle too big width > MAX_PAGE_WIDTH
                    # TODO handle too big height > MAX_PAGE_HEIGHT

                    width = read_int(p)
                    height = read_int(p)
                    n_bytes = width * height * 3  # three color channels
                    untrusted_pixels = read_bytes(p, n_bytes)

                    # A zero-sized page (which is also what an exhausted stream
                    # decodes to) or fewer bytes than announced means the page
                    # stream is unusable. Bail out instead of writing bogus
                    # page files for the second phase.
                    if n_bytes == 0 or len(untrusted_pixels) != n_bytes:
                        log.error(
                            "Invalid or truncated data for page %d/%d",
                            page,
                            n_pages,
                        )
                        return False

                    # Wrapper code
                    (out_dir / f"page-{page}.width").write_text(str(width))
                    (out_dir / f"page-{page}.height").write_text(str(height))
                    (out_dir / f"page-{page}.rgb").write_bytes(untrusted_pixels)

                    percentage += percentage_per_page
                    report(f"Converting page {page}/{n_pages} to pixels", percentage)

        # TODO handle leftover code input
        report("Converted document to pixels", percentage)

        # FIXME pass OCR stuff properly (see #455)
        old_environ = dict(os.environ)
        try:
            if ocr_lang:
                os.environ["OCR"] = "1"
                os.environ["OCR_LANGUAGE"] = ocr_lang

            asyncio.run(
                PixelsToPDF().convert()
            )  # TODO add progress updates on second stage

            report("Safe PDF created", 100.0)
        finally:
            # FIXME remove once the OCR args are no longer passed with env vars
            # Restore the environment even if the conversion raised, so that
            # the OCR variables never leak into the rest of the process.
            os.environ.clear()
            os.environ.update(old_environ)

        shutil.move(CONVERTED_FILE_PATH, document.output_filename)
        return True

    def get_max_parallel_conversions(self) -> int:
        """Return the maximum number of parallel conversions, which is 1."""
        return 1

    def teleport_dz_module(self, wpipe: IO[bytes]) -> None:
        """Send the dangerzone module to another qube, as a zipfile.

        The data written to *wpipe* is the size of the zipfile as a 4-byte
        big-endian integer, followed by the zipfile itself.

        Args:
            wpipe: Writable binary stream connected to the other qube.
        """
        # Grab the absolute file path of the dangerzone module.
        import dangerzone.conversion as _conv

        _conv_path = Path(inspect.getfile(_conv)).parent
        temp_file = io.BytesIO()

        # Create a Python zipfile that contains all the files of the dangerzone module.
        with zipfile.PyZipFile(temp_file, "w") as z:
            z.mkdir("dangerzone/")
            z.writestr("dangerzone/__init__.py", "")
            z.writepy(str(_conv_path), basename="dangerzone/")

        # Send the following data:
        # 1. The size of the Python zipfile, so that the server can know when to
        #    stop.
        # 2. The Python zipfile itself.
        payload = temp_file.getvalue()
        # Explicit byte order: same value as the Python 3.11+ default, but it
        # makes the wire format unambiguous.
        wpipe.write(len(payload).to_bytes(4, "big"))
        wpipe.write(payload)
|
Loading…
Reference in a new issue