mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 10:12:38 +02:00
Add Qubes isolation provider
Add an isolation provider for Qubes that performs the document conversion as follows:

Document to pixels phase
------------------------
1. Starts a disposable qube by calling either the dz.Convert or the dz.ConvertDev RPC call, depending on the execution context.
2. Sends the file to the disposable qube through its stdin.
   * If we call the conversion from the development environment, also pass the conversion module as a Python zipfile, before the suspicious document.
3. Reads the number of pages, their dimensions, and the page data.

Pixels to PDF phase
-------------------
1. Writes the page data under /tmp/dangerzone, so that the `pixels_to_pdf` module can read them.
2. Passes OCR parameters as envvars.
3. Calls the `pixels_to_pdf` main function, as if it was running within a container, and waits until the PDF gets created.
4. Moves the resulting PDF to the proper directory.

Fixes #414
This commit is contained in:
parent
c194606550
commit
baeab9d7eb
1 changed files with 181 additions and 0 deletions
181
dangerzone/isolation_provider/qubes.py
Normal file
181
dangerzone/isolation_provider/qubes.py
Normal file
|
@ -0,0 +1,181 @@
|
|||
import asyncio
|
||||
import glob
|
||||
import inspect
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import IO, Callable, Optional
|
||||
|
||||
from ..document import Document
|
||||
from ..util import get_resource_path
|
||||
from .base import IsolationProvider
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
||||
|
||||
from ..conversion.pixels_to_pdf import PixelsToPDF
|
||||
from ..util import get_subprocess_startupinfo, get_tmp_dir
|
||||
|
||||
# Fixed location of the final PDF, before it is moved to the user's output path.
# FIXME won't work for parallel conversions (see #454)
CONVERTED_FILE_PATH = "/tmp/safe-output-compressed.pdf"
|
||||
|
||||
|
||||
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
    """Return the result of reading `buff_size` bytes from the stdout of `p`."""
    stream = p.stdout
    data = stream.read(buff_size)  # type: ignore [union-attr]
    return data
|
||||
|
||||
|
||||
def read_int(p: subprocess.Popen) -> int:
    """Read 2 bytes from the stdout of `p` and decode them as an unsigned int.

    The bytes are interpreted in big-endian order. The byte order is spelled
    out explicitly because `int.from_bytes()` only accepts a missing
    `byteorder` argument on Python 3.11 and later.

    Note that a short read is not detected here: if the stream is already at
    EOF, the empty result decodes to 0.
    """
    untrusted_int = p.stdout.read(2)  # type: ignore [union-attr]
    return int.from_bytes(untrusted_int, "big", signed=False)
|
||||
|
||||
|
||||
class Qubes(IsolationProvider):
    """Uses a disposable qube for performing the conversion.

    The conversion is split in two phases:

    1. Document to pixels: the document is sent to a disposable qube through
       the stdin of a `qrexec-client-vm` process, and the number of pages,
       their dimensions and their RGB data are read back from its stdout.
    2. Pixels to PDF: the page data are written under /tmp/dangerzone and
       `PixelsToPDF` is run in this process to create the final PDF.
    """

    def install(self) -> bool:
        """Nothing needs to be installed for this provider; always True."""
        return True

    def _convert(
        self,
        document: Document,
        ocr_lang: Optional[str],
        stdout_callback: Optional[Callable] = None,
    ) -> bool:
        """Convert `document` to a safe PDF using a disposable qube.

        Args:
            document: The document to convert. Its `input_filename` is read
                and the result is moved to its `output_filename`.
            ocr_lang: The OCR language, or a falsy value to skip OCR. It is
                passed to the pixels-to-PDF phase through the `OCR` and
                `OCR_LANGUAGE` environment variables.
            stdout_callback: Optional callable that is invoked with
                `(error, text, percentage)` on every progress update.

        Returns:
            True if the safe PDF was created and moved in place, False if the
            disposable qube reported zero pages.
        """

        def report(text: str, percentage: float) -> None:
            # Every progress update goes both to the provider and to the
            # optional callback of the caller.
            self.print_progress(document, False, text, percentage)
            if stdout_callback:
                stdout_callback(False, text, percentage)

        # FIXME won't work on windows, nor with multi-conversion
        out_dir = Path("/tmp/dangerzone")
        if out_dir.exists():
            shutil.rmtree(out_dir)
        out_dir.mkdir()

        # Reset hard-coded state
        if os.path.exists(CONVERTED_FILE_PATH):
            os.remove(CONVERTED_FILE_PATH)

        percentage = 0.0

        with open(document.input_filename, "rb") as f:
            # TODO handle lack of memory to start qube
            dev_mode = getattr(sys, "dangerzone_dev", False)
            if dev_mode:
                # Use dz.ConvertDev RPC call instead, if we are in development
                # mode. Basically, the change is that we also transfer the
                # necessary Python code as a zipfile, before sending the doc
                # that the user requested.
                p = subprocess.Popen(
                    ["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.ConvertDev"],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                )
            else:
                p = subprocess.Popen(
                    ["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.Convert"],
                    stdin=f,
                    stdout=subprocess.PIPE,
                )

            try:
                if dev_mode:
                    assert p.stdin is not None

                    # Send the dangerzone module first.
                    self.teleport_dz_module(p.stdin)

                    # Finally, send the document, as in the normal case. Stream
                    # it, so that it does not have to fit in memory at once.
                    shutil.copyfileobj(f, p.stdin)
                    p.stdin.close()

                n_pages = read_int(p)
                if n_pages == 0:
                    # FIXME: Fail loudly in that case
                    return False

                # With OCR, the first phase accounts for half of the progress.
                percentage_per_page = (50.0 if ocr_lang else 100.0) / n_pages

                for page in range(1, n_pages + 1):
                    # TODO handle too width > MAX_PAGE_WIDTH
                    # TODO handle too big height > MAX_PAGE_HEIGHT
                    width = read_int(p)
                    height = read_int(p)
                    untrusted_pixels = read_bytes(
                        p, width * height * 3
                    )  # three color channels

                    # Wrapper code
                    (out_dir / f"page-{page}.width").write_text(str(width))
                    (out_dir / f"page-{page}.height").write_text(str(height))
                    (out_dir / f"page-{page}.rgb").write_bytes(untrusted_pixels)

                    percentage += percentage_per_page
                    report(f"Converting page {page}/{n_pages} to pixels", percentage)
            finally:
                # Never leave the client process or its pipes behind, not even
                # on the early return or when an exception is raised.
                self._reap_process(p)

        # TODO handle leftover code input
        report("Converted document to pixels", percentage)

        # FIXME pass OCR stuff properly (see #455)
        old_environ = dict(os.environ)
        if ocr_lang:
            os.environ["OCR"] = "1"
            os.environ["OCR_LANGUAGE"] = ocr_lang

        try:
            asyncio.run(
                PixelsToPDF().convert()
            )  # TODO add progress updates on second stage
        finally:
            # FIXME remove once the OCR args are no longer passed with env vars
            # Restore the environment even if the conversion fails, so that
            # the OCR variables do not leak into later conversions.
            os.environ.clear()
            os.environ.update(old_environ)

        report("Safe PDF created", 100.0)

        shutil.move(CONVERTED_FILE_PATH, document.output_filename)
        return True

    @staticmethod
    def _reap_process(p: subprocess.Popen, timeout: float = 5.0) -> None:
        """Close the pipes of `p` and wait for it to exit, killing it on timeout."""
        for pipe in (p.stdin, p.stdout):
            if pipe is None or pipe.closed:
                continue
            try:
                pipe.close()
            except OSError as e:
                # Closing stdin flushes it, which fails if the peer is gone.
                log.debug("Could not close pipe of conversion process: %s", e)

        try:
            p.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            log.warning("Conversion process did not exit in time, killing it")
            p.kill()
            p.wait()

    def get_max_parallel_conversions(self) -> int:
        """Return 1: the hard-coded paths in use rule out parallel conversions."""
        return 1

    def teleport_dz_module(self, wpipe: IO[bytes]) -> None:
        """Send the dangerzone module to another qube, as a zipfile.

        Args:
            wpipe: Writable binary stream that receives the size of the
                zipfile as a 4-byte big-endian integer, followed by the
                zipfile itself.
        """
        # Grab the absolute file path of the dangerzone module.
        import dangerzone.conversion as _conv

        _conv_path = Path(inspect.getfile(_conv)).parent
        temp_file = io.BytesIO()

        # Create a Python zipfile that contains all the files of the dangerzone module.
        with zipfile.PyZipFile(temp_file, "w") as z:
            z.mkdir("dangerzone/")
            z.writestr("dangerzone/__init__.py", "")
            z.writepy(str(_conv_path), basename="dangerzone/")

        # Send the following data:
        # 1. The size of the Python zipfile, so that the server can know when to
        #    stop.
        # 2. The Python zipfile itself.
        payload = temp_file.getvalue()
        wpipe.write(len(payload).to_bytes(4, "big"))
        wpipe.write(payload)
|
Loading…
Reference in a new issue