Add Qubes isolation provider

Add an isolation provider for Qubes that performs the document
conversion as follows:

Document to pixels phase
------------------------

1. Starts a disposable qube by calling either the dz.Convert or the
   dz.ConvertDev RPC call, depending on the execution context.
2. Sends the file to the disposable qube through its stdin.
   * If we call the conversion from the development environment, also
     pass the conversion module as a Python zipfile, before the
     suspicious document.
3. Reads the number of pages, their dimensions, and the page data.

Pixels to PDF phase
-------------------

1. Writes the page data under /tmp/dangerzone, so that the
   `pixels_to_pdf` module can read it.
2. Passes the OCR parameters as environment variables.
3. Calls the `pixels_to_pdf` main function, as if it were running within a
   container, and waits until the PDF gets created.
4. Moves the resulting PDF to the proper directory.

Fixes #414
This commit is contained in:
deeplow 2023-06-20 19:50:07 +03:00 committed by Alex Pyrgiotis
parent c194606550
commit baeab9d7eb
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA

View file

@ -0,0 +1,181 @@
import asyncio
import glob
import inspect
import io
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import time
import zipfile
from pathlib import Path
from typing import IO, Callable, Optional
from ..document import Document
from ..util import get_resource_path
from .base import IsolationProvider
log = logging.getLogger(__name__)
from ..conversion.pixels_to_pdf import PixelsToPDF
from ..util import get_subprocess_startupinfo, get_tmp_dir
# Fixed location of the converted PDF. The Qubes provider deletes any stale
# file at this path before a conversion and moves the file from here to the
# document's output filename afterwards. Presumably this is where the
# pixels-to-PDF step writes its result -- confirm against that module.
CONVERTED_FILE_PATH = (
# FIXME won't work for parallel conversions (see #454)
"/tmp/safe-output-compressed.pdf"
)
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
    """Read up to `buff_size` bytes from the stdout of process `p`.

    The result may be shorter than `buff_size` (possibly empty) if the stream
    ends first; callers that need an exact amount must check the length.
    """
    # `p.stdout` is only set when the process was started with a stdout pipe,
    # hence the type-checker override.
    return p.stdout.read(buff_size)  # type: ignore [union-attr]
def read_int(p: subprocess.Popen) -> int:
    """Read 2 bytes from the stdout of `p` and decode them as an unsigned int.

    The bytes are interpreted as big-endian. The byte order is passed
    explicitly because `int.from_bytes` only has a default for it on
    Python 3.11+; naming it also pins down the wire format.

    If the stream has already ended, fewer than 2 bytes are available; an
    empty read decodes to 0.
    """
    untrusted_int = p.stdout.read(2)  # type: ignore [union-attr]
    return int.from_bytes(untrusted_int, "big", signed=False)
class Qubes(IsolationProvider):
    """Uses a disposable qube for performing the conversion.

    The document is streamed to a disposable qube over qrexec, the rendered
    pages are read back as raw pixel data and stored under /tmp/dangerzone,
    and the pixels-to-PDF step is then run locally on those pages.
    """

    def install(self) -> bool:
        """Return True unconditionally; this provider has no install step."""
        return True

    def _convert(
        self,
        document: Document,
        ocr_lang: Optional[str],
        stdout_callback: Optional[Callable] = None,
    ) -> bool:
        """Convert `document` to a safe PDF with the help of a disposable qube.

        Args:
            document: The document to convert. Its `input_filename` is read
                and the result is moved to its `output_filename`.
            ocr_lang: OCR language handed to the pixels-to-PDF step through
                the `OCR_LANGUAGE` environment variable. A falsy value leaves
                the OCR variables unset.
            stdout_callback: Optional callable invoked as
                `stdout_callback(False, text, percentage)` on every progress
                update.

        Returns:
            True once the safe PDF has been moved into place. False if the
            qube reported zero pages or its output ended before all announced
            page data was received.
        """

        def report(text: str, percentage: float) -> None:
            # Single place for the progress reporting done after each stage.
            self.print_progress(document, False, text, percentage)
            if stdout_callback:
                stdout_callback(False, text, percentage)

        # FIXME won't work on windows, nor with multi-conversion
        out_dir = Path("/tmp/dangerzone")
        if out_dir.exists():
            shutil.rmtree(out_dir)
        out_dir.mkdir()

        # Reset hard-coded state
        if os.path.exists(CONVERTED_FILE_PATH):
            os.remove(CONVERTED_FILE_PATH)

        percentage = 0.0

        with open(document.input_filename, "rb") as f:
            # TODO handle lack of memory to start qube
            # In development mode, use the dz.ConvertDev RPC call instead.
            # Basically, the change is that we also transfer the necessary
            # Python code as a zipfile, before sending the doc that the user
            # requested.
            dev_mode = getattr(sys, "dangerzone_dev", False)
            p = subprocess.Popen(
                [
                    "/usr/bin/qrexec-client-vm",
                    "@dispvm:dz-dvm",
                    "dz.ConvertDev" if dev_mode else "dz.Convert",
                ],
                stdin=subprocess.PIPE if dev_mode else f,
                stdout=subprocess.PIPE,
            )
            try:
                if dev_mode:
                    assert p.stdin is not None
                    # Send the dangerzone module first.
                    self.teleport_dz_module(p.stdin)
                    # Finally, send the document, as in the normal case.
                    p.stdin.write(f.read())
                    p.stdin.close()

                n_pages = read_int(p)
                if n_pages == 0:
                    # FIXME: Fail loudly in that case
                    return False

                # With OCR enabled, the pixel phase only accounts for half of
                # the overall progress.
                percentage_per_page = (50.0 if ocr_lang else 100.0) / n_pages

                for page in range(1, n_pages + 1):
                    # TODO handle too width > MAX_PAGE_WIDTH
                    # TODO handle too big height > MAX_PAGE_HEIGHT
                    width = read_int(p)
                    height = read_int(p)
                    expected_size = width * height * 3  # three color channels
                    untrusted_pixels = read_bytes(p, expected_size)
                    if len(untrusted_pixels) != expected_size:
                        # The stream ended early; do not hand truncated page
                        # data to the pixels-to-PDF step.
                        log.error(
                            "Page %d/%d: expected %d bytes of pixel data, got %d",
                            page,
                            n_pages,
                            expected_size,
                            len(untrusted_pixels),
                        )
                        return False

                    # Wrapper code
                    page_base = out_dir / f"page-{page}"
                    page_base.with_suffix(".width").write_text(str(width))
                    page_base.with_suffix(".height").write_text(str(height))
                    page_base.with_suffix(".rgb").write_bytes(untrusted_pixels)

                    percentage += percentage_per_page
                    report(f"Converting page {page}/{n_pages} to pixels", percentage)
            finally:
                # Never leave the qrexec client (and its pipes) behind, no
                # matter how the pixel phase ended.
                self._reap_qrexec_client(p)

        # TODO handle leftover code input
        report("Converted document to pixels", percentage)

        # FIXME pass OCR stuff properly (see #455)
        old_environ = dict(os.environ)
        try:
            if ocr_lang:
                os.environ["OCR"] = "1"
                os.environ["OCR_LANGUAGE"] = ocr_lang

            # TODO add progress updates on second stage
            asyncio.run(PixelsToPDF().convert())
        finally:
            # FIXME remove once the OCR args are no longer passed with env vars
            # Restore the environment even if the conversion raised, so the
            # OCR variables do not leak into later conversions.
            os.environ.clear()
            os.environ.update(old_environ)

        report("Safe PDF created", 100.0)

        shutil.move(CONVERTED_FILE_PATH, document.output_filename)
        return True

    @staticmethod
    def _reap_qrexec_client(p: subprocess.Popen, timeout: float = 3.0) -> None:
        """Close the pipes of `p` and wait for it, killing it after `timeout`."""
        for pipe in (p.stdin, p.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Best-effort cleanup: the other end may already be gone
                # (e.g. a broken pipe while flushing stdin).
                pass
        try:
            p.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            p.kill()
            p.wait()

    def get_max_parallel_conversions(self) -> int:
        """Return 1: the hard-coded /tmp paths rule out parallel conversions."""
        return 1

    def teleport_dz_module(self, wpipe: IO[bytes]) -> None:
        """Send the dangerzone module to another qube, as a zipfile.

        Writes a 4-byte big-endian length prefix to `wpipe`, followed by the
        zipfile itself, so that the receiver knows when to stop reading.
        """
        # Grab the directory of the dangerzone.conversion package.
        import dangerzone.conversion as _conv

        _conv_path = Path(inspect.getfile(_conv)).parent
        temp_file = io.BytesIO()

        # Create a Python zipfile that contains all the files of the
        # conversion package, under a top-level "dangerzone" package.
        with zipfile.PyZipFile(temp_file, "w") as z:
            z.mkdir("dangerzone/")
            z.writestr("dangerzone/__init__.py", "")
            z.writepy(str(_conv_path), basename="dangerzone/")

        # Send the following data:
        # 1. The size of the Python zipfile, so that the server can know when
        #    to stop.
        # 2. The Python zipfile itself.
        payload = temp_file.getvalue()
        # The byte order is explicit: the default only exists on Python 3.11+.
        wpipe.write(len(payload).to_bytes(4, "big"))
        wpipe.write(payload)