mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 02:12:36 +02:00
Qubes: strategy for capturing conversion logs
Use qrexec stdout to send conversion data (pixels) and stderr to send conversion progress at the end of the conversion. This happens regardless of whether or not the conversion is in developer mode or not. It's the client that decides if it reads the debug data from stderr or not. In this case, it only reads it if developer mode is enabled.
This commit is contained in:
parent
00adf223a5
commit
874b8865e2
5 changed files with 109 additions and 86 deletions
|
@ -22,9 +22,15 @@ def running_on_qubes() -> bool:
|
||||||
return os.path.exists("/usr/share/qubes/marker-vm")
|
return os.path.exists("/usr/share/qubes/marker-vm")
|
||||||
|
|
||||||
|
|
||||||
async def read_stream(
|
class DangerzoneConverter:
|
||||||
sr: asyncio.StreamReader, callback: Optional[Callable] = None
|
def __init__(self, progress_callback: Optional[Callable] = None) -> None:
|
||||||
) -> bytes:
|
self.percentage: float = 0.0
|
||||||
|
self.progress_callback = progress_callback
|
||||||
|
self.captured_output: bytes = b""
|
||||||
|
|
||||||
|
async def read_stream(
|
||||||
|
self, sr: asyncio.StreamReader, callback: Optional[Callable] = None
|
||||||
|
) -> bytes:
|
||||||
"""Consume a byte stream line-by-line.
|
"""Consume a byte stream line-by-line.
|
||||||
|
|
||||||
Read all lines in a stream until EOF. If a user has passed a callback, call it for
|
Read all lines in a stream until EOF. If a user has passed a callback, call it for
|
||||||
|
@ -39,15 +45,14 @@ async def read_stream(
|
||||||
line = await sr.readline()
|
line = await sr.readline()
|
||||||
if sr.at_eof():
|
if sr.at_eof():
|
||||||
break
|
break
|
||||||
|
self.captured_output += line
|
||||||
if callback is not None:
|
if callback is not None:
|
||||||
callback(line)
|
callback(line)
|
||||||
# TODO: This would be a good place to log the received line, mostly for debug
|
|
||||||
# logging.
|
|
||||||
buf += line
|
buf += line
|
||||||
return buf
|
return buf
|
||||||
|
|
||||||
|
async def run_command(
|
||||||
async def run_command(
|
self,
|
||||||
args: List[str],
|
args: List[str],
|
||||||
*,
|
*,
|
||||||
error_message: str,
|
error_message: str,
|
||||||
|
@ -55,7 +60,7 @@ async def run_command(
|
||||||
timeout: Optional[float],
|
timeout: Optional[float],
|
||||||
stdout_callback: Optional[Callable] = None,
|
stdout_callback: Optional[Callable] = None,
|
||||||
stderr_callback: Optional[Callable] = None,
|
stderr_callback: Optional[Callable] = None,
|
||||||
) -> Tuple[bytes, bytes]:
|
) -> Tuple[bytes, bytes]:
|
||||||
"""Run a command and get its output.
|
"""Run a command and get its output.
|
||||||
|
|
||||||
Run a command using asyncio.subprocess, consume its standard streams, and return its
|
Run a command using asyncio.subprocess, consume its standard streams, and return its
|
||||||
|
@ -77,8 +82,12 @@ async def run_command(
|
||||||
|
|
||||||
# Create asynchronous tasks that will consume the standard streams of the command,
|
# Create asynchronous tasks that will consume the standard streams of the command,
|
||||||
# and call callbacks if necessary.
|
# and call callbacks if necessary.
|
||||||
stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback))
|
stdout_task = asyncio.create_task(
|
||||||
stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback))
|
self.read_stream(proc.stdout, stdout_callback)
|
||||||
|
)
|
||||||
|
stderr_task = asyncio.create_task(
|
||||||
|
self.read_stream(proc.stderr, stderr_callback)
|
||||||
|
)
|
||||||
|
|
||||||
# Wait until the command has finished, for a specific timeout. Then, verify that the
|
# Wait until the command has finished, for a specific timeout. Then, verify that the
|
||||||
# command has completed successfully. In any other case, raise an exception.
|
# command has completed successfully. In any other case, raise an exception.
|
||||||
|
@ -95,12 +104,6 @@ async def run_command(
|
||||||
stderr = await stderr_task
|
stderr = await stderr_task
|
||||||
return (stdout, stderr)
|
return (stdout, stderr)
|
||||||
|
|
||||||
|
|
||||||
class DangerzoneConverter:
|
|
||||||
def __init__(self, progress_callback: Optional[Callable] = None) -> None:
|
|
||||||
self.percentage: float = 0.0
|
|
||||||
self.progress_callback = progress_callback
|
|
||||||
|
|
||||||
def calculate_timeout(
|
def calculate_timeout(
|
||||||
self, size: float, pages: Optional[float] = None
|
self, size: float, pages: Optional[float] = None
|
||||||
) -> Optional[float]:
|
) -> Optional[float]:
|
||||||
|
|
|
@ -18,7 +18,7 @@ from typing import Dict, Optional
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
|
|
||||||
from .common import DangerzoneConverter, run_command, running_on_qubes
|
from .common import DangerzoneConverter, running_on_qubes
|
||||||
|
|
||||||
|
|
||||||
class DocumentToPixels(DangerzoneConverter):
|
class DocumentToPixels(DangerzoneConverter):
|
||||||
|
@ -189,7 +189,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
"/tmp",
|
"/tmp",
|
||||||
"/tmp/input_file",
|
"/tmp/input_file",
|
||||||
]
|
]
|
||||||
await run_command(
|
await self.run_command(
|
||||||
args,
|
args,
|
||||||
error_message="Conversion to PDF with LibreOffice failed",
|
error_message="Conversion to PDF with LibreOffice failed",
|
||||||
timeout_message=(
|
timeout_message=(
|
||||||
|
@ -213,7 +213,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
"/tmp/input_file",
|
"/tmp/input_file",
|
||||||
"/tmp/input_file.pdf",
|
"/tmp/input_file.pdf",
|
||||||
]
|
]
|
||||||
await run_command(
|
await self.run_command(
|
||||||
args,
|
args,
|
||||||
error_message="Conversion to PDF with GraphicsMagick failed",
|
error_message="Conversion to PDF with GraphicsMagick failed",
|
||||||
timeout_message=(
|
timeout_message=(
|
||||||
|
@ -231,7 +231,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
|
|
||||||
# Obtain number of pages
|
# Obtain number of pages
|
||||||
self.update_progress("Calculating number of pages")
|
self.update_progress("Calculating number of pages")
|
||||||
stdout, _ = await run_command(
|
stdout, _ = await self.run_command(
|
||||||
["pdfinfo", pdf_filename],
|
["pdfinfo", pdf_filename],
|
||||||
error_message="PDF file is corrupted",
|
error_message="PDF file is corrupted",
|
||||||
timeout_message=(
|
timeout_message=(
|
||||||
|
@ -317,7 +317,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
|
|
||||||
page_base = "/tmp/page"
|
page_base = "/tmp/page"
|
||||||
|
|
||||||
await run_command(
|
await self.run_command(
|
||||||
[
|
[
|
||||||
"pdftoppm",
|
"pdftoppm",
|
||||||
pdf_filename,
|
pdf_filename,
|
||||||
|
@ -351,7 +351,7 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
f"/usr/lib/libreoffice/share/extensions/{libreoffice_ext}/",
|
f"/usr/lib/libreoffice/share/extensions/{libreoffice_ext}/",
|
||||||
f"/libreoffice_ext/{libreoffice_ext}",
|
f"/libreoffice_ext/{libreoffice_ext}",
|
||||||
]
|
]
|
||||||
await run_command(
|
await self.run_command(
|
||||||
unzip_args,
|
unzip_args,
|
||||||
error_message="LibreOffice extension installation failed (unzipping)",
|
error_message="LibreOffice extension installation failed (unzipping)",
|
||||||
timeout_message="unzipping LibreOffice extension timed out 5 seconds",
|
timeout_message="unzipping LibreOffice extension timed out 5 seconds",
|
||||||
|
|
|
@ -84,6 +84,9 @@ async def main() -> None:
|
||||||
rgb_data = rgb_file.read()
|
rgb_data = rgb_file.read()
|
||||||
await write_bytes(rgb_data)
|
await write_bytes(rgb_data)
|
||||||
|
|
||||||
|
# Write debug information
|
||||||
|
await write_bytes(converter.captured_output, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(asyncio.run(main()))
|
sys.exit(asyncio.run(main()))
|
||||||
|
|
|
@ -12,7 +12,7 @@ import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from .common import DangerzoneConverter, run_command, running_on_qubes
|
from .common import DangerzoneConverter, running_on_qubes
|
||||||
|
|
||||||
|
|
||||||
class PixelsToPDF(DangerzoneConverter):
|
class PixelsToPDF(DangerzoneConverter):
|
||||||
|
@ -47,7 +47,7 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
self.update_progress(
|
self.update_progress(
|
||||||
f"Converting page {page}/{num_pages} from pixels to searchable PDF"
|
f"Converting page {page}/{num_pages} from pixels to searchable PDF"
|
||||||
)
|
)
|
||||||
await run_command(
|
await self.run_command(
|
||||||
[
|
[
|
||||||
"gm",
|
"gm",
|
||||||
"convert",
|
"convert",
|
||||||
|
@ -65,7 +65,7 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
),
|
),
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
await run_command(
|
await self.run_command(
|
||||||
[
|
[
|
||||||
"tesseract",
|
"tesseract",
|
||||||
png_filename,
|
png_filename,
|
||||||
|
@ -88,7 +88,7 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
self.update_progress(
|
self.update_progress(
|
||||||
f"Converting page {page}/{num_pages} from pixels to PDF"
|
f"Converting page {page}/{num_pages} from pixels to PDF"
|
||||||
)
|
)
|
||||||
await run_command(
|
await self.run_command(
|
||||||
[
|
[
|
||||||
"gm",
|
"gm",
|
||||||
"convert",
|
"convert",
|
||||||
|
@ -119,7 +119,7 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
for page in range(1, num_pages + 1):
|
for page in range(1, num_pages + 1):
|
||||||
args.append(f"/tmp/page-{page}.pdf")
|
args.append(f"/tmp/page-{page}.pdf")
|
||||||
args.append(f"/tmp/safe-output.pdf")
|
args.append(f"/tmp/safe-output.pdf")
|
||||||
await run_command(
|
await self.run_command(
|
||||||
args,
|
args,
|
||||||
error_message="Merging pages into a single PDF failed",
|
error_message="Merging pages into a single PDF failed",
|
||||||
timeout_message=(
|
timeout_message=(
|
||||||
|
@ -133,7 +133,7 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
|
|
||||||
# Compress
|
# Compress
|
||||||
self.update_progress("Compressing PDF")
|
self.update_progress("Compressing PDF")
|
||||||
await run_command(
|
await self.run_command(
|
||||||
["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"],
|
["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"],
|
||||||
error_message="Compressing PDF failed",
|
error_message="Compressing PDF failed",
|
||||||
timeout_message=(
|
timeout_message=(
|
||||||
|
|
|
@ -28,6 +28,8 @@ CONVERTED_FILE_PATH = (
|
||||||
"/tmp/safe-output-compressed.pdf"
|
"/tmp/safe-output-compressed.pdf"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MAX_CONVERSION_LOG_CHARS = 150 * 50 # up to ~150 lines of 50 characters
|
||||||
|
|
||||||
|
|
||||||
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
|
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
|
||||||
"""Read bytes from stdout."""
|
"""Read bytes from stdout."""
|
||||||
|
@ -40,6 +42,15 @@ def read_int(p: subprocess.Popen) -> int:
|
||||||
return int.from_bytes(untrusted_int, signed=False)
|
return int.from_bytes(untrusted_int, signed=False)
|
||||||
|
|
||||||
|
|
||||||
|
def read_debug_text(p: subprocess.Popen) -> str:
|
||||||
|
"""Read arbitrarily long text (for debug purposes)"""
|
||||||
|
if p.stderr:
|
||||||
|
untrusted_text = p.stderr.read(MAX_CONVERSION_LOG_CHARS)
|
||||||
|
return untrusted_text.decode("ascii", errors="replace")
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
class Qubes(IsolationProvider):
|
class Qubes(IsolationProvider):
|
||||||
"""Uses a disposable qube for performing the conversion"""
|
"""Uses a disposable qube for performing the conversion"""
|
||||||
|
|
||||||
|
@ -75,6 +86,7 @@ class Qubes(IsolationProvider):
|
||||||
["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.ConvertDev"],
|
["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.ConvertDev"],
|
||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
)
|
)
|
||||||
assert p.stdin is not None
|
assert p.stdin is not None
|
||||||
|
|
||||||
|
@ -89,6 +101,7 @@ class Qubes(IsolationProvider):
|
||||||
["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.Convert"],
|
["/usr/bin/qrexec-client-vm", "@dispvm:dz-dvm", "dz.Convert"],
|
||||||
stdin=f,
|
stdin=f,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.DEVNULL,
|
||||||
)
|
)
|
||||||
|
|
||||||
n_pages = read_int(p)
|
n_pages = read_int(p)
|
||||||
|
@ -126,6 +139,10 @@ class Qubes(IsolationProvider):
|
||||||
text = "Converted document to pixels"
|
text = "Converted document to pixels"
|
||||||
self.print_progress_trusted(document, False, text, percentage)
|
self.print_progress_trusted(document, False, text, percentage)
|
||||||
|
|
||||||
|
if getattr(sys, "dangerzone_dev", False):
|
||||||
|
text = f"Conversion output (doc to pixels):\n{read_debug_text(p)}"
|
||||||
|
log.info(text)
|
||||||
|
|
||||||
# FIXME pass OCR stuff properly (see #455)
|
# FIXME pass OCR stuff properly (see #455)
|
||||||
old_environ = dict(os.environ)
|
old_environ = dict(os.environ)
|
||||||
if ocr_lang:
|
if ocr_lang:
|
||||||
|
|
Loading…
Reference in a new issue