qubes: Add client-side timeouts

Extend the client-side capabilities of the Qubes isolation provider, by
adding client-side timeout logic.

This implementation brings the same logic that we used server-side to
the client, by taking into account the original file size and the number
of pages that the server returns.

Since the code does not have the exact same insight as the server has,
the calculated timeouts are in two places:

1. The timeout for getting the number of pages. This timeout takes into
   account:
   * the disposable qube startup time, and
   * the time it takes to convert a file type to PDF
2. The total timeout for converting the PDF into pixels, in the same way
   that we do it on the server-side.

Besides these changes, we also ensure that partial reads (e.g., due to
EOF) are detected (see exact=... argument)

Some things that are not resolved in this commit are:
* We have both client-side and server-side timeouts for the first phase
  of the conversion. Once containers can stream data back to the
  application (see #443), these server-side timeouts can be removed.
* We do not show a proper error message when a timeout occurs. This will
  be part of the error handling PR (see #430)

Fixes #446
Refs #443
Refs #430
This commit is contained in:
Alex Pyrgiotis 2023-09-13 15:09:28 +03:00
parent 55a4491ced
commit 99dd5f5139
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA

View file

@ -13,11 +13,17 @@ import zipfile
from pathlib import Path
from typing import IO, Callable, Optional
from ..conversion.common import running_on_qubes
from ..conversion.common import calculate_timeout, running_on_qubes
from ..conversion.errors import exception_from_error_code
from ..conversion.pixels_to_pdf import PixelsToPDF
from ..document import Document
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
from ..util import (
Stopwatch,
get_resource_path,
get_subprocess_startupinfo,
get_tmp_dir,
nonblocking_read,
)
from .base import MAX_CONVERSION_LOG_CHARS, IsolationProvider
log = logging.getLogger(__name__)
@ -27,28 +33,29 @@ CONVERTED_FILE_PATH = (
"/tmp/safe-output-compressed.pdf"
)
def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes:
"""Read bytes from stdout."""
return p.stdout.read(buff_size) # type: ignore [union-attr]
# The maximum time a qube takes to start up.
STARTUP_TIME_SECONDS = 5 * 60 # 5 minutes
def read_int(p: subprocess.Popen) -> int:
"""Read 2 bytes from stdout, and decode them as int."""
untrusted_int = p.stdout.read(2) # type: ignore [union-attr]
if untrusted_int == b"":
raise ValueError("Nothing read from stdout (expected an integer)")
def read_bytes(f: IO[bytes], size: int, timeout: float, exact: bool = True) -> bytes:
"""Read bytes from a file-like object."""
buf = nonblocking_read(f, size, timeout)
if exact and len(buf) != size:
raise ValueError("Did not receive exact number of bytes")
return buf
def read_int(f: IO[bytes], timeout: float) -> int:
"""Read 2 bytes from a file-like object, and decode them as int."""
untrusted_int = read_bytes(f, 2, timeout)
return int.from_bytes(untrusted_int, signed=False)
def read_debug_text(p: subprocess.Popen) -> str:
def read_debug_text(f: IO[bytes], size: int) -> str:
"""Read arbitrarily long text (for debug purposes)"""
if p.stderr:
untrusted_text = p.stderr.read(MAX_CONVERSION_LOG_CHARS)
p.stderr.close()
return untrusted_text.decode("ascii", errors="replace")
else:
return ""
timeout = calculate_timeout(size)
untrusted_text = read_bytes(f, size, timeout, exact=False)
return untrusted_text.decode("ascii", errors="replace")
class Qubes(IsolationProvider):
@ -104,12 +111,20 @@ class Qubes(IsolationProvider):
stderr=subprocess.DEVNULL,
)
# Get file size (in MiB)
size = os.path.getsize(document.input_filename) / 1024**2
timeout = calculate_timeout(size) + STARTUP_TIME_SECONDS
assert p.stdout is not None
os.set_blocking(p.stdout.fileno(), False)
try:
n_pages = read_int(p)
n_pages = read_int(p.stdout, timeout)
except ValueError:
error_code = p.wait()
# XXX Reconstruct exception from error code
raise exception_from_error_code(error_code) # type: ignore [misc]
if n_pages == 0:
# FIXME: Fail loudly in that case
return False
@ -117,14 +132,19 @@ class Qubes(IsolationProvider):
percentage_per_page = 50.0 / n_pages
else:
percentage_per_page = 100.0 / n_pages
timeout = calculate_timeout(size, n_pages)
sw = Stopwatch(timeout)
sw.start()
for page in range(1, n_pages + 1):
# TODO handle too width > MAX_PAGE_WIDTH
# TODO handle too big height > MAX_PAGE_HEIGHT
width = read_int(p)
height = read_int(p)
width = read_int(p.stdout, timeout=sw.remaining)
height = read_int(p.stdout, timeout=sw.remaining)
untrusted_pixels = read_bytes(
p, width * height * 3
p.stdout,
width * height * 3,
timeout=sw.remaining,
) # three color channels
# Wrapper code
@ -141,14 +161,17 @@ class Qubes(IsolationProvider):
self.print_progress_trusted(document, False, text, percentage)
# Ensure nothing else is read after all bitmaps are obtained
p.stdout.close() # type: ignore [union-attr]
p.stdout.close()
# TODO handle leftover code input
text = "Converted document to pixels"
self.print_progress_trusted(document, False, text, percentage)
if getattr(sys, "dangerzone_dev", False):
untrusted_log = read_debug_text(p)
assert p.stderr is not None
os.set_blocking(p.stderr.fileno(), False)
untrusted_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS)
p.stderr.close()
log.info(
f"Conversion output (doc to pixels)\n{self.sanitize_conversion_str(untrusted_log)}"
)