Allow each conversion to have its own proc

If we increased the number of parallel conversions, we'd run into an
issue where the streams were getting mixed together. This was because
the Converter.proc was a single attribute. This breaks it down into a
local variable such that this mixup doesn't happen.
This commit is contained in:
deeplow 2024-01-25 14:00:34 +00:00
parent 943bab2def
commit 1835756b45
No known key found for this signature in database
GPG key ID: 577982871529A52A
5 changed files with 64 additions and 44 deletions

View file

@ -1,3 +1,4 @@
import subprocess
from typing import List, Optional, Type, Union
# XXX: errors start at 128 for conversion-related issues
@ -7,6 +8,23 @@ MAX_PAGE_WIDTH = 10000
MAX_PAGE_HEIGHT = 10000
class InterruptedConversionException(Exception):
"""Data received was less than expected"""
def __init__(self) -> None:
super().__init__(
"Something interrupted the conversion and it could not be completed."
)
class ConverterProcException(Exception):
"""Some exception occurred in the converter"""
def __init__(self, proc: subprocess.Popen) -> None:
self.proc = proc
super().__init__()
class ConversionException(Exception):
error_message = "Unspecified error"
error_code = ERROR_SHIFT
@ -86,15 +104,6 @@ class PageCountMismatch(PagesException):
)
class ConverterProcException(ConversionException):
"""Some exception occurred in the converter"""
error_code = ERROR_SHIFT + 60
error_message = (
"Something interrupted the conversion and it could not be completed."
)
class UnexpectedConversionError(ConversionException):
error_code = ERROR_SHIFT + 100
error_message = "Some unexpected error occurred while converting the document"

View file

@ -27,13 +27,15 @@ def read_bytes(f: IO[bytes], size: int, timeout: float, exact: bool = True) -> b
"""Read bytes from a file-like object."""
buf = nonblocking_read(f, size, timeout)
if exact and len(buf) != size:
raise errors.ConverterProcException()
raise errors.InterruptedConversionException()
return buf
def read_int(f: IO[bytes], timeout: float) -> int:
"""Read 2 bytes from a file-like object, and decode them as int."""
untrusted_int = read_bytes(f, 2, timeout)
if len(untrusted_int) != 2:
raise errors.InterruptedConversionException()
return int.from_bytes(untrusted_int, signed=False)
@ -52,8 +54,6 @@ class IsolationProvider(ABC):
STARTUP_TIME_SECONDS = 0 # The maximum time it takes a the provider to start up.
def __init__(self) -> None:
self.proc: Optional[subprocess.Popen] = None
if getattr(sys, "dangerzone_dev", False) == True:
self.proc_stderr = subprocess.PIPE
else:
@ -80,8 +80,8 @@ class IsolationProvider(ABC):
document.mark_as_safe()
if document.archive_after_conversion:
document.archive()
except errors.ConverterProcException:
exception = self.get_proc_exception()
except errors.ConverterProcException as e:
exception = self.get_proc_exception(e.proc)
self.print_progress(document, True, str(exception), 0)
document.mark_as_failed()
except errors.ConversionException as e:
@ -97,23 +97,23 @@ class IsolationProvider(ABC):
def doc_to_pixels(self, document: Document, tempdir: str) -> None:
percentage = 0.0
with open(document.input_filename, "rb") as f:
self.proc = self.start_doc_to_pixels_proc()
p = self.start_doc_to_pixels_proc()
try:
assert self.proc.stdin is not None
self.proc.stdin.write(f.read())
self.proc.stdin.close()
assert p.stdin is not None
p.stdin.write(f.read())
p.stdin.close()
except BrokenPipeError as e:
raise errors.ConverterProcException()
raise errors.ConverterProcException(p)
# Get file size (in MiB)
size = os.path.getsize(document.input_filename) / 1024**2
timeout = calculate_timeout(size) + self.STARTUP_TIME_SECONDS
assert self.proc is not None
assert self.proc.stdout is not None
os.set_blocking(self.proc.stdout.fileno(), False)
assert p is not None
assert p.stdout is not None
os.set_blocking(p.stdout.fileno(), False)
n_pages = read_int(self.proc.stdout, timeout)
n_pages = read_int(p.stdout, timeout)
if n_pages == 0 or n_pages > errors.MAX_PAGES:
raise errors.MaxPagesException()
percentage_per_page = 50.0 / n_pages
@ -125,8 +125,8 @@ class IsolationProvider(ABC):
text = f"Converting page {page}/{n_pages} to pixels"
self.print_progress(document, False, text, percentage)
width = read_int(self.proc.stdout, timeout=sw.remaining)
height = read_int(self.proc.stdout, timeout=sw.remaining)
width = read_int(p.stdout, timeout=sw.remaining)
height = read_int(p.stdout, timeout=sw.remaining)
if not (1 <= width <= errors.MAX_PAGE_WIDTH):
raise errors.MaxPageWidthException()
if not (1 <= height <= errors.MAX_PAGE_HEIGHT):
@ -134,7 +134,7 @@ class IsolationProvider(ABC):
num_pixels = width * height * 3 # three color channels
untrusted_pixels = read_bytes(
self.proc.stdout,
p.stdout,
num_pixels,
timeout=sw.remaining,
)
@ -150,17 +150,17 @@ class IsolationProvider(ABC):
percentage += percentage_per_page
# Ensure nothing else is read after all bitmaps are obtained
self.proc.stdout.close()
p.stdout.close()
# TODO handle leftover code input
text = "Converted document to pixels"
self.print_progress(document, False, text, percentage)
if getattr(sys, "dangerzone_dev", False):
assert self.proc.stderr is not None
os.set_blocking(self.proc.stderr.fileno(), False)
untrusted_log = read_debug_text(self.proc.stderr, MAX_CONVERSION_LOG_CHARS)
self.proc.stderr.close()
assert p.stderr is not None
os.set_blocking(p.stderr.fileno(), False)
untrusted_log = read_debug_text(p.stderr, MAX_CONVERSION_LOG_CHARS)
p.stderr.close()
log.info(
f"Conversion output (doc to pixels)\n{self.sanitize_conversion_str(untrusted_log)}"
)
@ -186,10 +186,9 @@ class IsolationProvider(ABC):
if self.progress_callback:
self.progress_callback(error, text, percentage)
def get_proc_exception(self) -> Exception:
def get_proc_exception(self, p: subprocess.Popen) -> Exception:
"""Returns an exception associated with a process exit code"""
assert self.proc
error_code = self.proc.wait(3)
error_code = p.wait(3)
return errors.exception_from_error_code(error_code)
@abstractmethod

View file

@ -25,10 +25,6 @@ class Qubes(IsolationProvider):
STARTUP_TIME_SECONDS = 5 * 60 # 5 minutes
def __init__(self) -> None:
self.proc: Optional[subprocess.Popen] = None
super().__init__()
def install(self) -> bool:
return True

View file

@ -1,7 +1,9 @@
import os
import subprocess
import pytest
from colorama import Style
from pytest import MonkeyPatch
from pytest_mock import MockerFixture
from dangerzone.conversion import errors
@ -30,13 +32,25 @@ class IsolationProviderTest:
pdf_11k_pages: str,
provider: base.IsolationProvider,
mocker: MockerFixture,
monkeypatch: MonkeyPatch,
tmpdir: str,
) -> None:
provider.progress_callback = mocker.MagicMock()
doc = Document(pdf_11k_pages)
with pytest.raises(errors.ConverterProcException):
proc = None
provider.old_start_doc_to_pixels_proc = provider.start_doc_to_pixels_proc # type: ignore [attr-defined]
def start_doc_to_pixels_proc() -> subprocess.Popen:
proc = provider.old_start_doc_to_pixels_proc() # type: ignore [attr-defined]
return proc
monkeypatch.setattr(
provider, "start_doc_to_pixels_proc", start_doc_to_pixels_proc
)
with pytest.raises(errors.InterruptedConversionException):
provider.doc_to_pixels(doc, tmpdir)
assert provider.get_proc_exception() == errors.MaxPagesException
assert provider.get_proc_exception(proc) == errors.MaxPagesException # type: ignore [arg-type]
def test_max_pages_client_enforcement(
self,

View file

@ -40,8 +40,10 @@ class TestQubes(IsolationProviderTest):
) -> None:
provider.progress_callback = mocker.MagicMock()
proc = None
def start_doc_to_pixels_proc() -> subprocess.Popen:
p = subprocess.Popen(
proc = subprocess.Popen(
# XXX error 126 simulates a qrexec-policy failure. Source:
# https://github.com/QubesOS/qubes-core-qrexec/blob/fdcbfd7/daemon/qrexec-daemon.c#L1022
["exit 126"],
@ -50,13 +52,13 @@ class TestQubes(IsolationProviderTest):
stderr=subprocess.PIPE,
shell=True,
)
return p
return proc
monkeypatch.setattr(
provider, "start_doc_to_pixels_proc", start_doc_to_pixels_proc
)
with pytest.raises(errors.ConverterProcException) as e:
with pytest.raises(errors.InterruptedConversionException) as e:
doc = Document(sample_doc)
provider.doc_to_pixels(doc, tmpdir)
assert provider.get_proc_exception() == errors.QubesQrexecFailed
assert provider.get_proc_exception(proc) == errors.QubesQrexecFailed # type: ignore [arg-type]