Remove untrusted progress parsing (stderr instead)

Now that only the second container can send JSON-encoded progress
information, we can the untrusted JSON parsing. The parse_progress was
also renamed to `parse_progress_trusted` to ensure future developers
don't mistake this as a safe method.

The old methods for sending untrusted JSON were repurposed to send the
progress instead to stderr for troubleshooting in development mode.

Fixes #456
This commit is contained in:
deeplow 2024-01-04 15:02:35 +00:00
parent c991e530d0
commit 550786adfe
No known key found for this signature in database
GPG key ID: 577982871529A52A
8 changed files with 27 additions and 160 deletions

View file

@ -192,5 +192,6 @@ class DangerzoneConverter:
async def convert(self) -> None:
pass
def update_progress(self, text: str, *, error: bool = False) -> None:
@abstractmethod
def update_progress(self, text: str) -> None:
pass

View file

@ -35,6 +35,9 @@ class DocumentToPixels(DangerzoneConverter):
async def write_page_data(self, data: bytes) -> None:
return await self.write_bytes(data)
def update_progress(self, text: str, *, error: bool = False) -> None:
print(text, file=sys.stderr)
async def convert(self) -> None:
conversions: Dict[str, Dict[str, Optional[str]]] = {
# .pdf

View file

@ -93,11 +93,11 @@ class PixelsToPDF(DangerzoneConverter):
def update_progress(self, text: str, *, error: bool = False) -> None:
if running_on_qubes():
if self.progress_callback:
self.progress_callback(error, text, int(self.percentage))
self.progress_callback(error, text, self.percentage)
else:
print(
json.dumps(
{"error": error, "text": text, "percentage": int(self.percentage)}
{"error": error, "text": text, "percentage": self.percentage}
)
)
sys.stdout.flush()

View file

@ -87,13 +87,13 @@ class IsolationProvider(ABC):
exception = errors.exception_from_error_code(error_code)
document.mark_as_failed()
except errors.ConversionException as e:
self.print_progress_trusted(document, True, str(e), 0)
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
except Exception as e:
log.exception(
f"An exception occurred while converting document '{document.id}'"
)
self.print_progress_trusted(document, True, str(e), 0)
self.print_progress(document, True, str(e), 0)
document.mark_as_failed()
def doc_to_pixels(self, document: Document, tempdir: str) -> None:
@ -125,7 +125,7 @@ class IsolationProvider(ABC):
sw.start()
for page in range(1, n_pages + 1):
text = f"Converting page {page}/{n_pages} to pixels"
self.print_progress_trusted(document, False, text, percentage)
self.print_progress(document, False, text, percentage)
width = read_int(self.proc.stdout, timeout=sw.remaining)
height = read_int(self.proc.stdout, timeout=sw.remaining)
@ -156,7 +156,7 @@ class IsolationProvider(ABC):
# TODO handle leftover code input
text = "Converted document to pixels"
self.print_progress_trusted(document, False, text, percentage)
self.print_progress(document, False, text, percentage)
if getattr(sys, "dangerzone_dev", False):
assert self.proc.stderr is not None
@ -173,11 +173,11 @@ class IsolationProvider(ABC):
) -> None:
pass
def _print_progress(
def print_progress(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
if error:
s += Fore.RED + text + Style.RESET_ALL
log.error(s)
@ -188,19 +188,6 @@ class IsolationProvider(ABC):
if self.progress_callback:
self.progress_callback(error, text, percentage)
def print_progress_trusted(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
return self._print_progress(document, error, text, int(percentage))
def print_progress(
self, document: Document, error: bool, untrusted_text: str, percentage: float
) -> None:
text = replace_control_chars(untrusted_text)
return self.print_progress_trusted(
document, error, "UNTRUSTED> " + text, percentage
)
@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass

View file

@ -11,12 +11,7 @@ from typing import Any, List, Optional
from ..conversion import errors
from ..document import Document
from ..util import (
get_resource_path,
get_subprocess_startupinfo,
get_tmp_dir,
replace_control_chars,
)
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
from .base import IsolationProvider
# Define startupinfo for subprocesses
@ -147,29 +142,22 @@ class Container(IsolationProvider):
if not type(val) == _type:
raise ValueError("Status field has incorrect type")
def parse_progress(self, document: Document, untrusted_line: str) -> None:
def parse_progress_trusted(self, document: Document, line: str) -> None:
"""
Parses a line returned by the container.
"""
try:
untrusted_status = json.loads(untrusted_line)
text = untrusted_status["text"]
status = json.loads(line)
text = status["text"]
self.assert_field_type(text, str)
error = untrusted_status["error"]
error = status["error"]
self.assert_field_type(error, bool)
percentage = untrusted_status["percentage"]
self.assert_field_type(percentage, int)
percentage = status["percentage"]
self.assert_field_type(percentage, float)
self.print_progress(document, error, text, percentage)
except Exception:
line = replace_control_chars(untrusted_line)
error_message = (
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}"
)
self.print_progress_trusted(document, True, error_message, -1)
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
self.print_progress(document, True, error_message, -1)
def exec(
self,
@ -243,8 +231,9 @@ class Container(IsolationProvider):
]
pixels_to_pdf_proc = self.exec_container(command, extra_args)
if pixels_to_pdf_proc.stdout:
for line in pixels_to_pdf_proc.stdout:
self.parse_progress(document, line)
self.parse_progress_trusted(document, line.decode())
error_code = pixels_to_pdf_proc.wait()
if error_code != 0:
log.error("pixels-to-pdf failed")

View file

@ -36,7 +36,7 @@ class Qubes(IsolationProvider):
self, document: Document, tempdir: str, ocr_lang: Optional[str]
) -> None:
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
self.print_progress_trusted(document, error, text, percentage)
self.print_progress(document, error, text, percentage)
converter = PixelsToPDF(progress_callback=print_progress_wrapper)
try:

View file

@ -18,46 +18,6 @@ from .. import pdf_11k_pages, sanitized_text, uncommon_text
)
@pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system")
class IsolationProviderTest:
def test_print_progress(
self,
provider: base.IsolationProvider,
uncommon_text: str,
sanitized_text: str,
mocker: MockerFixture,
) -> None:
"""Test that the print_progress() method of our isolation providers sanitizes text.
Iterate our isolation providers and make sure that their print_progress() methods
sanitizes the provided text, before passing it to the logging functions and other
callbacks.
"""
d = Document()
provider.progress_callback = mocker.MagicMock()
log_info_spy = mocker.spy(base.log, "info")
log_error_spy = mocker.spy(base.log, "error")
_print_progress_spy = mocker.spy(provider, "_print_progress")
for error, untrusted_text, sanitized_text in [
(True, "normal text", "UNTRUSTED> normal text"),
(False, "normal text", "UNTRUSTED> normal text"),
(True, uncommon_text, "UNTRUSTED> " + sanitized_text),
(False, uncommon_text, "UNTRUSTED> " + sanitized_text),
]:
log_info_spy.reset_mock()
log_error_spy.reset_mock()
provider.print_progress(d, error, untrusted_text, 0)
provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr]
_print_progress_spy.assert_called_with(d, error, sanitized_text, 0)
if error:
assert log_error_spy.call_args[0][0].endswith(
sanitized_text + Style.RESET_ALL
)
log_info_spy.assert_not_called()
else:
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
log_error_spy.assert_not_called()
def test_max_pages_received(
self,
pdf_11k_pages: str,

View file

@ -19,77 +19,4 @@ def provider() -> Container:
class TestContainer(IsolationProviderTest):
def test_parse_progress(
self,
provider: Container,
uncommon_text: str,
sanitized_text: str,
mocker: MockerFixture,
) -> None:
"""Test that the `parse_progress()` function handles escape sequences properly."""
container = Container(enable_timeouts=False)
container.progress_callback = mocker.MagicMock()
print_progress_mock = mocker.patch.object(container, "_print_progress")
d = Document()
# Test 1 - Check that normal JSON values are printed as is.
simple_json = json.dumps({"text": "text", "error": False, "percentage": 0})
container.parse_progress(d, simple_json)
print_progress_mock.assert_called_with(d, False, "UNTRUSTED> text", 0)
# Test 2 - Check that a totally invalid string is reported as a failure. If this
# string contains escape characters, they should be sanitized as well.
def assert_invalid_json(text: str) -> None:
print_progress_mock.assert_called_with(
d,
True,
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {text}",
-1,
)
# Try first with a trivially invalid string.
invalid_json = "()"
container.parse_progress(d, invalid_json)
assert_invalid_json(invalid_json)
# Try next with an invalid string that contains uncommon text.
container.parse_progress(d, uncommon_text)
assert_invalid_json(sanitized_text)
# Test 3 - Check that a structurally valid JSON value with escape characters in it
# is sanitized.
valid_json = json.dumps(
{"text": uncommon_text, "error": False, "percentage": 0}
)
sanitized_json = json.dumps(
{"text": sanitized_text, "error": False, "percentage": 0}
)
container.parse_progress(d, valid_json)
print_progress_mock.assert_called_with(
d, False, "UNTRUSTED> " + sanitized_text, 0
)
# Test 4 - Check that a structurally valid JSON, that otherwise does not have the
# expected keys, or the expected value types, is reported as an error, and any
# escape sequences are sanitized.
keys = ["text", "error", "percentage", uncommon_text]
values = [uncommon_text, False, 0, None]
possible_kvs = list(itertools.product(keys, values, repeat=1))
# Based on the above keys and values, create any combination possible, from 0 to 3
# elements. Take extra care to:
#
# * Remove combinations with non-unique keys.
# * Ignore the sole valid combination (see `valid_json`), since we have already
# tested it above.
for i in range(len(keys)):
for combo in itertools.combinations(possible_kvs, i):
dict_combo: Dict[str, Any] = dict(combo) # type: ignore [arg-type]
if len(combo) == len(dict_combo.keys()):
bad_json = json.dumps(dict_combo)
sanitized_json = bad_json.replace(uncommon_text, sanitized_text)
if bad_json == valid_json:
continue
container.parse_progress(d, bad_json)
assert_invalid_json(sanitized_json)
pass