mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Remove untrusted progress parsing (stderr instead)
Now that only the second container can send JSON-encoded progress information, we can the untrusted JSON parsing. The parse_progress was also renamed to `parse_progress_trusted` to ensure future developers don't mistake this as a safe method. The old methods for sending untrusted JSON were repurposed to send the progress instead to stderr for troubleshooting in development mode. Fixes #456
This commit is contained in:
parent
c991e530d0
commit
550786adfe
8 changed files with 27 additions and 160 deletions
|
@ -192,5 +192,6 @@ class DangerzoneConverter:
|
|||
async def convert(self) -> None:
|
||||
pass
|
||||
|
||||
def update_progress(self, text: str, *, error: bool = False) -> None:
|
||||
@abstractmethod
|
||||
def update_progress(self, text: str) -> None:
|
||||
pass
|
||||
|
|
|
@ -35,6 +35,9 @@ class DocumentToPixels(DangerzoneConverter):
|
|||
async def write_page_data(self, data: bytes) -> None:
|
||||
return await self.write_bytes(data)
|
||||
|
||||
def update_progress(self, text: str, *, error: bool = False) -> None:
|
||||
print(text, file=sys.stderr)
|
||||
|
||||
async def convert(self) -> None:
|
||||
conversions: Dict[str, Dict[str, Optional[str]]] = {
|
||||
# .pdf
|
||||
|
|
|
@ -93,11 +93,11 @@ class PixelsToPDF(DangerzoneConverter):
|
|||
def update_progress(self, text: str, *, error: bool = False) -> None:
|
||||
if running_on_qubes():
|
||||
if self.progress_callback:
|
||||
self.progress_callback(error, text, int(self.percentage))
|
||||
self.progress_callback(error, text, self.percentage)
|
||||
else:
|
||||
print(
|
||||
json.dumps(
|
||||
{"error": error, "text": text, "percentage": int(self.percentage)}
|
||||
{"error": error, "text": text, "percentage": self.percentage}
|
||||
)
|
||||
)
|
||||
sys.stdout.flush()
|
||||
|
|
|
@ -87,13 +87,13 @@ class IsolationProvider(ABC):
|
|||
exception = errors.exception_from_error_code(error_code)
|
||||
document.mark_as_failed()
|
||||
except errors.ConversionException as e:
|
||||
self.print_progress_trusted(document, True, str(e), 0)
|
||||
self.print_progress(document, True, str(e), 0)
|
||||
document.mark_as_failed()
|
||||
except Exception as e:
|
||||
log.exception(
|
||||
f"An exception occurred while converting document '{document.id}'"
|
||||
)
|
||||
self.print_progress_trusted(document, True, str(e), 0)
|
||||
self.print_progress(document, True, str(e), 0)
|
||||
document.mark_as_failed()
|
||||
|
||||
def doc_to_pixels(self, document: Document, tempdir: str) -> None:
|
||||
|
@ -125,7 +125,7 @@ class IsolationProvider(ABC):
|
|||
sw.start()
|
||||
for page in range(1, n_pages + 1):
|
||||
text = f"Converting page {page}/{n_pages} to pixels"
|
||||
self.print_progress_trusted(document, False, text, percentage)
|
||||
self.print_progress(document, False, text, percentage)
|
||||
|
||||
width = read_int(self.proc.stdout, timeout=sw.remaining)
|
||||
height = read_int(self.proc.stdout, timeout=sw.remaining)
|
||||
|
@ -156,7 +156,7 @@ class IsolationProvider(ABC):
|
|||
|
||||
# TODO handle leftover code input
|
||||
text = "Converted document to pixels"
|
||||
self.print_progress_trusted(document, False, text, percentage)
|
||||
self.print_progress(document, False, text, percentage)
|
||||
|
||||
if getattr(sys, "dangerzone_dev", False):
|
||||
assert self.proc.stderr is not None
|
||||
|
@ -173,11 +173,11 @@ class IsolationProvider(ABC):
|
|||
) -> None:
|
||||
pass
|
||||
|
||||
def _print_progress(
|
||||
def print_progress(
|
||||
self, document: Document, error: bool, text: str, percentage: float
|
||||
) -> None:
|
||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||
s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL
|
||||
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
|
||||
if error:
|
||||
s += Fore.RED + text + Style.RESET_ALL
|
||||
log.error(s)
|
||||
|
@ -188,19 +188,6 @@ class IsolationProvider(ABC):
|
|||
if self.progress_callback:
|
||||
self.progress_callback(error, text, percentage)
|
||||
|
||||
def print_progress_trusted(
|
||||
self, document: Document, error: bool, text: str, percentage: float
|
||||
) -> None:
|
||||
return self._print_progress(document, error, text, int(percentage))
|
||||
|
||||
def print_progress(
|
||||
self, document: Document, error: bool, untrusted_text: str, percentage: float
|
||||
) -> None:
|
||||
text = replace_control_chars(untrusted_text)
|
||||
return self.print_progress_trusted(
|
||||
document, error, "UNTRUSTED> " + text, percentage
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def get_max_parallel_conversions(self) -> int:
|
||||
pass
|
||||
|
|
|
@ -11,12 +11,7 @@ from typing import Any, List, Optional
|
|||
|
||||
from ..conversion import errors
|
||||
from ..document import Document
|
||||
from ..util import (
|
||||
get_resource_path,
|
||||
get_subprocess_startupinfo,
|
||||
get_tmp_dir,
|
||||
replace_control_chars,
|
||||
)
|
||||
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
|
||||
from .base import IsolationProvider
|
||||
|
||||
# Define startupinfo for subprocesses
|
||||
|
@ -147,29 +142,22 @@ class Container(IsolationProvider):
|
|||
if not type(val) == _type:
|
||||
raise ValueError("Status field has incorrect type")
|
||||
|
||||
def parse_progress(self, document: Document, untrusted_line: str) -> None:
|
||||
def parse_progress_trusted(self, document: Document, line: str) -> None:
|
||||
"""
|
||||
Parses a line returned by the container.
|
||||
"""
|
||||
try:
|
||||
untrusted_status = json.loads(untrusted_line)
|
||||
|
||||
text = untrusted_status["text"]
|
||||
status = json.loads(line)
|
||||
text = status["text"]
|
||||
self.assert_field_type(text, str)
|
||||
|
||||
error = untrusted_status["error"]
|
||||
error = status["error"]
|
||||
self.assert_field_type(error, bool)
|
||||
|
||||
percentage = untrusted_status["percentage"]
|
||||
self.assert_field_type(percentage, int)
|
||||
|
||||
percentage = status["percentage"]
|
||||
self.assert_field_type(percentage, float)
|
||||
self.print_progress(document, error, text, percentage)
|
||||
except Exception:
|
||||
line = replace_control_chars(untrusted_line)
|
||||
error_message = (
|
||||
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}"
|
||||
)
|
||||
self.print_progress_trusted(document, True, error_message, -1)
|
||||
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
||||
self.print_progress(document, True, error_message, -1)
|
||||
|
||||
def exec(
|
||||
self,
|
||||
|
@ -243,8 +231,9 @@ class Container(IsolationProvider):
|
|||
]
|
||||
|
||||
pixels_to_pdf_proc = self.exec_container(command, extra_args)
|
||||
for line in pixels_to_pdf_proc.stdout:
|
||||
self.parse_progress(document, line)
|
||||
if pixels_to_pdf_proc.stdout:
|
||||
for line in pixels_to_pdf_proc.stdout:
|
||||
self.parse_progress_trusted(document, line.decode())
|
||||
error_code = pixels_to_pdf_proc.wait()
|
||||
if error_code != 0:
|
||||
log.error("pixels-to-pdf failed")
|
||||
|
|
|
@ -36,7 +36,7 @@ class Qubes(IsolationProvider):
|
|||
self, document: Document, tempdir: str, ocr_lang: Optional[str]
|
||||
) -> None:
|
||||
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
|
||||
self.print_progress_trusted(document, error, text, percentage)
|
||||
self.print_progress(document, error, text, percentage)
|
||||
|
||||
converter = PixelsToPDF(progress_callback=print_progress_wrapper)
|
||||
try:
|
||||
|
|
|
@ -18,46 +18,6 @@ from .. import pdf_11k_pages, sanitized_text, uncommon_text
|
|||
)
|
||||
@pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system")
|
||||
class IsolationProviderTest:
|
||||
def test_print_progress(
|
||||
self,
|
||||
provider: base.IsolationProvider,
|
||||
uncommon_text: str,
|
||||
sanitized_text: str,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Test that the print_progress() method of our isolation providers sanitizes text.
|
||||
|
||||
Iterate our isolation providers and make sure that their print_progress() methods
|
||||
sanitizes the provided text, before passing it to the logging functions and other
|
||||
callbacks.
|
||||
"""
|
||||
d = Document()
|
||||
provider.progress_callback = mocker.MagicMock()
|
||||
log_info_spy = mocker.spy(base.log, "info")
|
||||
log_error_spy = mocker.spy(base.log, "error")
|
||||
_print_progress_spy = mocker.spy(provider, "_print_progress")
|
||||
|
||||
for error, untrusted_text, sanitized_text in [
|
||||
(True, "normal text", "UNTRUSTED> normal text"),
|
||||
(False, "normal text", "UNTRUSTED> normal text"),
|
||||
(True, uncommon_text, "UNTRUSTED> " + sanitized_text),
|
||||
(False, uncommon_text, "UNTRUSTED> " + sanitized_text),
|
||||
]:
|
||||
log_info_spy.reset_mock()
|
||||
log_error_spy.reset_mock()
|
||||
|
||||
provider.print_progress(d, error, untrusted_text, 0)
|
||||
provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr]
|
||||
_print_progress_spy.assert_called_with(d, error, sanitized_text, 0)
|
||||
if error:
|
||||
assert log_error_spy.call_args[0][0].endswith(
|
||||
sanitized_text + Style.RESET_ALL
|
||||
)
|
||||
log_info_spy.assert_not_called()
|
||||
else:
|
||||
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
|
||||
log_error_spy.assert_not_called()
|
||||
|
||||
def test_max_pages_received(
|
||||
self,
|
||||
pdf_11k_pages: str,
|
||||
|
|
|
@ -19,77 +19,4 @@ def provider() -> Container:
|
|||
|
||||
|
||||
class TestContainer(IsolationProviderTest):
|
||||
def test_parse_progress(
|
||||
self,
|
||||
provider: Container,
|
||||
uncommon_text: str,
|
||||
sanitized_text: str,
|
||||
mocker: MockerFixture,
|
||||
) -> None:
|
||||
"""Test that the `parse_progress()` function handles escape sequences properly."""
|
||||
container = Container(enable_timeouts=False)
|
||||
container.progress_callback = mocker.MagicMock()
|
||||
print_progress_mock = mocker.patch.object(container, "_print_progress")
|
||||
d = Document()
|
||||
|
||||
# Test 1 - Check that normal JSON values are printed as is.
|
||||
simple_json = json.dumps({"text": "text", "error": False, "percentage": 0})
|
||||
container.parse_progress(d, simple_json)
|
||||
print_progress_mock.assert_called_with(d, False, "UNTRUSTED> text", 0)
|
||||
|
||||
# Test 2 - Check that a totally invalid string is reported as a failure. If this
|
||||
# string contains escape characters, they should be sanitized as well.
|
||||
def assert_invalid_json(text: str) -> None:
|
||||
print_progress_mock.assert_called_with(
|
||||
d,
|
||||
True,
|
||||
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {text}",
|
||||
-1,
|
||||
)
|
||||
|
||||
# Try first with a trivially invalid string.
|
||||
invalid_json = "()"
|
||||
container.parse_progress(d, invalid_json)
|
||||
assert_invalid_json(invalid_json)
|
||||
|
||||
# Try next with an invalid string that contains uncommon text.
|
||||
container.parse_progress(d, uncommon_text)
|
||||
assert_invalid_json(sanitized_text)
|
||||
|
||||
# Test 3 - Check that a structurally valid JSON value with escape characters in it
|
||||
# is sanitized.
|
||||
valid_json = json.dumps(
|
||||
{"text": uncommon_text, "error": False, "percentage": 0}
|
||||
)
|
||||
sanitized_json = json.dumps(
|
||||
{"text": sanitized_text, "error": False, "percentage": 0}
|
||||
)
|
||||
container.parse_progress(d, valid_json)
|
||||
print_progress_mock.assert_called_with(
|
||||
d, False, "UNTRUSTED> " + sanitized_text, 0
|
||||
)
|
||||
|
||||
# Test 4 - Check that a structurally valid JSON, that otherwise does not have the
|
||||
# expected keys, or the expected value types, is reported as an error, and any
|
||||
# escape sequences are sanitized.
|
||||
|
||||
keys = ["text", "error", "percentage", uncommon_text]
|
||||
values = [uncommon_text, False, 0, None]
|
||||
possible_kvs = list(itertools.product(keys, values, repeat=1))
|
||||
|
||||
# Based on the above keys and values, create any combination possible, from 0 to 3
|
||||
# elements. Take extra care to:
|
||||
#
|
||||
# * Remove combinations with non-unique keys.
|
||||
# * Ignore the sole valid combination (see `valid_json`), since we have already
|
||||
# tested it above.
|
||||
for i in range(len(keys)):
|
||||
for combo in itertools.combinations(possible_kvs, i):
|
||||
dict_combo: Dict[str, Any] = dict(combo) # type: ignore [arg-type]
|
||||
if len(combo) == len(dict_combo.keys()):
|
||||
bad_json = json.dumps(dict_combo)
|
||||
sanitized_json = bad_json.replace(uncommon_text, sanitized_text)
|
||||
if bad_json == valid_json:
|
||||
continue
|
||||
container.parse_progress(d, bad_json)
|
||||
assert_invalid_json(sanitized_json)
|
||||
pass
|
||||
|
|
Loading…
Reference in a new issue