mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Remove untrusted progress parsing (stderr instead)
Now that only the second container can send JSON-encoded progress information, we can the untrusted JSON parsing. The parse_progress was also renamed to `parse_progress_trusted` to ensure future developers don't mistake this as a safe method. The old methods for sending untrusted JSON were repurposed to send the progress instead to stderr for troubleshooting in development mode. Fixes #456
This commit is contained in:
parent
c991e530d0
commit
550786adfe
8 changed files with 27 additions and 160 deletions
|
@ -192,5 +192,6 @@ class DangerzoneConverter:
|
||||||
async def convert(self) -> None:
|
async def convert(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def update_progress(self, text: str, *, error: bool = False) -> None:
|
@abstractmethod
|
||||||
|
def update_progress(self, text: str) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -35,6 +35,9 @@ class DocumentToPixels(DangerzoneConverter):
|
||||||
async def write_page_data(self, data: bytes) -> None:
|
async def write_page_data(self, data: bytes) -> None:
|
||||||
return await self.write_bytes(data)
|
return await self.write_bytes(data)
|
||||||
|
|
||||||
|
def update_progress(self, text: str, *, error: bool = False) -> None:
|
||||||
|
print(text, file=sys.stderr)
|
||||||
|
|
||||||
async def convert(self) -> None:
|
async def convert(self) -> None:
|
||||||
conversions: Dict[str, Dict[str, Optional[str]]] = {
|
conversions: Dict[str, Dict[str, Optional[str]]] = {
|
||||||
# .pdf
|
# .pdf
|
||||||
|
|
|
@ -93,11 +93,11 @@ class PixelsToPDF(DangerzoneConverter):
|
||||||
def update_progress(self, text: str, *, error: bool = False) -> None:
|
def update_progress(self, text: str, *, error: bool = False) -> None:
|
||||||
if running_on_qubes():
|
if running_on_qubes():
|
||||||
if self.progress_callback:
|
if self.progress_callback:
|
||||||
self.progress_callback(error, text, int(self.percentage))
|
self.progress_callback(error, text, self.percentage)
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
json.dumps(
|
json.dumps(
|
||||||
{"error": error, "text": text, "percentage": int(self.percentage)}
|
{"error": error, "text": text, "percentage": self.percentage}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
|
@ -87,13 +87,13 @@ class IsolationProvider(ABC):
|
||||||
exception = errors.exception_from_error_code(error_code)
|
exception = errors.exception_from_error_code(error_code)
|
||||||
document.mark_as_failed()
|
document.mark_as_failed()
|
||||||
except errors.ConversionException as e:
|
except errors.ConversionException as e:
|
||||||
self.print_progress_trusted(document, True, str(e), 0)
|
self.print_progress(document, True, str(e), 0)
|
||||||
document.mark_as_failed()
|
document.mark_as_failed()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(
|
log.exception(
|
||||||
f"An exception occurred while converting document '{document.id}'"
|
f"An exception occurred while converting document '{document.id}'"
|
||||||
)
|
)
|
||||||
self.print_progress_trusted(document, True, str(e), 0)
|
self.print_progress(document, True, str(e), 0)
|
||||||
document.mark_as_failed()
|
document.mark_as_failed()
|
||||||
|
|
||||||
def doc_to_pixels(self, document: Document, tempdir: str) -> None:
|
def doc_to_pixels(self, document: Document, tempdir: str) -> None:
|
||||||
|
@ -125,7 +125,7 @@ class IsolationProvider(ABC):
|
||||||
sw.start()
|
sw.start()
|
||||||
for page in range(1, n_pages + 1):
|
for page in range(1, n_pages + 1):
|
||||||
text = f"Converting page {page}/{n_pages} to pixels"
|
text = f"Converting page {page}/{n_pages} to pixels"
|
||||||
self.print_progress_trusted(document, False, text, percentage)
|
self.print_progress(document, False, text, percentage)
|
||||||
|
|
||||||
width = read_int(self.proc.stdout, timeout=sw.remaining)
|
width = read_int(self.proc.stdout, timeout=sw.remaining)
|
||||||
height = read_int(self.proc.stdout, timeout=sw.remaining)
|
height = read_int(self.proc.stdout, timeout=sw.remaining)
|
||||||
|
@ -156,7 +156,7 @@ class IsolationProvider(ABC):
|
||||||
|
|
||||||
# TODO handle leftover code input
|
# TODO handle leftover code input
|
||||||
text = "Converted document to pixels"
|
text = "Converted document to pixels"
|
||||||
self.print_progress_trusted(document, False, text, percentage)
|
self.print_progress(document, False, text, percentage)
|
||||||
|
|
||||||
if getattr(sys, "dangerzone_dev", False):
|
if getattr(sys, "dangerzone_dev", False):
|
||||||
assert self.proc.stderr is not None
|
assert self.proc.stderr is not None
|
||||||
|
@ -173,11 +173,11 @@ class IsolationProvider(ABC):
|
||||||
) -> None:
|
) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _print_progress(
|
def print_progress(
|
||||||
self, document: Document, error: bool, text: str, percentage: float
|
self, document: Document, error: bool, text: str, percentage: float
|
||||||
) -> None:
|
) -> None:
|
||||||
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
|
||||||
s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL
|
s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL
|
||||||
if error:
|
if error:
|
||||||
s += Fore.RED + text + Style.RESET_ALL
|
s += Fore.RED + text + Style.RESET_ALL
|
||||||
log.error(s)
|
log.error(s)
|
||||||
|
@ -188,19 +188,6 @@ class IsolationProvider(ABC):
|
||||||
if self.progress_callback:
|
if self.progress_callback:
|
||||||
self.progress_callback(error, text, percentage)
|
self.progress_callback(error, text, percentage)
|
||||||
|
|
||||||
def print_progress_trusted(
|
|
||||||
self, document: Document, error: bool, text: str, percentage: float
|
|
||||||
) -> None:
|
|
||||||
return self._print_progress(document, error, text, int(percentage))
|
|
||||||
|
|
||||||
def print_progress(
|
|
||||||
self, document: Document, error: bool, untrusted_text: str, percentage: float
|
|
||||||
) -> None:
|
|
||||||
text = replace_control_chars(untrusted_text)
|
|
||||||
return self.print_progress_trusted(
|
|
||||||
document, error, "UNTRUSTED> " + text, percentage
|
|
||||||
)
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_max_parallel_conversions(self) -> int:
|
def get_max_parallel_conversions(self) -> int:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -11,12 +11,7 @@ from typing import Any, List, Optional
|
||||||
|
|
||||||
from ..conversion import errors
|
from ..conversion import errors
|
||||||
from ..document import Document
|
from ..document import Document
|
||||||
from ..util import (
|
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
|
||||||
get_resource_path,
|
|
||||||
get_subprocess_startupinfo,
|
|
||||||
get_tmp_dir,
|
|
||||||
replace_control_chars,
|
|
||||||
)
|
|
||||||
from .base import IsolationProvider
|
from .base import IsolationProvider
|
||||||
|
|
||||||
# Define startupinfo for subprocesses
|
# Define startupinfo for subprocesses
|
||||||
|
@ -147,29 +142,22 @@ class Container(IsolationProvider):
|
||||||
if not type(val) == _type:
|
if not type(val) == _type:
|
||||||
raise ValueError("Status field has incorrect type")
|
raise ValueError("Status field has incorrect type")
|
||||||
|
|
||||||
def parse_progress(self, document: Document, untrusted_line: str) -> None:
|
def parse_progress_trusted(self, document: Document, line: str) -> None:
|
||||||
"""
|
"""
|
||||||
Parses a line returned by the container.
|
Parses a line returned by the container.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
untrusted_status = json.loads(untrusted_line)
|
status = json.loads(line)
|
||||||
|
text = status["text"]
|
||||||
text = untrusted_status["text"]
|
|
||||||
self.assert_field_type(text, str)
|
self.assert_field_type(text, str)
|
||||||
|
error = status["error"]
|
||||||
error = untrusted_status["error"]
|
|
||||||
self.assert_field_type(error, bool)
|
self.assert_field_type(error, bool)
|
||||||
|
percentage = status["percentage"]
|
||||||
percentage = untrusted_status["percentage"]
|
self.assert_field_type(percentage, float)
|
||||||
self.assert_field_type(percentage, int)
|
|
||||||
|
|
||||||
self.print_progress(document, error, text, percentage)
|
self.print_progress(document, error, text, percentage)
|
||||||
except Exception:
|
except Exception:
|
||||||
line = replace_control_chars(untrusted_line)
|
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
|
||||||
error_message = (
|
self.print_progress(document, True, error_message, -1)
|
||||||
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}"
|
|
||||||
)
|
|
||||||
self.print_progress_trusted(document, True, error_message, -1)
|
|
||||||
|
|
||||||
def exec(
|
def exec(
|
||||||
self,
|
self,
|
||||||
|
@ -243,8 +231,9 @@ class Container(IsolationProvider):
|
||||||
]
|
]
|
||||||
|
|
||||||
pixels_to_pdf_proc = self.exec_container(command, extra_args)
|
pixels_to_pdf_proc = self.exec_container(command, extra_args)
|
||||||
for line in pixels_to_pdf_proc.stdout:
|
if pixels_to_pdf_proc.stdout:
|
||||||
self.parse_progress(document, line)
|
for line in pixels_to_pdf_proc.stdout:
|
||||||
|
self.parse_progress_trusted(document, line.decode())
|
||||||
error_code = pixels_to_pdf_proc.wait()
|
error_code = pixels_to_pdf_proc.wait()
|
||||||
if error_code != 0:
|
if error_code != 0:
|
||||||
log.error("pixels-to-pdf failed")
|
log.error("pixels-to-pdf failed")
|
||||||
|
|
|
@ -36,7 +36,7 @@ class Qubes(IsolationProvider):
|
||||||
self, document: Document, tempdir: str, ocr_lang: Optional[str]
|
self, document: Document, tempdir: str, ocr_lang: Optional[str]
|
||||||
) -> None:
|
) -> None:
|
||||||
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
|
def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
|
||||||
self.print_progress_trusted(document, error, text, percentage)
|
self.print_progress(document, error, text, percentage)
|
||||||
|
|
||||||
converter = PixelsToPDF(progress_callback=print_progress_wrapper)
|
converter = PixelsToPDF(progress_callback=print_progress_wrapper)
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -18,46 +18,6 @@ from .. import pdf_11k_pages, sanitized_text, uncommon_text
|
||||||
)
|
)
|
||||||
@pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system")
|
@pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system")
|
||||||
class IsolationProviderTest:
|
class IsolationProviderTest:
|
||||||
def test_print_progress(
|
|
||||||
self,
|
|
||||||
provider: base.IsolationProvider,
|
|
||||||
uncommon_text: str,
|
|
||||||
sanitized_text: str,
|
|
||||||
mocker: MockerFixture,
|
|
||||||
) -> None:
|
|
||||||
"""Test that the print_progress() method of our isolation providers sanitizes text.
|
|
||||||
|
|
||||||
Iterate our isolation providers and make sure that their print_progress() methods
|
|
||||||
sanitizes the provided text, before passing it to the logging functions and other
|
|
||||||
callbacks.
|
|
||||||
"""
|
|
||||||
d = Document()
|
|
||||||
provider.progress_callback = mocker.MagicMock()
|
|
||||||
log_info_spy = mocker.spy(base.log, "info")
|
|
||||||
log_error_spy = mocker.spy(base.log, "error")
|
|
||||||
_print_progress_spy = mocker.spy(provider, "_print_progress")
|
|
||||||
|
|
||||||
for error, untrusted_text, sanitized_text in [
|
|
||||||
(True, "normal text", "UNTRUSTED> normal text"),
|
|
||||||
(False, "normal text", "UNTRUSTED> normal text"),
|
|
||||||
(True, uncommon_text, "UNTRUSTED> " + sanitized_text),
|
|
||||||
(False, uncommon_text, "UNTRUSTED> " + sanitized_text),
|
|
||||||
]:
|
|
||||||
log_info_spy.reset_mock()
|
|
||||||
log_error_spy.reset_mock()
|
|
||||||
|
|
||||||
provider.print_progress(d, error, untrusted_text, 0)
|
|
||||||
provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr]
|
|
||||||
_print_progress_spy.assert_called_with(d, error, sanitized_text, 0)
|
|
||||||
if error:
|
|
||||||
assert log_error_spy.call_args[0][0].endswith(
|
|
||||||
sanitized_text + Style.RESET_ALL
|
|
||||||
)
|
|
||||||
log_info_spy.assert_not_called()
|
|
||||||
else:
|
|
||||||
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
|
|
||||||
log_error_spy.assert_not_called()
|
|
||||||
|
|
||||||
def test_max_pages_received(
|
def test_max_pages_received(
|
||||||
self,
|
self,
|
||||||
pdf_11k_pages: str,
|
pdf_11k_pages: str,
|
||||||
|
|
|
@ -19,77 +19,4 @@ def provider() -> Container:
|
||||||
|
|
||||||
|
|
||||||
class TestContainer(IsolationProviderTest):
|
class TestContainer(IsolationProviderTest):
|
||||||
def test_parse_progress(
|
pass
|
||||||
self,
|
|
||||||
provider: Container,
|
|
||||||
uncommon_text: str,
|
|
||||||
sanitized_text: str,
|
|
||||||
mocker: MockerFixture,
|
|
||||||
) -> None:
|
|
||||||
"""Test that the `parse_progress()` function handles escape sequences properly."""
|
|
||||||
container = Container(enable_timeouts=False)
|
|
||||||
container.progress_callback = mocker.MagicMock()
|
|
||||||
print_progress_mock = mocker.patch.object(container, "_print_progress")
|
|
||||||
d = Document()
|
|
||||||
|
|
||||||
# Test 1 - Check that normal JSON values are printed as is.
|
|
||||||
simple_json = json.dumps({"text": "text", "error": False, "percentage": 0})
|
|
||||||
container.parse_progress(d, simple_json)
|
|
||||||
print_progress_mock.assert_called_with(d, False, "UNTRUSTED> text", 0)
|
|
||||||
|
|
||||||
# Test 2 - Check that a totally invalid string is reported as a failure. If this
|
|
||||||
# string contains escape characters, they should be sanitized as well.
|
|
||||||
def assert_invalid_json(text: str) -> None:
|
|
||||||
print_progress_mock.assert_called_with(
|
|
||||||
d,
|
|
||||||
True,
|
|
||||||
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {text}",
|
|
||||||
-1,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Try first with a trivially invalid string.
|
|
||||||
invalid_json = "()"
|
|
||||||
container.parse_progress(d, invalid_json)
|
|
||||||
assert_invalid_json(invalid_json)
|
|
||||||
|
|
||||||
# Try next with an invalid string that contains uncommon text.
|
|
||||||
container.parse_progress(d, uncommon_text)
|
|
||||||
assert_invalid_json(sanitized_text)
|
|
||||||
|
|
||||||
# Test 3 - Check that a structurally valid JSON value with escape characters in it
|
|
||||||
# is sanitized.
|
|
||||||
valid_json = json.dumps(
|
|
||||||
{"text": uncommon_text, "error": False, "percentage": 0}
|
|
||||||
)
|
|
||||||
sanitized_json = json.dumps(
|
|
||||||
{"text": sanitized_text, "error": False, "percentage": 0}
|
|
||||||
)
|
|
||||||
container.parse_progress(d, valid_json)
|
|
||||||
print_progress_mock.assert_called_with(
|
|
||||||
d, False, "UNTRUSTED> " + sanitized_text, 0
|
|
||||||
)
|
|
||||||
|
|
||||||
# Test 4 - Check that a structurally valid JSON, that otherwise does not have the
|
|
||||||
# expected keys, or the expected value types, is reported as an error, and any
|
|
||||||
# escape sequences are sanitized.
|
|
||||||
|
|
||||||
keys = ["text", "error", "percentage", uncommon_text]
|
|
||||||
values = [uncommon_text, False, 0, None]
|
|
||||||
possible_kvs = list(itertools.product(keys, values, repeat=1))
|
|
||||||
|
|
||||||
# Based on the above keys and values, create any combination possible, from 0 to 3
|
|
||||||
# elements. Take extra care to:
|
|
||||||
#
|
|
||||||
# * Remove combinations with non-unique keys.
|
|
||||||
# * Ignore the sole valid combination (see `valid_json`), since we have already
|
|
||||||
# tested it above.
|
|
||||||
for i in range(len(keys)):
|
|
||||||
for combo in itertools.combinations(possible_kvs, i):
|
|
||||||
dict_combo: Dict[str, Any] = dict(combo) # type: ignore [arg-type]
|
|
||||||
if len(combo) == len(dict_combo.keys()):
|
|
||||||
bad_json = json.dumps(dict_combo)
|
|
||||||
sanitized_json = bad_json.replace(uncommon_text, sanitized_text)
|
|
||||||
if bad_json == valid_json:
|
|
||||||
continue
|
|
||||||
container.parse_progress(d, bad_json)
|
|
||||||
assert_invalid_json(sanitized_json)
|
|
||||||
|
|
Loading…
Reference in a new issue