From 0b453603846889edfb7d24cf3bc90ddc177ece53 Mon Sep 17 00:00:00 2001 From: Alex Pyrgiotis Date: Tue, 30 Apr 2024 13:32:02 +0300 Subject: [PATCH] Keep newlines when reading debug logs In d632908a4478bc1587ef14ac28730dbf6df62e94 we improved our `replace_control_chars()` function, by replacing every control or invalid Unicode character with a placeholder one. This change, however, made our debug logs harder to read, since newlines were not preserved. There are indeed various cases in which replacing newlines is wise (e.g., in filenames), so we should keep this behavior by default. However, specifically for reading debug logs, we add an option to keep newlines to improve readability, at no expense to security. --- dangerzone/isolation_provider/base.py | 4 ++-- dangerzone/util.py | 10 ++++++++-- tests/test_util.py | 5 +++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index e8c9d5c..6d4d3ff 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -45,9 +45,9 @@ def read_int(f: IO[bytes]) -> int: def read_debug_text(f: IO[bytes], size: int) -> str: - """Read arbitrarily long text (for debug purposes)""" + """Read arbitrarily long text (for debug purposes), and sanitize it.""" untrusted_text = f.read(size).decode("ascii", errors="replace") - return replace_control_chars(untrusted_text) + return replace_control_chars(untrusted_text, keep_newlines=True) class IsolationProvider(ABC): diff --git a/dangerzone/util.py b/dangerzone/util.py index 5f25da0..b56a2e8 100644 --- a/dangerzone/util.py +++ b/dangerzone/util.py @@ -66,11 +66,14 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def] return None -def replace_control_chars(untrusted_str: str) -> str: +def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str: """Remove control characters from string. Protects a terminal emulator from obscure control characters. Control characters are replaced by � U+FFFD Replacement Character. + + If a user wants to keep the newline character (e.g., because they are sanitizing a + multi-line text), they must pass `keep_newlines=True`. """ def is_safe(chr: str) -> bool: @@ -90,5 +93,8 @@ def replace_control_chars(untrusted_str: str) -> str: sanitized_str = "" for char in untrusted_str: - sanitized_str += char if is_safe(char) else "�" + if (keep_newlines and char == "\n") or is_safe(char): + sanitized_str += char + else: + sanitized_str += "�" return sanitized_str diff --git a/tests/test_util.py b/tests/test_util.py index 04eecb2..a4126f9 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -30,3 +30,8 @@ def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None: assert util.replace_control_chars(uncommon_text) == sanitized_text assert util.replace_control_chars("normal text") == "normal text" assert util.replace_control_chars("") == "" + assert util.replace_control_chars("multi-line\ntext") == "multi-line�text" + assert ( + util.replace_control_chars("multi-line\ntext", keep_newlines=True) + == "multi-line\ntext" + )