diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index e8c9d5c..6d4d3ff 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -45,9 +45,9 @@ def read_int(f: IO[bytes]) -> int: def read_debug_text(f: IO[bytes], size: int) -> str: - """Read arbitrarily long text (for debug purposes)""" + """Read arbitrarily long text (for debug purposes), and sanitize it.""" untrusted_text = f.read(size).decode("ascii", errors="replace") - return replace_control_chars(untrusted_text) + return replace_control_chars(untrusted_text, keep_newlines=True) class IsolationProvider(ABC): diff --git a/dangerzone/util.py b/dangerzone/util.py index 5f25da0..b56a2e8 100644 --- a/dangerzone/util.py +++ b/dangerzone/util.py @@ -66,11 +66,14 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def] return None -def replace_control_chars(untrusted_str: str) -> str: +def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str: """Remove control characters from string. Protects a terminal emulator from obscure control characters. Control characters are replaced by � U+FFFD Replacement Character. + + If a user wants to keep the newline character (e.g., because they are sanitizing a + multi-line text), they must pass `keep_newlines=True`. """ def is_safe(chr: str) -> bool: @@ -90,5 +93,8 @@ def replace_control_chars(untrusted_str: str) -> str: sanitized_str = "" for char in untrusted_str: - sanitized_str += char if is_safe(char) else "�" + if (keep_newlines and char == "\n") or is_safe(char): + sanitized_str += char + else: + sanitized_str += "�" return sanitized_str diff --git a/tests/test_util.py b/tests/test_util.py index 04eecb2..a4126f9 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -30,3 +30,8 @@ def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None: assert util.replace_control_chars(uncommon_text) == sanitized_text assert util.replace_control_chars("normal text") == "normal text" assert util.replace_control_chars("") == "" + assert util.replace_control_chars("multi-line\ntext") == "multi-line�text" + assert ( + util.replace_control_chars("multi-line\ntext", keep_newlines=True) + == "multi-line\ntext" + )