Keep newlines when reading debug logs

In d632908a44 we improved our `replace_control_chars()` function, by replacing every control or invalid Unicode character with a placeholder one. This change, however, made our debug logs harder to read, since newlines were not preserved. There are indeed various cases in which replacing newlines is wise (e.g., in filenames), so we should keep this behavior by default. However, specifically for reading debug logs, we add an option to keep newlines to improve readability, at no expense to security.
2025-04-28 18:02:38 +02:00 · 2024-04-30 13:32:02 +03:00 · 2024-04-30 13:32:02 +03:00 · 0b45360384
commit 0b45360384
parent e11aaec3ac
3 changed files with 15 additions and 4 deletions
--- a/dangerzone/isolation_provider/base.py
+++ b/dangerzone/isolation_provider/base.py
@ -45,9 +45,9 @@ def read_int(f: IO[bytes]) -> int:


 def read_debug_text(f: IO[bytes], size: int) -> str:
-    """Read arbitrarily long text (for debug purposes)"""
+    """Read arbitrarily long text (for debug purposes), and sanitize it."""
    untrusted_text = f.read(size).decode("ascii", errors="replace")
-    return replace_control_chars(untrusted_text)
+    return replace_control_chars(untrusted_text, keep_newlines=True)


 class IsolationProvider(ABC):
--- a/dangerzone/util.py
+++ b/dangerzone/util.py
@ -66,11 +66,14 @@ def get_subprocess_startupinfo():  # type: ignore [no-untyped-def]
        return None


-def replace_control_chars(untrusted_str: str) -> str:
+def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str:
    """Remove control characters from string. Protects a terminal emulator
    from obscure control characters.

    Control characters are replaced by <EFBFBD> U+FFFD Replacement Character.
+
+    If a user wants to keep the newline character (e.g., because they are sanitizing a
+    multi-line text), they must pass `keep_newlines=True`.
    """

    def is_safe(chr: str) -> bool:
@ -90,5 +93,8 @@ def replace_control_chars(untrusted_str: str) -> str:

    sanitized_str = ""
    for char in untrusted_str:
-        sanitized_str += char if is_safe(char) else "<EFBFBD>"
+        if (keep_newlines and char == "\n") or is_safe(char):
+            sanitized_str += char
+        else:
+            sanitized_str += "<EFBFBD>"
    return sanitized_str
--- a/tests/test_util.py
+++ b/tests/test_util.py
@ -30,3 +30,8 @@ def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
    assert util.replace_control_chars(uncommon_text) == sanitized_text
    assert util.replace_control_chars("normal text") == "normal text"
    assert util.replace_control_chars("") == ""
+    assert util.replace_control_chars("multi-line\ntext") == "multi-line<6E>text"
+    assert (
+        util.replace_control_chars("multi-line\ntext", keep_newlines=True)
+        == "multi-line\ntext"
+    )