Keep newlines when reading debug logs

In d632908a44 we improved our
`replace_control_chars()` function, by replacing every control or
invalid Unicode character with a placeholder one. This change, however,
made our debug logs harder to read, since newlines were not preserved.

There are indeed various cases in which replacing newlines is wise
(e.g., in filenames), so we should keep this behavior by default.
However, specifically for reading debug logs, we add an option to keep
newlines to improve readability, at no expense to security.
This commit is contained in:
Alex Pyrgiotis 2024-04-30 13:32:02 +03:00
parent e11aaec3ac
commit 0b45360384
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
3 changed files with 15 additions and 4 deletions

View file

@ -45,9 +45,9 @@ def read_int(f: IO[bytes]) -> int:
def read_debug_text(f: IO[bytes], size: int) -> str:
"""Read arbitrarily long text (for debug purposes)"""
"""Read arbitrarily long text (for debug purposes), and sanitize it."""
untrusted_text = f.read(size).decode("ascii", errors="replace")
return replace_control_chars(untrusted_text)
return replace_control_chars(untrusted_text, keep_newlines=True)
class IsolationProvider(ABC):

View file

@ -66,11 +66,14 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
return None
def replace_control_chars(untrusted_str: str) -> str:
def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str:
"""Remove control characters from string. Protects a terminal emulator
from obscure control characters.
Control characters are replaced by <EFBFBD> U+FFFD Replacement Character.
If a user wants to keep the newline character (e.g., because they are sanitizing a
multi-line text), they must pass `keep_newlines=True`.
"""
def is_safe(chr: str) -> bool:
@ -90,5 +93,8 @@ def replace_control_chars(untrusted_str: str) -> str:
sanitized_str = ""
for char in untrusted_str:
sanitized_str += char if is_safe(char) else "<EFBFBD>"
if (keep_newlines and char == "\n") or is_safe(char):
sanitized_str += char
else:
sanitized_str += "<EFBFBD>"
return sanitized_str

View file

@ -30,3 +30,8 @@ def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
assert util.replace_control_chars(uncommon_text) == sanitized_text
assert util.replace_control_chars("normal text") == "normal text"
assert util.replace_control_chars("") == ""
assert util.replace_control_chars("multi-line\ntext") == "multi-line<6E>text"
assert (
util.replace_control_chars("multi-line\ntext", keep_newlines=True)
== "multi-line\ntext"
)