mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Keep newlines when reading debug logs
In d632908a44
we improved our
`replace_control_chars()` function, by replacing every control or
invalid Unicode character with a placeholder one. This change, however,
made our debug logs harder to read, since newlines were not preserved.
There are indeed various cases in which replacing newlines is wise
(e.g., in filenames), so we should keep this behavior by default.
However, specifically for reading debug logs, we add an option to keep
newlines to improve readability, at no expense to security.
This commit is contained in:
parent
e11aaec3ac
commit
0b45360384
3 changed files with 15 additions and 4 deletions
|
@ -45,9 +45,9 @@ def read_int(f: IO[bytes]) -> int:
|
|||
|
||||
|
||||
def read_debug_text(f: IO[bytes], size: int) -> str:
|
||||
"""Read arbitrarily long text (for debug purposes)"""
|
||||
"""Read arbitrarily long text (for debug purposes), and sanitize it."""
|
||||
untrusted_text = f.read(size).decode("ascii", errors="replace")
|
||||
return replace_control_chars(untrusted_text)
|
||||
return replace_control_chars(untrusted_text, keep_newlines=True)
|
||||
|
||||
|
||||
class IsolationProvider(ABC):
|
||||
|
|
|
@ -66,11 +66,14 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
|
|||
return None
|
||||
|
||||
|
||||
def replace_control_chars(untrusted_str: str) -> str:
|
||||
def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str:
|
||||
"""Remove control characters from string. Protects a terminal emulator
|
||||
from obscure control characters.
|
||||
|
||||
Control characters are replaced by <EFBFBD> U+FFFD Replacement Character.
|
||||
|
||||
If a user wants to keep the newline character (e.g., because they are sanitizing a
|
||||
multi-line text), they must pass `keep_newlines=True`.
|
||||
"""
|
||||
|
||||
def is_safe(chr: str) -> bool:
|
||||
|
@ -90,5 +93,8 @@ def replace_control_chars(untrusted_str: str) -> str:
|
|||
|
||||
sanitized_str = ""
|
||||
for char in untrusted_str:
|
||||
sanitized_str += char if is_safe(char) else "<EFBFBD>"
|
||||
if (keep_newlines and char == "\n") or is_safe(char):
|
||||
sanitized_str += char
|
||||
else:
|
||||
sanitized_str += "<EFBFBD>"
|
||||
return sanitized_str
|
||||
|
|
|
@ -30,3 +30,8 @@ def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
|
|||
assert util.replace_control_chars(uncommon_text) == sanitized_text
|
||||
assert util.replace_control_chars("normal text") == "normal text"
|
||||
assert util.replace_control_chars("") == ""
|
||||
assert util.replace_control_chars("multi-line\ntext") == "multi-line<6E>text"
|
||||
assert (
|
||||
util.replace_control_chars("multi-line\ntext", keep_newlines=True)
|
||||
== "multi-line\ntext"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue