mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-29 02:12:36 +02:00
Add utility for sanitizing strings
Add `replace_control_chars()` function in `util.py`, which can be used to sanitize strings from ANSI escape sequences or weird Unicode symbols.
This commit is contained in:
parent
cb08c198ad
commit
3788139d26
3 changed files with 41 additions and 1 deletions
|
@ -1,5 +1,6 @@
|
||||||
import pathlib
|
import pathlib
|
||||||
import platform
|
import platform
|
||||||
|
import string
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
@ -62,3 +63,12 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
|
||||||
return startupinfo
|
return startupinfo
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# `string.printable` also contains a few whitespace characters that terminals
# treat as control characters. Carriage return lets untrusted text overwrite
# what was already printed on the current line; vertical tab and form feed
# move the cursor or clear the screen on some emulators. Exclude them from the
# set of characters that are passed through. Tab and newline are kept so that
# ordinary multi-line text survives sanitization.
_UNSAFE_PRINTABLE_CHARS = "\r\x0b\x0c"
_SAFE_CHARS = frozenset(string.printable) - frozenset(_UNSAFE_PRINTABLE_CHARS)


def replace_control_chars(untrusted_str: str) -> str:
    """Replace control and non-ASCII characters in a string with "_".

    Protects a terminal emulator from obscure control characters (for example
    ANSI escape sequences) and from unusual Unicode symbols that may be present
    in untrusted text.

    Every character that is not a printable ASCII letter, digit, punctuation
    mark, space, tab or newline is replaced by a single underscore, so the
    result always has the same length as the input.

    Args:
        untrusted_str: The text to sanitize. May be empty.

    Returns:
        The sanitized text, safe to print in a terminal.
    """
    # Build the result in one pass with join() instead of repeated `+=`.
    return "".join(
        char if char in _SAFE_CHARS else "_" for char in untrusted_str
    )
|
||||||
|
|
|
@ -37,3 +37,24 @@ def unreadable_pdf(tmp_path: Path) -> str:
|
||||||
file_path = tmp_path / "document.pdf"
|
file_path = tmp_path / "document.pdf"
|
||||||
file_path.touch(mode=0o000)
|
file_path.touch(mode=0o000)
|
||||||
return str(file_path)
|
return str(file_path)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def uncommon_text() -> str:
    r"""Provide a sample string that mixes several kinds of unusual characters.

    The returned text contains:

    * ANSI escape sequences: \033[31;1;4m and \033[0m
    * A Unicode character that looks like a Latin letter: Greek "X" (U+03A7)
    * A non-ASCII, invisible Unicode character: zero-width joiner (U+200D)
    * An emoji: Cross Mark (U+274C)
    """
    text = "\033[31;1;4m BaD TeΧt \u200d ❌ \033[0m"
    return text
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sanitized_text() -> str:
    """Provide the expected result of sanitizing the `uncommon_text` fixture.

    Each character of that text that is not printable ASCII appears here as
    an underscore.
    """
    expected = "_[31;1;4m BaD Te_t _ _ _[0m"
    return expected
|
||||||
|
|
|
@ -4,7 +4,9 @@ from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
import dangerzone.util as util
|
from dangerzone import util
|
||||||
|
|
||||||
|
from . import sanitized_text, uncommon_text
|
||||||
|
|
||||||
VERSION_FILE_NAME = "version.txt"
|
VERSION_FILE_NAME = "version.txt"
|
||||||
|
|
||||||
|
@ -21,3 +23,10 @@ def test_get_resource_path() -> None:
|
||||||
def test_get_subprocess_startupinfo() -> None:
|
def test_get_subprocess_startupinfo() -> None:
|
||||||
startupinfo = util.get_subprocess_startupinfo()
|
startupinfo = util.get_subprocess_startupinfo()
|
||||||
assert isinstance(startupinfo, subprocess.STARTUPINFO) # type: ignore[attr-defined]
|
assert isinstance(startupinfo, subprocess.STARTUPINFO) # type: ignore[attr-defined]
|
||||||
|
|
||||||
|
|
||||||
|
def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
    """Check replace_control_chars() on uncommon, ordinary and empty input."""
    # (input, expected output) pairs, checked in order.
    cases = (
        (uncommon_text, sanitized_text),
        ("normal text", "normal text"),
        ("", ""),
    )
    for raw, expected in cases:
        assert util.replace_control_chars(raw) == expected
|
||||||
|
|
Loading…
Reference in a new issue