mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00
Add utility for sanitizing strings
Add a `replace_control_chars()` function in `util.py`, which can be used to sanitize strings that contain ANSI escape sequences or unusual Unicode symbols.
This commit is contained in:
parent
cb08c198ad
commit
3788139d26
3 changed files with 41 additions and 1 deletions
|
@ -1,5 +1,6 @@
|
|||
import pathlib
|
||||
import platform
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
@ -62,3 +63,12 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
|
|||
return startupinfo
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def replace_control_chars(untrusted_str: str) -> str:
    """Replace control and non-ASCII characters in a string with "_".

    Protects a terminal emulator from obscure control characters, such as
    ANSI escape sequences, that may be embedded in untrusted text.

    Only printable ASCII characters, plus tab and newline, are kept. Every
    other character, including any non-ASCII Unicode symbol, is replaced
    one-for-one by an underscore, so the length of the string is preserved.

    Args:
        untrusted_str: The string to sanitize.

    Returns:
        The sanitized string.
    """
    # `string.printable` also contains "\r", "\x0b" and "\x0c". Those are
    # control characters too (a carriage return lets untrusted text overwrite
    # what was already printed on the line), so they must not pass through.
    safe_chars = set(string.printable) - {"\r", "\x0b", "\x0c"}
    # Build the result in a single pass instead of repeated concatenation.
    return "".join(
        char if char in safe_chars else "_" for char in untrusted_str
    )
|
||||
|
|
|
@ -37,3 +37,24 @@ def unreadable_pdf(tmp_path: Path) -> str:
|
|||
file_path = tmp_path / "document.pdf"
|
||||
file_path.touch(mode=0o000)
|
||||
return str(file_path)
|
||||
|
||||
|
||||
@pytest.fixture
def uncommon_text() -> str:
    r"""Craft a string with Unicode characters that are considered not common.

    Create a string that contains the following uncommon characters:

    * ANSI escape sequences: \033[31;1;4m and \033[0m
    * A Unicode character that resembles an English character: Greek capital
      letter Chi, which looks like a Latin "X" (U+03A7)
    * A Unicode control character that is not part of ASCII: zero-width joiner
      (U+200D)
    * An emoji: Cross Mark (U+274C)

    The docstring is a raw string so that the escape sequences listed above
    stay as readable text instead of becoming real ESC characters.
    """
    return "\033[31;1;4m BaD TeΧt \u200d ❌ \033[0m"
|
||||
|
||||
|
||||
@pytest.fixture
def sanitized_text() -> str:
    """Provide the expected result of sanitizing the ``uncommon_text`` string.

    Each character of that string that is not printable ASCII appears here
    as a single underscore.
    """
    expected = "_[31;1;4m BaD Te_t _ _ _[0m"
    return expected
|
||||
|
|
|
@ -4,7 +4,9 @@ from pathlib import Path
|
|||
|
||||
import pytest
|
||||
|
||||
import dangerzone.util as util
|
||||
from dangerzone import util
|
||||
|
||||
from . import sanitized_text, uncommon_text
|
||||
|
||||
VERSION_FILE_NAME = "version.txt"
|
||||
|
||||
|
@ -21,3 +23,10 @@ def test_get_resource_path() -> None:
|
|||
def test_get_subprocess_startupinfo() -> None:
    """Check that the helper returns a ``subprocess.STARTUPINFO`` instance."""
    result = util.get_subprocess_startupinfo()
    expected_type = subprocess.STARTUPINFO  # type: ignore[attr-defined]
    assert isinstance(result, expected_type)
|
||||
|
||||
|
||||
def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
    """Verify replace_control_chars() on uncommon, plain and empty strings."""
    # Each pair is (input, expected output), checked in order.
    cases = (
        (uncommon_text, sanitized_text),
        ("normal text", "normal text"),
        ("", ""),
    )
    for given, expected in cases:
        assert util.replace_control_chars(given) == expected
|
||||
|
|
Loading…
Reference in a new issue