Add utility for sanitizing strings

Add `replace_control_chars()` function in `util.py`, which can be used
to sanitize strings from ANSI escape sequences or weird Unicode symbols.
This commit is contained in:
deeplow 2023-07-28 18:55:58 +03:00 committed by Alex Pyrgiotis
parent cb08c198ad
commit 3788139d26
No known key found for this signature in database
GPG key ID: B6C15EBA0357C9AA
3 changed files with 41 additions and 1 deletions

View file

@ -1,5 +1,6 @@
import pathlib
import platform
import string
import subprocess
import sys
from typing import Optional
@ -62,3 +63,12 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
return startupinfo
else:
return None
def replace_control_chars(untrusted_str: str) -> str:
    """Replace characters that are unsafe to print in a terminal with "_".

    Protects a terminal emulator from obscure control characters, such as ANSI
    escape sequences, and from unusual Unicode symbols.

    Only characters in ``string.printable`` (ASCII letters, digits,
    punctuation, space, tab and newline) are kept. Every other character,
    including all non-ASCII ones, is replaced one-for-one by an underscore, so
    the result has the same length as the input.

    Args:
        untrusted_str: Text coming from an untrusted source.

    Returns:
        The sanitized text.
    """
    # string.printable also contains carriage return, vertical tab and form
    # feed. Those move the terminal cursor (e.g. "\r" lets later text overwrite
    # what was already printed on the line), so they are not treated as safe.
    # Tab and newline are kept so that multi-line text retains its layout.
    safe_chars = frozenset(string.printable) - frozenset("\r\x0b\x0c")
    # Build the result with a single join rather than repeated concatenation.
    return "".join(char if char in safe_chars else "_" for char in untrusted_str)

View file

@ -37,3 +37,24 @@ def unreadable_pdf(tmp_path: Path) -> str:
file_path = tmp_path / "document.pdf"
file_path.touch(mode=0o000)
return str(file_path)
@pytest.fixture
def uncommon_text() -> str:
    r"""Craft a string with Unicode characters that are considered not common.

    Create a string that contains the following uncommon characters:

    * ANSI escape sequences: \033[31;1;4m and \033[0m
    * A Unicode character that resembles an English character: greek "X" (U+03A7)
    * A Unicode control character that is not part of ASCII: zero-width joiner
      (U+200D)
    * An emoji: Cross Mark (U+274C)
    """
    # Every non-ASCII character is spelled as an escape so that it cannot be
    # silently dropped or mistaken for a look-alike ASCII character. The
    # zero-width joiner and the emoji are each followed by a space, which is
    # what the "_ _ _" run in the sanitized_text fixture corresponds to.
    return "\033[31;1;4m BaD Te\u03a7t \u200d \u274c \033[0m"
@pytest.fixture
def sanitized_text() -> str:
    """Provide the text expected after sanitizing the uncommon_text fixture.

    Each character that is not plain printable ASCII appears here as "_".
    """
    expected = "_[31;1;4m BaD Te_t _ _ _[0m"
    return expected

View file

@ -4,7 +4,9 @@ from pathlib import Path
import pytest
import dangerzone.util as util
from dangerzone import util
from . import sanitized_text, uncommon_text
VERSION_FILE_NAME = "version.txt"
@ -21,3 +23,10 @@ def test_get_resource_path() -> None:
def test_get_subprocess_startupinfo() -> None:
    """Check that get_subprocess_startupinfo() returns a STARTUPINFO object."""
    result = util.get_subprocess_startupinfo()
    expected_type = subprocess.STARTUPINFO  # type: ignore[attr-defined]
    assert isinstance(result, expected_type)
def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
    """Verify replace_control_chars() on uncommon, ordinary and empty input."""
    # Each pair is (input, expected output); checked in order.
    expectations = (
        (uncommon_text, sanitized_text),
        ("normal text", "normal text"),
        ("", ""),
    )
    for raw, expected in expectations:
        assert util.replace_control_chars(raw) == expected