From 3788139d26556b39896de329e6a2f2b2b1a2d406 Mon Sep 17 00:00:00 2001
From: deeplow
Date: Fri, 28 Jul 2023 18:55:58 +0300
Subject: [PATCH] Add utility for sanitizing strings

Add `replace_control_chars()` function in `util.py`, which can be used
to sanitize strings from ANSI escape sequences or weird Unicode symbols.

Whitespace control characters (tabs, carriage returns, vertical tabs,
form feeds and line feeds) are replaced as well, since they can be used
to move the cursor or forge extra lines in a terminal. Callers that
sanitize multi-line text can pass `keep_newlines=True` to preserve line
feeds.
---
 dangerzone/util.py | 27 +++++++++++++++++++++++++++
 tests/__init__.py  | 21 +++++++++++++++++++++
 tests/test_util.py | 13 ++++++++++++-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/dangerzone/util.py b/dangerzone/util.py
index c9b0be0..dc188f2 100644
--- a/dangerzone/util.py
+++ b/dangerzone/util.py
@@ -1,5 +1,6 @@
 import pathlib
 import platform
+import string
 import subprocess
 import sys
 from typing import Optional
@@ -62,3 +63,29 @@ def get_subprocess_startupinfo():  # type: ignore [no-untyped-def]
         return startupinfo
     else:
         return None
+
+
+# Printable ASCII characters that are safe to echo to a terminal. Unlike
+# `string.printable`, this excludes the whitespace control characters
+# (\t, \n, \r, \x0b, \x0c), which can move the cursor or forge extra lines.
+# The plain space is kept.
+_SAFE_CHARS = frozenset(string.printable) - frozenset(string.whitespace) | {" "}
+
+
+def replace_control_chars(untrusted_str: str, keep_newlines: bool = False) -> str:
+    """Replace control and non-ASCII characters in a string with "_".
+
+    Protects a terminal emulator from obscure control characters, such as ANSI
+    escape sequences, carriage returns, or invisible Unicode symbols.
+
+    Args:
+        untrusted_str: The string to sanitize.
+        keep_newlines: If True, leave line feeds untouched. Useful when
+            sanitizing multi-line text.
+
+    Returns:
+        A string of the same length, where every character that is not printable
+        ASCII (0x20-0x7E), or optionally a line feed, is replaced by "_".
+    """
+    safe_chars = (_SAFE_CHARS | {"\n"}) if keep_newlines else _SAFE_CHARS
+    return "".join(char if char in safe_chars else "_" for char in untrusted_str)
diff --git a/tests/__init__.py b/tests/__init__.py
index 32168ff..03bbb06 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -37,3 +37,24 @@ def unreadable_pdf(tmp_path: Path) -> str:
     file_path = tmp_path / "document.pdf"
     file_path.touch(mode=0o000)
     return str(file_path)
+
+
+@pytest.fixture
+def uncommon_text() -> str:
+    r"""Craft a string with Unicode characters that are considered not common.
+
+    Create a string that contains the following uncommon characters:
+
+    * ANSI escape sequences: \033[31;1;4m and \033[0m
+    * A Unicode character that resembles an English character: greek "X" (U+03A7)
+    * A Unicode control character that is not part of ASCII: zero-width joiner
+      (U+200D)
+    * An emoji: Cross Mark (U+274C)
+    """
+    return "\033[31;1;4m BaD TeΧt \u200d ❌ \033[0m"
+
+
+@pytest.fixture
+def sanitized_text() -> str:
+    """Return a sanitized version of the uncommon_text."""
+    return "_[31;1;4m BaD Te_t _ _ _[0m"
diff --git a/tests/test_util.py b/tests/test_util.py
index e0cf2b6..04eecb2 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -4,7 +4,9 @@ from pathlib import Path
 
 import pytest
 
-import dangerzone.util as util
+from dangerzone import util
+
+from . import sanitized_text, uncommon_text
 
 VERSION_FILE_NAME = "version.txt"
 
@@ -21,3 +23,12 @@ def test_get_resource_path() -> None:
 def test_get_subprocess_startupinfo() -> None:
     startupinfo = util.get_subprocess_startupinfo()
     assert isinstance(startupinfo, subprocess.STARTUPINFO)  # type: ignore[attr-defined]
+
+
+def test_replace_control_chars(uncommon_text: str, sanitized_text: str) -> None:
+    """Test that the replace_control_chars() function works properly."""
+    assert util.replace_control_chars(uncommon_text) == sanitized_text
+    assert util.replace_control_chars("normal text") == "normal text"
+    assert util.replace_control_chars("") == ""
+    assert util.replace_control_chars("a\tb\rc\x0bd\x0ce\nf") == "a_b_c_d_e_f"
+    assert util.replace_control_chars("a\r\nb", keep_newlines=True) == "a_\nb"