mirror of
https://github.com/freedomofpress/dangerzone.git
synced 2025-04-28 18:02:38 +02:00

Add a pytest fixture that crafts a filename with Unicode characters that are not considered common for this use. By default, this fixture uses an invalid Unicode character as well, but we strip it on macOS (APFS), since filenames there must be UTF-8 encoded [1]. [1]: https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations
140 lines
4.1 KiB
Python
140 lines
4.1 KiB
Python
import platform
|
||
import sys
|
||
import zipfile
|
||
from pathlib import Path
|
||
from typing import Callable, List
|
||
|
||
import pytest
|
||
|
||
from dangerzone.document import SAFE_EXTENSION
|
||
|
||
# Set a custom attribute on the ``sys`` module; the attribute is not declared
# by ``sys`` itself, hence the type-checker suppression.
# NOTE(review): presumably read by the dangerzone package to detect a
# development/test run -- confirm against the package code.
sys.dangerzone_dev = True  # type: ignore[attr-defined]


# Names of the sample directories and of the basic sample documents.
SAMPLE_DIRECTORY = "test_docs"
BASIC_SAMPLE_PDF = "sample-pdf.pdf"
BASIC_SAMPLE_DOC = "sample-doc.doc"
SAMPLE_EXTERNAL_DIRECTORY = "test_docs_external"
SAMPLE_COMPRESSED_DIRECTORY = "test_docs_compressed"

# Sample directories are resolved relative to the directory holding this file.
test_docs_dir = Path(__file__).parent / SAMPLE_DIRECTORY
test_docs_compressed_dir = Path(__file__).parent / SAMPLE_COMPRESSED_DIRECTORY
|
||
|
||
# Every regular file found recursively under the sample directory, except
# files whose name ends with SAFE_EXTENSION and files whose name starts with
# "sample_bad".
test_docs = [
    candidate
    for candidate in test_docs_dir.rglob("*")
    if candidate.is_file()
    and not candidate.name.endswith(SAFE_EXTENSION)
    and not candidate.name.startswith("sample_bad")
]
|
||
|
||
# Pytest parameter decorators
|
||
# Parametrizes a test over every collected sample document, passing each one
# as the ``doc`` argument and using the file name as the test ID.
for_each_doc = pytest.mark.parametrize(
    "doc",
    test_docs,
    ids=[sample.name for sample in test_docs],
)
|
||
|
||
|
||
@pytest.fixture
def sample_pdf() -> str:
    """Return the path of the basic sample PDF, as a string."""
    pdf_path = test_docs_dir / BASIC_SAMPLE_PDF
    return str(pdf_path)
|
||
|
||
|
||
# External docs: externally sourced documents, stored base64-encoded to
# reduce the chance of accidentally opening them.
test_docs_external_dir = Path(__file__).parent / SAMPLE_EXTERNAL_DIRECTORY
|
||
|
||
|
||
@pytest.fixture
def sample_doc() -> str:
    """Return the path of the basic sample DOC file, as a string."""
    doc_path = test_docs_dir / BASIC_SAMPLE_DOC
    return str(doc_path)
|
||
|
||
|
||
@pytest.fixture
def sample_bad_height() -> str:
    """Return the path of ``sample_bad_max_height.pdf``, as a string."""
    bad_height_path = test_docs_dir / "sample_bad_max_height.pdf"
    return str(bad_height_path)
|
||
|
||
|
||
@pytest.fixture
def sample_bad_width() -> str:
    """Return the path of ``sample_bad_max_width.pdf``, as a string."""
    bad_width_path = test_docs_dir / "sample_bad_max_width.pdf"
    return str(bad_width_path)
|
||
|
||
|
||
def get_docs_external(pattern: str = "*") -> List[Path]:
    """Collect the external sample documents matching a glob pattern.

    A pattern that does not end with ``*`` gets ``.b64`` appended before it
    is used. The external documents directory is then searched recursively;
    only regular files whose name does not end with ``SAFE_EXTENSION`` are
    returned.
    """
    glob = pattern if pattern.endswith("*") else f"{pattern}.b64"

    matches: List[Path] = []
    for candidate in test_docs_external_dir.rglob(glob):
        if not candidate.is_file():
            continue
        if candidate.name.endswith(SAFE_EXTENSION):
            continue
        matches.append(candidate)
    return matches
|
||
|
||
|
||
# Pytest parameter decorators
|
||
def for_each_external_doc(glob_pattern: str = "*") -> Callable:
    """Build a parametrize decorator over the external sample documents.

    The documents are looked up with ``get_docs_external(glob_pattern)`` and
    each one is passed to the test as the ``doc`` argument. The test ID of a
    document is its file name without a trailing ``.b64`` suffix.
    """
    encoded_suffix = ".b64"
    test_docs_external = get_docs_external(glob_pattern)

    # Remove the suffix as a whole. ``str.rstrip(".b64")`` would instead strip
    # any trailing run of the characters ".", "b", "6" and "4", so e.g.
    # "sample4.b64" would become "sample" rather than "sample4".
    doc_ids = [
        doc.name[: -len(encoded_suffix)]
        if doc.name.endswith(encoded_suffix)
        else doc.name
        for doc in test_docs_external
    ]
    return pytest.mark.parametrize(
        "doc",
        test_docs_external,
        ids=doc_ids,
    )
|
||
|
||
|
||
class TestBase:
    """Base class giving test classes access to a sample document path."""

    # Despite the attribute name, this is the path of the basic sample PDF.
    sample_doc = str(test_docs_dir / BASIC_SAMPLE_PDF)
|
||
|
||
|
||
@pytest.fixture
def unreadable_pdf(tmp_path: Path) -> str:
    """Create an empty ``document.pdf`` with mode 0o000 and return its path.

    The file is created inside ``tmp_path``; the returned path is a string.
    """
    locked_file = tmp_path / "document.pdf"
    # Request a file with no permission bits set.
    locked_file.touch(mode=0o000)
    return str(locked_file)
|
||
|
||
|
||
@pytest.fixture
def pdf_11k_pages(tmp_path: Path) -> str:
    """Extract the zipped 11K-page sample PDF and return its path.

    The document has 11K pages of 1x1 px. It was generated with the command:

        gs -sDEVICE=pdfwrite -o sample-11k-pages.pdf -dDEVICEWIDTHPOINTS=1 -dDEVICEHEIGHTPOINTS=1 -c 11000 {showpage} repeat

    The archive is read from the compressed sample directory and extracted
    into ``tmp_path``; the returned string is the path inside ``tmp_path``
    where the PDF is expected after extraction.
    """
    pdf_name = "sample-11k-pages.pdf"
    archive_path = test_docs_compressed_dir / f"{pdf_name}.zip"

    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(tmp_path)

    return str(tmp_path / pdf_name)
|
||
|
||
|
||
@pytest.fixture
def uncommon_text() -> str:
    r"""Craft a string with Unicode characters that are considered not common.

    The returned string contains the following uncommon characters:

    * ANSI escape sequences: \033[31;1;4m and \033[0m
    * A Unicode character that resembles an English character: greek "X"
      (U+03A7)
    * A Unicode control character that is not part of ASCII: zero-width
      joiner (U+200D)
    * An emoji: Cross Mark (U+274C)
    """
    crafted = "\033[31;1;4m BaD TeΧt \u200d ❌ \033[0m"
    return crafted
|
||
|
||
|
||
@pytest.fixture
def uncommon_filename(uncommon_text: str) -> str:
    """Craft a filename with Unicode characters that are considered not common.

    The filename is the ``uncommon_text`` string followed by ``.pdf``, with a
    small exception for macOS. Because the APFS filesystem in macOS accepts
    only UTF-8 encoded strings [1], a filename with invalid Unicode characters
    cannot be created there. So, in order to still test the rest of the corner
    cases, U+DCF0 is replaced with an empty string on that platform.

    [1]: https://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations
    """
    stem = uncommon_text
    if platform.system() == "Darwin":
        # Drop the surrogate code point that cannot be encoded as UTF-8.
        stem = stem.replace("\udcf0", "")
    return f"{stem}.pdf"
|
||
|
||
|
||
@pytest.fixture
def sanitized_text() -> str:
    """Return the sanitized counterpart of the ``uncommon_text`` string."""
    expected = "_[31;1;4m BaD Te_t _ _ _[0m"
    return expected
|